import urllib2

from lxml import etree
def main():
    """Download a page and print details of its ``<h1 class='h1user'>`` element.

    The page at ``userMainUrl`` is fetched with ``urllib2``, parsed with
    ``lxml.etree.HTML``, and the first ``h1`` element whose ``class``
    attribute is exactly ``h1user`` is looked up.  The element, its type,
    its ``dir()`` listing, text, attributes, tag and serialized markup are
    printed to standard output.

    If the page contains no such element, a short notice is printed and the
    function returns early instead of failing on the missing element.

    Returns:
        None.
    """
    # NOTE(review): userMainUrl is not defined in the visible code; it has to
    # exist at module level before main() is called -- confirm where it is set.
    req = urllib2.Request(userMainUrl)
    resp = urllib2.urlopen(req)
    try:
        respHtml = resp.read()
    finally:
        # Always release the connection, even when reading fails.
        resp.close()

    # Each message is built with %-formatting and passed to print as a single
    # parenthesised value, so the text is the same as "print label, value"
    # whether print is a statement or a function.
    print("Method 3: Use lxml to extract info from html")
    htmlElement = etree.HTML(respHtml)
    print("htmlElement= %s" % (htmlElement,))

    h1userElement = htmlElement.find(".//h1[@class='h1user']")
    print("h1userElement= %s" % (h1userElement,))
    if h1userElement is None:
        # find() yields None when nothing matches; everything below needs a
        # real element, so stop here rather than raise AttributeError.
        print("no h1 element with class 'h1user' found")
        return

    print("type(h1userElement)= %s" % (type(h1userElement),))
    print("dir(h1userElement)= %s" % (dir(h1userElement),))
    print("h1userElement.text= %s" % (h1userElement.text,))

    attributes = h1userElement.attrib
    print("attributes= %s" % (attributes,))
    print("type(attributes)= %s" % (type(attributes),))

    classKeyValue = attributes["class"]
    print("classKeyValue= %s" % (classKeyValue,))
    print("type(classKeyValue)= %s" % (type(classKeyValue),))

    tag = h1userElement.tag
    print("tag= %s" % (tag,))

    # etree.tostring() serializes the element itself, so despite the variable
    # name the output includes the surrounding <h1 ...> tag.
    innerHtml = etree.tostring(h1userElement)
    print("innerHtml= %s" % (innerHtml,))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()