完整代码如下。我又用 v2ex 试了一下:虽然两个站都是 UTF-8 编码,v2ex 可以输出结果,我想爬的这个站却不行。麻烦各位帮忙看看:
# -*- coding:utf-8 -*-
import urllib2
import HTMLParser
class MyParser(HTMLParser.HTMLParser):
    """HTML parser that prints the ``href`` of every ``<a>`` start tag.

    The base-class initializer is inherited unchanged, so ``MyParser()``
    takes no arguments. Feed markup with ``feed()`` and call ``close()``
    when the document is complete so buffered input is processed too.
    """

    def handle_starttag(self, tag, attrs):
        """Print each ``href`` value carried by an ``<a>`` start tag.

        ``tag`` is the tag name and ``attrs`` the list of ``(name, value)``
        pairs supplied by the base parser. Tags other than ``a`` are
        ignored.
        """
        if tag != 'a':
            return
        for name, value in attrs:
            # A bare ``<a href>`` yields value None; there is no link to
            # report, so skip it instead of printing the text "None".
            if name == 'href' and value is not None:
                # Single-argument form prints the same text whether print
                # is a statement (Python 2) or a function (Python 3).
                print(value)
# Download one page and print the href of every <a> tag found in it.
my = MyParser()
Url = 'http://blog.sina.com.cn/s/articlelist_1743100694_0_1.html/'
Url2 = 'https://v2ex.com/'  # second test page; not used below
# Concatenation prints "Url: <address>" identically under the Python 2
# print statement and the Python 3 print function.
print('Url: ' + Url)
request = urllib2.Request(Url)
m = urllib2.urlopen(request)
try:
    con = m.read()
finally:
    # Release the connection even if reading the body fails.
    m.close()
my.feed(con.decode('utf-8'))
# feed() can hold back input it regards as an incomplete construct;
# close() forces the parser to process whatever is still buffered.
my.close()