代码比较简单，不做过多解释，这里以“抑郁症吧”为例。
"""Scrape thread titles from a Baidu Tieba forum and save them to a text file.

The forum's thread-list pages are fetched one after another, every anchor
carrying the CSS class ``j_th_tit`` is treated as a thread title link, and
each title is printed and appended to the output file.
"""
from urllib.request import urlopen
import codecs

from bs4 import BeautifulSoup

# Thread-list URL of the forum being scraped; the paging offset is appended
# to the trailing ``pn=`` query parameter.
BASE_URL = "http://tieba.baidu.com/f?kw=%E6%8A%91%E9%83%81%E7%97%87&ie=utf-8&pn="
# The offset advances by this amount from one page request to the next.
PAGE_STEP = 50
# Exclusive upper bound for the paging offset.
MAX_OFFSET = 100000
# File that receives the collected titles (UTF-8, overwritten on every run).
OUTPUT_PATH = "douban.txt"


def main():
    """Fetch every listing page and write the thread titles to OUTPUT_PATH.

    For each page the 1-based page number is printed, then every matching
    link is printed as "<href> <title>" and its title is written to the
    output file followed by the "+++" marker and a newline.

    Raises:
        urllib.error.URLError: if a page cannot be fetched; the output file
            is still closed properly because it is managed by ``with``.
    """
    with codecs.open(OUTPUT_PATH, "w", "utf-8") as out_file:
        for offset in range(0, MAX_OFFSET, PAGE_STEP):
            # Integer arithmetic keeps the label as "第1页" rather than "第1.0页".
            page_number = offset // PAGE_STEP + 1
            print("第" + str(page_number) + "页")

            # Parse while the response is open, then release the connection.
            with urlopen(BASE_URL + str(offset)) as response:
                soup = BeautifulSoup(response, "html.parser")

            for link in soup.find_all("a", {"class": "j_th_tit"}):
                title = link.text
                print(link.get("href", "") + " " + title)
                # "\n" (not "/n") so that each title ends up on its own line.
                out_file.write(title + "+++" + "\n")


if __name__ == "__main__":
    main()
疑难解答