def ana(data, mysite):
    """Print the numbered external links found in *data*, flagging *mysite*.

    Every ``href="..."`` value in *data* is extracted. A link is kept when
    it starts with ``http://`` and does not contain ``baidu``. Kept links are
    numbered from 1 in order of appearance and printed one per line as
    ``<n> <url>``; links matching *mysite* are printed as ``*  <n> <url>``.

    Args:
        data: Text to scan (HTML, judging by the ``href`` pattern).
        mysite: Pattern handed to ``re.search``. It is interpreted as a
            regular expression, not as a literal string.

    Returns:
        None. Results are written to stdout.
    """
    link_re = re.compile(r'href="(.+?)"')
    # Compile once instead of re-resolving the pattern for every link.
    site_re = re.compile(mysite)

    # NOTE(review): the counter is assumed to advance only for links that
    # pass the filter (reconstructed from whitespace-flattened source).
    line = 1
    for ff in link_re.findall(data):
        # Requiring the "http://" prefix also rejects relative "s?..." links,
        # so no separate check for those is needed.
        if "baidu" in ff or not ff.startswith("http://"):
            continue
        if site_re.search(ff):
            # Two spaces after "*" reproduce the output of the original
            # ``print "* ", line, ff`` statement.
            print("*  %s %s" % (line, ff))
        else:
            print("%s %s" % (line, ff))
        line += 1
if __name__ == "__main__":
    # Expected invocation: <script> <argv[1]> <argv[2]>
    #   argv[1] is passed to baidu() (presumably the search keyword --
    #           confirm against baidu()'s definition);
    #   argv[2] is the site pattern that ana() flags in the output.
    if len(sys.argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.stderr.write("usage: %s <keyword> <mysite>\n" % sys.argv[0])
        sys.exit(1)

    mysite = sys.argv[2]
    keyword = sys.argv[1]
    # Only byte strings need decoding; text strings have no .decode().
    if isinstance(keyword, bytes):
        keyword = keyword.decode('utf-8')
    data = baidu(keyword)
    ana(data, mysite)