import urllib2 from sgmllib import SGMLParser import smtplib import time #from email.mime.text import MIMEText #from bs4 import BeautifulSoup #import re
class UrlParser(SGMLParser): urls = [] def do_a(self,attrs): '''''parse tag a''' for name,value in attrs: if name=='href': self.urls.append(value) else: continue
def do_link(self,attrs): '''''parse tag link''' for name,value in attrs: if name=='href': self.urls.append(value); else: continue
dailyUrls = [] detailUrl = "" for url in urls: if 'daily' in url: dailyUrls.append(url); if not detailUrl and not isDetail and 'www.bc5u.com' in url: detailUrl = url
page.close() parser.close()
if isDetail: return dailyUrls else: return dailyUrls,detailUrl