下载 8000 首儿歌的 Python 代码(适用于 Python 2):
代码如下:
#-*- coding: UTF-8 -*-
from pyquery import PyQuery as py
from lxml import etree
import urllib
import re
import os
import sys
import logging
def format(filename):
    """Return ``filename`` with awkward characters replaced by underscores.

    Spaces, the ASCII apostrophe and the right single quotation mark
    (U+2019) are each replaced with ``_``.

    Args:
        filename: The file name (text) to sanitise.

    Returns:
        The sanitised file name; unchanged when it contains none of the
        replaced characters.
    """
    # NOTE(review): the original character tuple was garbled in this copy of
    # the source (it was not valid Python). The set below is a reconstruction:
    # space, right single quotation mark, apostrophe -- confirm against the
    # upstream script.
    unwanted_chars = (u" ", u"\u2019", u"'")
    for char in unwanted_chars:
        # str.replace is a no-op when the character is absent, so no
        # separate find() check is needed.
        filename = filename.replace(char, u"_")
    return filename
def download_mp3(mp3_url, filename, dir):
    """Download one MP3 into ``dir`` unless the target file already exists.

    The download is best-effort: a failure is logged and swallowed so that a
    caller iterating over many URLs can carry on.

    Args:
        mp3_url: URL of the MP3 file to fetch.
        filename: Name of the file to create inside ``dir``.
        dir: Destination directory.

    Returns:
        None.
    """
    # NOTE(review): ``logger`` is not a parameter of this function; it is
    # presumably a module-level logger defined elsewhere in the file -- confirm.
    f = os.path.join(dir, filename)
    if os.path.exists(f):
        logger.debug(f + " is existed.")
        return
    try:
        # Fetch the whole payload before opening the target file, so a failed
        # request does not leave behind an empty file that later runs would
        # skip as already "existed".
        response = urllib.urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        with open(f, 'wb') as out:
            out.write(data)
        logger.debug(filename + ' is downloaded.')
    except Exception:
        # Deliberately best-effort, but let KeyboardInterrupt/SystemExit
        # propagate and keep the traceback in the log for diagnosis.
        logger.debug(filename + ' is not downloaded.', exc_info=True)
def download_all_mp3(start,end,dir,logger):
for x in range(start,end):
try:
url = "http://www.youban.com/mp3-d" + str(x) + ".html"
logger.debug(str(x) + ": "+url)
doc = py(url=url)
e = doc('.mp3downloadbox')
if e is None or e == '':
logger.debug(url+" is not existed.")
return
e = unicode(e)
#logger.debug( e)
regex = re.compile(ur".*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?/"(.*?)/"",re.UNICODE|re.S)
m = regex.search(e)
if m is not None:
title = m.group(1).strip()
title2 = str(x)+"_"+title + ".mp3"
#title2 = re.sub(' ','_',title2)
title2 = format(title2)
link = m.group(2)
#logger.debug( "title:" + title + " link:" + link)
新闻热点
疑难解答