# coding=utf-8
import urllib,re,sys,time
# Scrape the Google Music "top 100 duet love songs" listing, visit the
# download page of every song found there and save the mp3 links, one per
# line, to `downname`.
url = 'http://www.google.cn/music/topiclisting?q=top100_duet_love_songs&cat=song'
prefix = 'http://www.google.cn/music/top100/musicdownload?id='
downname = 'songlist.txt'

# One download entry of the listing page. The page text contains the literal
# characters \x26resnum (an escaped "&resnum"), so the regex needs `\\` to
# match that single backslash; a raw string avoids doubling the escape again
# at the Python string level.
_ENTRY_PATTERN = re.compile(r'下载.*window.*http.*\\x26resnum')
# Download address of the mp3 file inside a song's download page.
_MP3_PATTERN = re.compile(r'http.*mp3')

# Every entry match ends with the 10-character marker `\x26resnum`; the 17
# characters in front of it are taken as the song id.
# NOTE(review): the fixed id width of 17 is inherited from the original
# slice [-27:-10] -- confirm against the current page markup.
_MARKER_LEN = 10
_ID_LEN = 17


def _fetch(address):
    """Return the body of *address* and always close the connection."""
    response = urllib.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _song_page_urls(listing_html):
    """Return the download-page URL of every entry found in *listing_html*."""
    return [
        prefix + urllib.unquote(
            entry.group()[-(_ID_LEN + _MARKER_LEN):-_MARKER_LEN])
        for entry in _ENTRY_PATTERN.finditer(listing_html)
    ]


def _mp3_links(page_html):
    """Return every mp3 address matched in *page_html*, unquoted twice."""
    return [
        urllib.unquote(urllib.unquote(found.group()))
        for found in _MP3_PATTERN.finditer(page_html)
    ]


links = []
for page_url in _song_page_urls(_fetch(url)):
    page_html = _fetch(page_url)
    time.sleep(1)  # pause between consecutive requests to the server
    links.extend(_mp3_links(page_html))

# The output file is only created when at least one link was collected.
if links:
    with open(downname, 'w') as out:
        out.writelines(link + '\n' for link in links)
print('finish')