Things change, roll with the punches. Oh, yeah. Go for it man, jump off the high dive, stare down the barrel of the gun, pee into the wind!
-
Python下载百度新歌100的代码
2007-02-07 17:00:28
1 #!/usr/bin/python 2 # -*- coding: utf-8 -*- 3 # Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn> 4 # License: GPLv2 5 # Author: oneleaf <oneleaf AT gmail.com> 6 # hack by ct <ctqucl AT gmail.com> 7 8 import httplib 9 import re 10 import urllib 11 import os 12 import locale 13 global m,topid 14 global fsize 15 fsize=2 #文件大小下限(M) 16 m='0' #'-1'=任意 '0'=mp3 '1'=rm '2'=wma '3'=asf '4'=ram '5'=mid '6'=flash 17 topid='1' 18 19 if topid=='0': 20 topid='/list/newhits.html' #新歌100 21 elif topid=='1': 22 topid='/topso/mp3topsong.html' #Top500 23 elif topid=='2': 24 topid='/list/oldsong.html' #老歌经典 25 elif topid=='3': 26 topid='/list/movies.html' #电影金曲 27 elif topid=='4': 28 topid='/list/tvs.html' #电视歌曲 29 elif topid=='5': 30 topid='/minge/mp3topsong.html' #民歌精选 31 elif topid=='6': 32 topid='/xiaoyuan/mp3topsong.html' #校园歌曲 33 elif topid=='7': 34 topid='/list/liujinsuiyue.html' #流金岁月(new) 35 elif topid=='8': 36 topid='/list/yaogun.html' #摇滚地带 37 38 39 def getdownfileurl(url): #获取歌曲页的试听URL 40 url = "http://220.181.27.54/m"+url 41 tn = re.search('&tn=(.*)&word',url).group(0) 42 url=url.replace(tn,'&tn=baidusg,mp3%20%20&word') 43 try: 44 urlopen = urllib.URLopener() 45 fp=urlopen.open(url) 46 data = fp.read() 47 fp.close() 48 except IOError, errmsg: 49 print errmsg 50 expression2='"_blank">(.*)</a></a></li>' 51 url = re.search(expression2, data).group(0)[16:-13] 52 try: 53 url="http://"+urllib.quote(url) 54 except:pass 55 #print u"发现 "+url 56 return url 57 58 def getdownurl(url): #从歌曲页抓取URL列表 59 urllist=[] 60 urllist1=[] 61 urllist2=[] 62 conn = httplib.HTTPConnection('mp3.baidu.com') 63 conn.request("GET",url) 64 response = conn.getresponse() 65 html=response.read() 66 conn.close() 67 expression2='http://220.181.27.54/m(.*)" target' 68 listSentence2 = re.findall(expression2, html) #抓取链接列表 69 filesize=re.findall('<td>(.*)M</td>',html) #抓取文件大小 70 lineno=0 71 while lineno<len(listSentence2): 72 mp3url=getdownfileurl(listSentence2[lineno]) #转换链接为最终下载地址 73 
urllist1.append(mp3url) 74 lineno+=1 75 urllist=map(None,urllist1,filesize) 76 return urllist 77 78 def downmp3(url,author,name,filelist): #下载歌曲 79 filename=author+"-"+name; 80 for i in filelist: 81 name=unicode(i,locale.getpreferredencoding()) 82 if name.find(filename) == 0: #忽略 83 print u"文件已经下载,忽略。" 84 return 1 85 urllists=getdownurl(url) #获取文件url列表 86 lineno=0 87 while lineno<len(urllists): 88 print u"尝试",urllists[lineno][0] 89 ext=urllists[lineno][0][-4:] #获取文件名后缀(最后4位) 90 try: 91 lineno+=1 92 print urllists[lineno-1][1] +'M' 93 if float(urllists[lineno-1][1])>float(fsize) : #大小符合则下载 94 urlopen = urllib.URLopener() 95 fp=urlopen.open(urllists[lineno-1][0]) 96 data = fp.read() 97 fp.close() 98 filename=filename+ext; 99 file=open(filename,'w+b') 100 file.write(data) 101 file.close() 102 print u"下载成功!" 103 return 1 104 elif float(urllists[lineno][1])<float(fsize) : #不符则略过 105 print u"文件太小,忽略!" 106 except: 107 continue 108 return 0 109 110 if __name__ == "__main__": 111 conn = httplib.HTTPConnection('list.mp3.baidu.com') 112 conn.request("GET",topid ) #类型 113 response = conn.getresponse() 114 html=response.read().decode('gbk') 115 conn.close() 116 expression1='border">(.*).</td>' 117 expression2='><a href="http://mp3.baidu.com/m(.*)</a>' 118 expression3='href="http://mp3.baidu.com/m(.*)</td>' 119 listSentence1 = re.findall(expression1, html) #编号特征 120 listSentence2 = re.findall(expression2, html) #歌曲名特征 121 listSentence3 = re.findall(expression3, html) #歌手名特征 122 lineno=0 123 while lineno<len(listSentence1): 124 listSentence2[lineno]=listSentence2[lineno].replace('m=-1','m=' + m) #指定格式 125 url=re.search('(.*)target',listSentence2[lineno]) 126 url='/m'+url.group(0)[:-8] 127 idno=listSentence1[lineno] 128 name=re.search('blank>(.*)',listSentence2[lineno]) 129 name=name.group(0)[6:] 130 dirty=re.search('</A>/<A href=(.*) target=_blank>',listSentence3[lineno]) 131 if dirty is not None : #合唱 132 author1=re.search('>(.*)</A>/<A',listSentence3[lineno]) 133 
author1=author1.group(0)[1:-7] 134 author2=re.search('/<A href=(.*)</A>',listSentence3[lineno]) 135 author2=re.search('>(.*)<',author2.group(0)) 136 author2=author2.group(0)[1:-1] 137 author=author1 + '+' + author2 138 elif dirty is None : #独唱 139 author=re.search('blank>(.*)</',listSentence3[lineno]) 140 author=author.group(0)[6:-2] 141 print u"开始下载",idno,name,author 142 filelist=os.listdir('.'); 143 if downmp3(url,author,name,filelist)==0: #判断失败 144 print u"下载",author,name,u'失败!' 145 lineno+=1 146 转自:http://forum.ubuntu.org.cn/viewtopic.php?t=15682
-
backup_ver2.py
2007-01-15 13:19:47
#!/usr/bin/python
# Filename : backup_ver2.py
# backup and package /home/mushroom & /root directory
# to /root/backup/YearMonthDay/HourMinuteSecond.tar.gz
import os
import subprocess
import time

# Directories to archive.
source = ['/home/mushroom/', '/root/']
# Backups are grouped in one sub-directory per day below this directory.
# NOTE(review): target_dir lies inside '/root/', which is itself a source,
# so every archive also packs the earlier backups -- consider excluding it.
target_dir = '/root/backup/'
today = target_dir + time.strftime('%Y%m%d')
now = time.strftime('%H%M%S')

if not os.path.exists(today):
    # makedirs also creates target_dir itself when it is missing;
    # os.mkdir would fail in that case.
    os.makedirs(today)
    print('Successfully created directory %s' % today)

target = today + os.sep + now + '.tar.gz'
# An argument list run without a shell: paths containing spaces or quotes
# reach tar unchanged.
zip_command = ['tar', 'zcvf', target] + source

try:
    status = subprocess.call(zip_command)
except OSError:
    # tar could not be started at all; report it like any other failure.
    status = 1

if status == 0:
    print('Successful backup to %s' % target)
else:
    print('Backup Failed')