Things change, roll with the punches. Oh, yeah. Go for it man, jump off the high dive, stare down the barrel of the gun, pee into the wind!

发布新日志

  • Python下载百度新歌100的代码

    2007-02-07 17:00:28

      1 #!/usr/bin/python 
      2 # -*- coding: utf-8 -*- 
      3 # Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn> 
      4 # License: GPLv2 
      5 # Author: oneleaf <oneleaf AT gmail.com> 
      6 # hack by ct <ctqucl AT gmail.com> 
      7 
      8 import httplib
      9 import re
     10 import urllib
     11 import os
     12 import locale
     13 global m,topid
     14 global fsize
     15 fsize=2     #文件大小下限(M) 
     16 m='0'       #'-1'=任意  '0'=mp3 '1'=rm '2'=wma '3'=asf '4'=ram '5'=mid '6'=flash 
     17 topid='1'
     18 
     19 if topid=='0':
     20      topid='/list/newhits.html'         #新歌100 
     21 elif topid=='1':
     22      topid='/topso/mp3topsong.html'     #Top500 
     23 elif topid=='2':
     24      topid='/list/oldsong.html'         #老歌经典 
     25 elif topid=='3':
     26      topid='/list/movies.html'          #电影金曲 
     27 elif topid=='4':
     28      topid='/list/tvs.html'             #电视歌曲 
     29 elif topid=='5':
     30      topid='/minge/mp3topsong.html'     #民歌精选 
     31 elif topid=='6':
     32      topid='/xiaoyuan/mp3topsong.html'  #校园歌曲 
     33 elif topid=='7':
     34      topid='/list/liujinsuiyue.html'    #流金岁月(new) 
     35 elif topid=='8':
     36      topid='/list/yaogun.html'          #摇滚地带 
     37 
     38 
     39 def getdownfileurl(url):                #获取歌曲页的试听URL 
     40     url = "http://220.181.27.54/m"+url
     41     tn = re.search('&tn=(.*)&word',url).group(0)
     42     url=url.replace(tn,'&tn=baidusg,mp3%20%20&word')
     43     try:
     44         urlopen = urllib.URLopener()
     45         fp=urlopen.open(url)
     46         data = fp.read()
     47         fp.close()
     48     except IOError, errmsg:
     49         print errmsg
     50     expression2='"_blank">(.*)</a></a></li>'
     51     url = re.search(expression2, data).group(0)[16:-13]
     52     try:
     53         url="http://"+urllib.quote(url)
     54     except:pass
     55     #print u"发现 "+url 
     56     return url
     57 
     58 def getdownurl(url):                     #从歌曲页抓取URL列表 
     59     urllist=[]
     60     urllist1=[]
     61     urllist2=[]
     62     conn = httplib.HTTPConnection('mp3.baidu.com')
     63     conn.request("GET",url)
     64     response = conn.getresponse()
     65     html=response.read()
     66     conn.close()
     67     expression2='http://220.181.27.54/m(.*)" target'
     68     listSentence2 = re.findall(expression2, html)     #抓取链接列表 
     69     filesize=re.findall('<td>(.*)M</td>',html)        #抓取文件大小 
     70     lineno=0
     71     while lineno<len(listSentence2):
     72         mp3url=getdownfileurl(listSentence2[lineno])  #转换链接为最终下载地址 
     73         urllist1.append(mp3url)
     74         lineno+=1
     75     urllist=map(None,urllist1,filesize)
     76     return urllist
     77 
     78 def downmp3(url,author,name,filelist):    #下载歌曲 
     79     filename=author+"-"+name;
     80     for i in filelist:
     81         name=unicode(i,locale.getpreferredencoding())
     82         if name.find(filename) == 0:      #忽略 
     83             print u"文件已经下载,忽略。"
     84             return 1
     85     urllists=getdownurl(url)              #获取文件url列表 
     86     lineno=0
     87     while lineno<len(urllists):
     88         print u"尝试",urllists[lineno][0]
     89         ext=urllists[lineno][0][-4:]      #获取文件名后缀(最后4位) 
     90         try:
     91             lineno+=1
     92             print urllists[lineno-1][1] +'M'
     93             if float(urllists[lineno-1][1])>float(fsize) :   #大小符合则下载 
     94                   urlopen = urllib.URLopener()
     95                   fp=urlopen.open(urllists[lineno-1][0])
     96                   data = fp.read()
     97                   fp.close()
     98                   filename=filename+ext;
     99                   file=open(filename,'w+b')
    100                   file.write(data)
    101                   file.close()
    102                   print u"下载成功!"
    103                   return 1
    104             elif float(urllists[lineno][1])<float(fsize) :   #不符则略过 
    105                   print u"文件太小,忽略!"
    106         except:
    107             continue
    108     return 0
    109 
    110 if __name__ == "__main__":
    111     conn = httplib.HTTPConnection('list.mp3.baidu.com')
    112     conn.request("GET",topid )  #类型 
    113     response = conn.getresponse()
    114     html=response.read().decode('gbk')
    115     conn.close()
    116     expression1='border">(.*).</td>'
    117     expression2='><a href="http://mp3.baidu.com/m(.*)</a>'
    118     expression3='href="http://mp3.baidu.com/m(.*)</td>'
    119     listSentence1 = re.findall(expression1, html)   #编号特征 
    120     listSentence2 = re.findall(expression2, html)   #歌曲名特征 
    121     listSentence3 = re.findall(expression3, html)   #歌手名特征 
    122     lineno=0
    123     while lineno<len(listSentence1):
    124        listSentence2[lineno]=listSentence2[lineno].replace('m=-1','m=' + m) #指定格式 
    125        url=re.search('(.*)target',listSentence2[lineno])
    126        url='/m'+url.group(0)[:-8]
    127        idno=listSentence1[lineno]
    128        name=re.search('blank>(.*)',listSentence2[lineno])
    129        name=name.group(0)[6:]
    130        dirty=re.search('</A>/<A  href=(.*) target=_blank>',listSentence3[lineno])
    131        if dirty is not None :             #合唱 
    132       author1=re.search('>(.*)</A>/<A',listSentence3[lineno])
    133       author1=author1.group(0)[1:-7]
    134       author2=re.search('/<A  href=(.*)</A>',listSentence3[lineno])
    135       author2=re.search('>(.*)<',author2.group(0))
    136       author2=author2.group(0)[1:-1]
    137       author=author1 + '+' + author2
    138        elif dirty is None :               #独唱 
    139            author=re.search('blank>(.*)</',listSentence3[lineno])
    140            author=author.group(0)[6:-2]
    141        print u"开始下载",idno,name,author
    142        filelist=os.listdir('.');
    143        if downmp3(url,author,name,filelist)==0:  #判断失败 
    144           print u"下载",author,name,u'失败!'
    145        lineno+=1
    146 
    转自:http://forum.ubuntu.org.cn/viewtopic.php?t=15682
  • backup_ver2.py

    2007-01-15 13:19:47

    #!/usr/bin/python
    # Filename : backup_ver2.py
    # backup and package /home/mushroom & /root directory 
    # to /root/backup/YearMonthDay/HourMinuteSecond.tar.gz
    
    import os
    import time
    
    source = ['/home/mushroom/','/root/']
    target_dir = '/root/backup/'
    today = target_dir + time.strftime('%Y%m%d')
    now = time.strftime('%H%M%S')
    
    if not os.path.exists(today):
            os.mkdir(today)
            print 'Successfully created directory',today
    
    target = today + os.sep + now + '.tar.gz'
    
    zip_command = "tar zcvf '%s' %s" % (target,' '.join(source))
    
    if os.system(zip_command) == 0:
            print 'Successful backup to',target
    else:
            print 'Backup Failed'
    

数据统计

  • 访问量: 46069
  • 日志数: 42
  • 图片数: 3
  • 文件数: 1
  • 书签数: 13
  • 建立时间: 2007-01-05
  • 更新时间: 2007-03-03

RSS订阅

Open Toolbar