喜马拉雅音频文件下载

喜马拉雅音频文件下载

自动下载喜马拉雅音频文件的 Python 2 代码(使用 urllib2 与 print 语句)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""
喜马拉雅音频下载
2016年05月
"""
import urllib
import urllib2
import json
import os
import codecs

def downloadTrack(url, title, albumDir):
    """Download one audio track into ``albumDir`` as ``<title>.m4a``.

    Args:
        url: Address of the audio file; opened with ``urllib2.urlopen``.
        title: Track title, used as the file name stem. Path separators in
            it are replaced with ``_`` so the file always stays directly
            inside ``albumDir``.
        albumDir: Existing directory that receives the file.

    Returns:
        None. When the target file already exists nothing is downloaded.
    """
    # The title comes from a remote JSON document; a "/" in it would point
    # the path at a (usually missing) sub-directory or outside albumDir.
    fileStem = title
    for separator in ("/", os.sep):
        fileStem = fileStem.replace(separator, "_")

    trackPath = os.path.join(albumDir, fileStem + ".m4a")
    if os.path.exists(trackPath):
        print("%s have downloaded early." % trackPath)
        return

    response = urllib2.urlopen(url)
    try:
        audio = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    # Save as an audio file. Binary mode is required: the payload is raw
    # audio and must not pass through newline translation.
    with open(trackPath, "wb") as out:
        out.write(audio)

def querySoundInfo(soundUrl):
    """Fetch a track's JSON description and return its title and audio URL.

    Args:
        soundUrl: Address of the track's JSON document; opened with
            ``urllib2.urlopen``.

    Returns:
        A ``(trackTitle, trackUrl)`` tuple read from the document's
        ``"title"`` and ``"play_path"`` fields.

    Raises:
        ValueError: If the response body is not valid JSON.
        KeyError: If ``"title"`` or ``"play_path"`` is missing.
    """
    response = urllib2.urlopen(soundUrl)
    try:
        payload = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    data = json.loads(payload)
    # NOTE(review): the original author listed "play_path_32" and
    # "play_path_64" next to this key, and an "intro" field in disabled
    # code - presumably further fields of the same document; confirm
    # against a live response before relying on them.
    return data["title"], data["play_path"]
"""
下载指定系列的音频文件
url为系列的地址
"""
def downloadAlbum(url, saveDir):
    """Download every track listed on an album page.

    The page is scanned for ``<li sound_id="...">`` elements; each id is
    turned into a ``http://www.ximalaya.com/tracks/<id>.json`` address,
    which is resolved with ``querySoundInfo`` and fetched with
    ``downloadTrack``.

    Args:
        url: Address of the album page.
        saveDir: Root download directory; the album's own directory beneath
            it is prepared by ``setupDownloadDir``.
    """
    import re

    albumDir = setupDownloadDir(url, saveDir)

    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    lstSoundId = re.findall(r'<li sound_id=\"(.*?)\">', page)
    print(lstSoundId)
    lstSoundIdUrl = [
        "http://www.ximalaya.com/tracks/" + soundId + ".json"
        for soundId in lstSoundId
    ]
    print(lstSoundIdUrl)

    for soundUrl in lstSoundIdUrl:
        trackTitle, trackUrl = querySoundInfo(soundUrl)
        print("%s %s" % (trackTitle, trackUrl))
        downloadTrack(trackUrl, trackTitle, albumDir)
def setupDownloadDir(albumUrl, saveDir):
    """Create (if needed) and return the download directory for one album.

    The directory is ``saveDir/<albumId>``, where ``albumId`` is the last
    path segment of ``albumUrl``.

    Args:
        albumUrl: Address of the album page.
        saveDir: Root download directory; created when missing.

    Returns:
        Path of the album directory.
    """
    # Strip trailing slashes first: ".../album/3739850/" would otherwise
    # give an empty last segment and the tracks would land in saveDir.
    albumId = albumUrl.rstrip("/").split("/")[-1]
    print(albumId)
    albumDir = os.path.join(saveDir, albumId)
    if not os.path.exists(albumDir):
        # makedirs creates saveDir as well when it does not exist yet.
        os.makedirs(albumDir)
    print(albumDir)
    return albumDir

# Album to fetch: Dr. Luo Dalun on Traditional Chinese Medicine (mainly the
# Zhang Xichun chapters) http://www.ximalaya.com/31873895/album/3739850
albumUrl = "http://www.ximalaya.com/31873895/album/3739850"
# Location where the audio files are saved.
saveDir = "/Users/aoro/Music/ximalaya"
downloadAlbum(albumUrl, saveDir)
print("OK")