#--*-- coding:utf8 --*--
import requests,json
from lxml import etree
class Music(object):
    """Scraper for album listings and hot comments on music.163.com.

    Both public methods are static, so they can be called either on the
    class (``Music.get_music_list()``) or on an instance
    (``Music().get_music_list()``).
    """

    # Kept for backward compatibility; no method in this class reads or writes it.
    music_list = []

    # Seconds to wait for each HTTP request before giving up, so a stalled
    # connection cannot hang the scraper forever.
    _REQUEST_TIMEOUT = 10

    # Headers for the artist album-listing pages.
    # 'br' is intentionally not advertised in Accept-Encoding: requests only
    # decodes brotli when an optional brotli package is installed, and the
    # comment-API headers below already use 'gzip, deflate'.
    _ALBUM_LIST_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cookie': '_iuqxldmzr_=32; _ntes_nnid=dc7dbed33626ab3af002944fabe23bc4,1524151830800; _ntes_nuid=dc7dbed33626ab3af002944fabe23bc4; __utmc=94650624; __utmz=94650624.1524151831.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utma=94650624.1505452853.1524151831.1524151831.1524176140.2; WM_TID=RpKJQQ90pzUSYfuSWgFDY6QEK1Gb4Ulg; JSESSIONID-WYYY=ZBmSOShrk4UKH5K%5CVasEPuc0b%2Fq6m5eAE91jWCmD6UpdB2y4vbeazO%2FpQK%5CgiBW0MUDDWfB1EuNaV5c4wIJZ08hYQKDhpsHnDeMAgoz98dt%2B%2BFfhdiiNJw9Y9vRR5S4GU%2FziFp%2BliFX1QTJj%2BbaIGD3YxVzgumklAwJ0uBe%2FcGT6VeQW%3A1524179765762; __utmb=94650624.24.10.1524176140',
        'Host': 'music.163.com',
        'Referer': 'https://music.163.com/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }

    # Headers for a single album page.
    _ALBUM_PAGE_HEADERS = {
        'Cookie': '_iuqxldmzr_=32; _ntes_nnid=dc7dbed33626ab3af002944fabe23bc4,1524151830800; _ntes_nuid=dc7dbed33626ab3af002944fabe23bc4; __utmz=94650624.1524151831.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utma=94650624.1505452853.1524151831.1524176140.1524296365.3; __utmc=94650624; WM_TID=RpKJQQ90pzUSYfuSWgFDY6QEK1Gb4Ulg; JSESSIONID-WYYY=7t6F3r9Uzy8uEXHPnVnWTXRP%5CSXg9U3%5CN8V5AROB6BIe%2B4ie5ch%2FPY8fc0WV%2BIA2ya%5CyY5HUBc6Pzh0D5cgpb6fUbRKMzMA%2BmIzzBcxPcEJE5voa%2FHA8H7TWUzvaIt%2FZnA%5CjVghKzoQXNM0bcm%2FBHkGwaOHAadGDnthIqngoYQsNKQQj%3A1524299905306; __utmb=94650624.21.10.1524296365',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
    }

    # Headers for the comment JSON API.
    _COMMENT_API_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'music.163.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
    }

    @staticmethod
    def _album_id_from_href(href):
        """Return the text following the first ``id=`` in *href*, or ``''``.

        An empty string is returned when *href* is ``None`` or contains no
        ``id=`` marker.
        """
        return (href or '').partition('id=')[2]

    @staticmethod
    def get_music_list():
        """Collect album ids and titles for artist 6452, 12 albums per page.

        Pages are requested with an increasing ``offset`` until a page yields
        no album links.

        Returns:
            A ``(music_id, music_name)`` tuple of two parallel lists: the id
            string taken from each album link, and the ``title`` attribute of
            the matching cover element.

        Raises:
            requests.RequestException: if a page request fails or times out.
        """
        link_xpath = '//*[@class="u-cover u-cover-alb3"]/a[1]'
        cover_xpath = '//*[@class="u-cover u-cover-alb3"]'
        music_id = []
        music_name = []
        offset = 0
        while True:
            url = 'http://music.163.com/artist/album?id=6452&limit=12&offset=' + str(offset)
            # NOTE(review): verify=False disables TLS certificate checks if the
            # request is redirected to https; kept from the original, confirm
            # whether it is still needed.
            html = requests.get(
                url,
                headers=Music._ALBUM_LIST_HEADERS,
                verify=False,
                timeout=Music._REQUEST_TIMEOUT,
            ).text
            page = etree.HTML(html)
            links = page.xpath(link_xpath)
            if not links:
                # An empty page marks the end of the listing.
                break
            covers = page.xpath(cover_xpath)
            for link, cover in zip(links, covers):
                album_id = Music._album_id_from_href(link.get('href'))
                if not album_id:
                    # Skip links without an id so both lists stay aligned.
                    continue
                music_id.append(album_id)
                music_name.append(cover.get('title'))
            offset += 12
        print('name:'+str(len(music_name)))
        print('id:'+str(len(music_id)))
        return music_id, music_name

    @staticmethod
    def get_music_content(id):
        """Print an album's description and hot comments; append comments to music.txt.

        Args:
            id: Album id; converted with ``str`` and appended to the album URL.

        Returns:
            A list with one entry per hot comment, each being the comment
            text followed by a newline. An empty list is returned when the
            album page has no ``comment-box`` element or the API response
            has no ``hotComments`` key.

        Raises:
            requests.RequestException: if a request fails or times out.
            ValueError: if the comment API response is not valid JSON.
        """
        url = 'http://music.163.com/album?id=' + str(id)
        print(url)
        # NOTE(review): verify=False kept from the original; see get_music_list.
        html = requests.get(
            url,
            headers=Music._ALBUM_PAGE_HEADERS,
            verify=False,
            timeout=Music._REQUEST_TIMEOUT,
        ).text
        page = etree.HTML(html)
        comment_boxes = page.xpath('//*[@id="comment-box"]')
        # Album description paragraphs.
        paragraphs = page.xpath('//*[@id="album-desc-dot"]/p/text()')

        # Strip non-breaking spaces and put each paragraph on its own line.
        content = ''.join(p.replace('\xa0', '') + '\n' for p in paragraphs)
        print(content)

        if not comment_boxes:
            # Without a comment box there is no thread id to query.
            return []

        thread_id = comment_boxes[0].get('data-tid')
        urls = 'http://music.163.com/api/v1/resource/comments/' + str(thread_id)
        response_text = requests.get(
            urls,
            headers=Music._COMMENT_API_HEADERS,
            timeout=Music._REQUEST_TIMEOUT,
        ).text
        hot_comments = json.loads(response_text).get('hotComments', [])

        content_txt = []
        for comment in hot_comments:
            # NOTE(review): as written both replace() arguments are the same
            # character, so this is a no-op; presumably it was meant to
            # normalize a full-width comma. Confirm before changing.
            text = comment['content'].replace(',', ',')
            print(comment['user']['nickname'])
            print(text)
            print('==============================================================')
            content_txt.append(text + '\n')

        # Explicit UTF-8 so non-ASCII comments do not depend on the platform
        # default encoding.
        with open('music.txt', 'a', encoding='utf-8') as f:
            f.writelines(content_txt)
        return content_txt
if __name__ == '__main__':
    # Script entry point: list every album, then dump each album's
    # description and hot comments.
    # get_music_list is called on the class, like get_music_content below;
    # neither method takes a self parameter.
    id1, name = Music.get_music_list()
    print(id1)
    print(name)
    # Example album URLs:
    #   http://music.163.com/album?id=37251353
    #   http://music.163.com/#/album?id=37251353
    for x in id1:
        Music.get_music_content(x)