python-music

部分代码: 



# -*- coding: utf-8 -*-

import requests,json
from lxml import etree
class Music(object):
    """Scrape album listings and album hot comments from music.163.com.

    Both public methods are static: they read no instance state, so they may
    be called either on the class or on an instance.
    """

    # Not read or written by any method of this class; kept so that existing
    # references to ``Music.music_list`` keep working.
    music_list=[]

    # The listing URL asks for this many albums per page and the offset is
    # advanced by the same amount.
    _PAGE_SIZE = 12
    # Seconds to wait on each HTTP request; without a timeout ``requests``
    # may block forever on a stalled connection.
    _TIMEOUT = 10

    _COVER_XPATH = '//*[@class="u-cover u-cover-alb3"]'
    _COVER_LINK_XPATH = './a[1]'
    _DESCRIPTION_XPATH = '//*[@id="album-desc-dot"]/p/text()'  # album description
    _COMMENT_BOX_XPATH = '//*[@id="comment-box"]'

    _LIST_HEADERS = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        # 'br' is deliberately not advertised: requests only decodes Brotli
        # when an optional Brotli package is installed.
        'Accept-Encoding':'gzip, deflate',
        'Accept-Language':'zh-CN,zh;q=0.9',
        'Connection':'keep-alive',
        'Cookie':'_iuqxldmzr_=32; _ntes_nnid=dc7dbed33626ab3af002944fabe23bc4,1524151830800; _ntes_nuid=dc7dbed33626ab3af002944fabe23bc4; __utmc=94650624; __utmz=94650624.1524151831.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utma=94650624.1505452853.1524151831.1524151831.1524176140.2; WM_TID=RpKJQQ90pzUSYfuSWgFDY6QEK1Gb4Ulg; JSESSIONID-WYYY=ZBmSOShrk4UKH5K%5CVasEPuc0b%2Fq6m5eAE91jWCmD6UpdB2y4vbeazO%2FpQK%5CgiBW0MUDDWfB1EuNaV5c4wIJZ08hYQKDhpsHnDeMAgoz98dt%2B%2BFfhdiiNJw9Y9vRR5S4GU%2FziFp%2BliFX1QTJj%2BbaIGD3YxVzgumklAwJ0uBe%2FcGT6VeQW%3A1524179765762; __utmb=94650624.24.10.1524176140',
        'Host':'music.163.com',
        'Referer':'https://music.163.com/',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }

    _ALBUM_HEADERS = {
        'Cookie': '_iuqxldmzr_=32; _ntes_nnid=dc7dbed33626ab3af002944fabe23bc4,1524151830800; _ntes_nuid=dc7dbed33626ab3af002944fabe23bc4; __utmz=94650624.1524151831.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utma=94650624.1505452853.1524151831.1524176140.1524296365.3; __utmc=94650624; WM_TID=RpKJQQ90pzUSYfuSWgFDY6QEK1Gb4Ulg; JSESSIONID-WYYY=7t6F3r9Uzy8uEXHPnVnWTXRP%5CSXg9U3%5CN8V5AROB6BIe%2B4ie5ch%2FPY8fc0WV%2BIA2ya%5CyY5HUBc6Pzh0D5cgpb6fUbRKMzMA%2BmIzzBcxPcEJE5voa%2FHA8H7TWUzvaIt%2FZnA%5CjVghKzoQXNM0bcm%2FBHkGwaOHAadGDnthIqngoYQsNKQQj%3A1524299905306; __utmb=94650624.21.10.1524296365',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
    }

    _API_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'music.163.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
    }

    @staticmethod
    def _album_id_from_href(href):
        """Return the text following the first ``id=`` in *href*, or None.

        None is returned when *href* is empty/None, contains no ``id=``
        marker, or has nothing after the marker.
        """
        if not href:
            return None
        _, marker, album_id = href.partition('id=')
        if not marker or not album_id:
            return None
        return album_id

    @staticmethod
    def _join_description(fragments):
        """Join text fragments one per line, dropping non-breaking spaces."""
        return ''.join(part.replace('\xa0', '') + '\n' for part in fragments)

    @staticmethod
    def get_music_list(artist_id=6452):
        """Collect the album ids and titles listed for an artist.

        Pages through ``/artist/album`` on music.163.com until a page yields
        no album links.

        Args:
            artist_id: Value of the ``id`` query parameter. Defaults to 6452,
                the id that used to be hard-coded in the URL.

        Returns:
            A ``(music_id, music_name)`` tuple of two parallel lists: the id
            strings parsed from each album link and the ``title`` attribute
            of the matching cover element (None if the attribute is absent).
        """
        music_id=[]
        music_name=[]
        offset = 0
        while True:
            url = ('http://music.163.com/artist/album?id=' + str(artist_id)
                   + '&limit=' + str(Music._PAGE_SIZE)
                   + '&offset=' + str(offset))
            html = requests.get(url, headers=Music._LIST_HEADERS,
                                timeout=Music._TIMEOUT).text
            page = etree.HTML(html)
            covers = page.xpath(Music._COVER_XPATH) if page is not None else []

            found = 0
            for cover in covers:
                # Take the link from the cover itself so an id and its title
                # can never come from two different albums.
                links = cover.xpath(Music._COVER_LINK_XPATH)
                if not links:
                    continue
                found += 1
                album_id = Music._album_id_from_href(links[0].get('href'))
                if album_id is None:
                    continue
                music_id.append(album_id)
                music_name.append(cover.get('title'))

            if not found:
                # A page without album links marks the end of the listing.
                break
            offset += Music._PAGE_SIZE

        print('name:'+str(len(music_name)))
        print('id:'+str(len(music_id)))
        return music_id,music_name

    @staticmethod
    def get_music_content(id, output_path='music.txt'):
        """Print an album's description and hot comments; append the comments to a file.

        Args:
            id: Album id used to build the album page URL. (The name shadows
                the builtin but is kept for keyword-argument compatibility.)
            output_path: File the comment texts are appended to, one per
                line, encoded as UTF-8. Defaults to ``'music.txt'``.

        Returns:
            A list with each hot comment's text followed by a newline. The
            list is empty when the album page cannot be parsed, exposes no
            comment thread, or the API response has no ``hotComments`` entry.
        """
        url='http://music.163.com/album?id='+str(id)
        print(url)
        html = requests.get(url, headers=Music._ALBUM_HEADERS,
                            timeout=Music._TIMEOUT).text
        page = etree.HTML(html)
        if page is None:
            return []

        print(Music._join_description(page.xpath(Music._DESCRIPTION_XPATH)))

        # The comment box element carries the thread id the comments API needs.
        boxes = page.xpath(Music._COMMENT_BOX_XPATH)
        thread_id = boxes[0].get('data-tid') if boxes else None
        if not thread_id:
            return []

        api_url = 'http://music.163.com/api/v1/resource/comments/' + str(thread_id)
        response_text = requests.get(api_url, headers=Music._API_HEADERS,
                                     timeout=Music._TIMEOUT).text
        payload = json.loads(response_text)
        hot_comments = payload.get('hotComments') or []

        content_txt=[]
        # Explicit UTF-8 so comment text is written the same way regardless
        # of the platform's default encoding.
        with open(output_path, 'a', encoding='utf-8') as f:
            for comment in hot_comments:
                text = comment['content']
                print(comment['user']['nickname'])
                print(text)
                print('==============================================================')
                content_txt.append(text + '\n')
            f.writelines(content_txt)
        return content_txt

if __name__=='__main__':
    # Script entry point: list the artist's albums, then print and save the
    # hot comments of each one.
    #
    # get_music_list takes no ``self`` parameter, so it is called through the
    # class; calling it on an instance would pass the instance as an
    # unexpected positional argument.
    album_ids, album_names = Music.get_music_list()
    print(album_ids)
    print(album_names)
    # Example album URLs:
    #   http://music.163.com/album?id=37251353
    #   http://music.163.com/#/album?id=37251353
    for album_id in album_ids:
        Music.get_music_content(album_id)