# Author:Nimo_Ding
'''
Task:
Scrape the rank / movie title / rating / one-line recommendation / link of every
entry in the Douban Top 250 list, then print all of the results.
https://movie.douban.com/top250?start=0&filter=
'''
import requests
# Import the BeautifulSoup library
from bs4 import BeautifulSoup
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'}

# The `start` query parameter is an offset that advances by 25 per page.
PAGE_SIZE = 25
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
# One record of output. The line break before the link field is explicit so the
# layout does not depend on whitespace that happens to trail the scraped quote.
ENTRY_TEMPLATE = '序号:{}\n电影名:{}\n评分:{}\n推荐语:{}\n链接:{}\n'
# Written to the output file after every record.
SEPARATOR = '****************\n'


def extract_movie(item):
    """Pull the displayed fields out of one ``class="item"`` node.

    Args:
        item: a parsed node exposing ``find`` (a BeautifulSoup tag in
            practice) that wraps a single movie entry.

    Returns:
        A tuple ``(rank, title, rating, quote, link)`` of strings. ``quote``
        is the empty string when the entry has no ``class="quote"`` element.
    """
    # find() returns None when nothing matches, so the quote must be looked up
    # defensively; an entry without one would otherwise abort the whole run.
    quote_node = item.find(class_='quote')
    if quote_node is None:
        quote = ''
    else:
        # The element text may carry surrounding whitespace from the markup.
        quote = quote_node.text.strip()
    return (
        item.find('em').text,
        item.find(class_='title').text,
        item.find(class_='rating_num').text,
        quote,
        item.find('a')['href'],
    )


def format_entry(rank, title, score, quote, url):
    """Render one movie as the multi-line text block used for print and file."""
    return ENTRY_TEMPLATE.format(rank, title, score, quote, url)


def fetch_movies(page):
    """Download one result page and return its movies.

    Args:
        page: zero-based page index; translated to the ``start`` offset.

    Returns:
        A list of ``(rank, title, rating, quote, link)`` tuples, one per
        ``class="item"`` node found on the page.

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status (raised by ``raise_for_status``).
    """
    url = 'https://movie.douban.com/top250?start=' + str(page * PAGE_SIZE) + '&filter='
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error status rather than parsing an error page and
    # silently producing zero results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return [extract_movie(item) for item in soup.find_all(class_='item')]


def main(pages=2):
    """Scrape ``pages`` result pages, print every entry and save them to disk.

    Args:
        pages: number of pages to fetch. The default of 2 matches the
            original script; pass 10 to cover the full list.
    """
    movies = []
    for page in range(pages):
        movies.extend(fetch_movies(page))

    # The file is opened only after scraping has finished, so a failed request
    # never truncates an existing output file. `with` guarantees it is closed.
    with open('doubantop250.html', 'w', encoding='utf-8') as out:
        for movie in movies:
            entry = format_entry(*movie)
            print(entry)
            out.write(entry)
            out.write(SEPARATOR)


if __name__ == '__main__':
    main()