# Append one record (title + read count) to the output file.
# Relies on `titles`, `nn`, `w` and `n` being set by the surrounding loop.
# The context manager closes the file even if a write fails; UTF-8 is set
# explicitly so scraped titles are not subject to the platform default codec.
with open('1.txt', 'a', encoding='utf-8') as ff:
    if n % 10 == 0:
        # Every 10th record gets an extra blank line, purely for readability.
        ff.write(titles[w]+' '+str(nn[w])+' \n\n')
    else:
        # Ordinary record: terminate with a single newline.
        ff.write(titles[w]+' '+str(nn[w])+' \n')
# Record counter that drives the "blank line every 10 records" rule above.
n += 1
其他的就是一些正则表达式啦，不熟悉正则的话可以直接照搬使用！
代码:
import requests
import re
# Common prefix of the paginated article-list URLs; the page number and the
# query string are appended per request.
BASE_URL = "https://blog.csdn.net/weixin_42859280/article/list/"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Patterns are compiled once instead of on every page.
TITLE_RE = re.compile(
    r'<span class="article-type type-.*?">\n.*?</span>\n(.*?)</a>',
    re.MULTILINE,
)
VIEWS_RE = re.compile(r'<span class="read-num">阅读数:(.*?)</span>')
COMMENTS_RE = re.compile(r'<span class="read-num">评论数:(.*?)</span>')


def parse_article_list(html):
    """Extract (title, views, comments) records from one list page.

    Args:
        html: Decoded HTML text of an article-list page.

    Returns:
        A list of ``(title, views, comments)`` tuples where ``title`` and
        ``comments`` are the raw captured strings and ``views`` is an int.
        The list is as long as the shortest of the three match lists.

    Raises:
        ValueError: If a captured read count is not a valid integer.
    """
    titles = TITLE_RE.findall(html)
    # The first read-count and comment-count match on each page is dropped,
    # exactly as the original script did. Presumably that first match does
    # not belong to a listed article -- verify against the live page markup.
    views = [int(v) for v in VIEWS_RE.findall(html)][1:]
    comments = COMMENTS_RE.findall(html)[1:]
    return list(zip(titles, views, comments))


def format_record(title, views, comments):
    """Return the one-line text for a record, without a line terminator."""
    return '{} 阅读数:{} 评论数:{}'.format(title, views, comments)


def scrape_section(pages, article_type, out_path):
    """Fetch list pages 1..pages, append the records to a file and print them.

    Args:
        pages: Number of list pages to request (pages 1 through ``pages``).
        article_type: Value of the ``t`` query parameter. The script uses 1
            for the section it labels as original posts and 2 for the
            section it labels as reposts.
        out_path: File the records are appended to.

    Returns:
        The sum of the read counts of every record that was written.
    """
    total = 0
    for page in range(1, pages + 1):
        url = (BASE_URL + str(page)
               + '?t=' + str(article_type) + '&orderby=ViewCount')
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Decode once and reuse the text for all three patterns.
        records = parse_article_list(response.content.decode())
        if not records:
            # Nothing matched on this page; do not create or touch the file.
            continue
        # One open per page instead of one per record. UTF-8 is explicit so
        # titles are not subject to the platform default codec.
        with open(out_path, 'a', encoding='utf-8') as out:
            # The counter restarts at 1 on every page.
            for n, (title, views, comments) in enumerate(records, start=1):
                line = format_record(title, views, comments)
                # Every 10th record on a page is followed by a blank line.
                out.write(line + (' \n\n' if n % 10 == 0 else ' \n'))
                # To filter the output, guard the write/print with a
                # condition such as `views > 1000` or `int(comments) > 0`.
                print(line)
                total += views
    return total


def main():
    """Scrape both sections and print the combined read count."""
    print('\n-------------------------下面是原创的-------------------------\n')
    total_views = scrape_section(10, 1, '1.txt')
    print('\n-------------------------下面是转载的-------------------------\n')
    total_views += scrape_section(11, 2, '2.txt')
    print("总阅读量:"+str(total_views))


if __name__ == "__main__":
    main()