import logging
import os
# Make sure the output directory exists before anything tries to use it.
# The read/write calls in this file all use the "./data/" prefix, so that is
# the directory created here. ``exist_ok=True`` lets the script be re-run
# without failing with FileExistsError when the directory is already there.
os.makedirs("./data/", exist_ok=True)
def write_kaggle_datasets(i, timeout=30):
    """Download one page of the Kaggle dataset listing and save it to disk.

    The request asks for page number ``i + 1`` with 25 items per page and
    the raw response body is written to ``./data/dataset_split<i>``.

    Args:
        i: Page index; the URL uses ``i + 1`` as its ``page`` parameter and
            ``i`` itself becomes the suffix of the output file name.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the script indefinitely.

    Returns:
        True once the response body has been written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the output file cannot be created or written.
    """
    import requests

    url = (
        'https://www.kaggle.com/datasets_v2.json?sortBy=hottest&group=public&page='
        + str(i + 1)
        + '&pageSize=25&size=all&filetype=all&license=all'
    )
    filename = 'dataset_split' + str(i)
    chunk_size = 1024

    # The target directory may not exist yet; creating it is idempotent.
    os.makedirs("./data/", exist_ok=True)

    # stream=True avoids holding the whole body in memory, and the context
    # manager returns the connection to the pool even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTTP error page as if it were data.
        response.raise_for_status()
        with open("./data/" + filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size):
                fd.write(chunk)

    # Log message text means "one record has been written".
    logging.warning('已经写入一条数据')
    return True
def get_json(pages=25):
    """Download the first ``pages`` listing pages with ``write_kaggle_datasets``.

    Args:
        pages: Number of pages to fetch; indices ``0`` to ``pages - 1`` are
            passed to ``write_kaggle_datasets``. Defaults to 25.
    """
    # A plain loop is used because the calls are made only for their side
    # effect (writing files); collecting their return values serves no purpose.
    for i in range(pages):
        write_kaggle_datasets(i)
    logging.info('Hava_Done!')
import json
def get_by_index(i):
    """Load and parse the JSON document stored in ``./data/dataset_split<i>``.

    Args:
        i: Suffix of the file to read; converted with ``str`` to build the
            file name.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file does not exist or cannot be read.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    path = "./data/" + "dataset_split" + str(i)
    # Read-only mode is enough here, and the ``with`` block guarantees the
    # handle is closed even when parsing fails.
    with open(path, "r", encoding='utf-8') as fd:
        return json.load(fd)
def main():
    """Merge the saved listing pages into one CSV file.

    Reads pages 0 through 24 with ``get_by_index``, gathers every entry found
    under each page's ``"datasetListItems"`` key, builds a pandas DataFrame
    from them and writes it to ``./data/dataset_demo.csv``.
    """
    rows = []
    for page in range(25):
        page_items = get_by_index(page)["datasetListItems"]
        rows.extend(page_items)

    # pandas is imported only after all pages have been read.
    import pandas as pd

    frame = pd.DataFrame(rows)
    frame.to_csv('./data/dataset_demo.csv')
# Build the CSV only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|