keyword/new_3800_sections_convertor.py

40 lines
953 B
Python
Raw Normal View History

2025-01-20 16:24:18 +00:00
import json
# from tqdm import tqdm
import time
from funcs import save_to_file_by_address, read_file_by_address, read_from_json,write_to_json
# from pandas import read_excel
# Rebuild the 3.8K sections file: each output record keeps the id and topics
# of a 3.8K section but takes its content from the entry with the same id in
# the 170K normalized sections file.
old_3800_sections = read_from_json('./data/sections_3_8K.json')
all_sections = read_from_json('./data/sections_170k_normalized.json')

# Index the large file once: id -> (1-based position, entry). A dict lookup
# per section replaces a full rescan of the large list for every section.
# setdefault keeps the FIRST occurrence of a duplicated id, which is what a
# front-to-back scan that stops at the first hit would select.
# NOTE(review): assumes ids are hashable (e.g. str/int) and that every entry
# of the large file has an 'id' key - confirm against the data files.
sections_by_id = {}
for position, item in enumerate(all_sections, start=1):
    sections_by_id.setdefault(item['id'], (position, item))

new_3800_sections = []
for counter, section in enumerate(old_3800_sections, start=1):
    print(f'3.8K data: {counter}')
    section_id = section['id']
    match = sections_by_id.get(section_id)
    if match is not None:
        position, item = match
        new_3800_sections.append({
            'id': section_id,
            'content': item['content'],
            'topics': section['topics'],
        })
        # Position of the matching entry inside the large file (1-based).
        print(f'large_data: {position}')
        print('***********************************')
    # A section whose id is absent from the large file is left out of the
    # output; only the separator line is printed for it.
    print('------------------------')

write_to_json(new_3800_sections, './data/new_3800_sections.json')
print('finished!')