40 lines
953 B
Python
40 lines
953 B
Python
|
import json
|
||
|
# from tqdm import tqdm
|
||
|
import time
|
||
|
|
||
|
from funcs import save_to_file_by_address, read_file_by_address, read_from_json,write_to_json
|
||
|
# from pandas import read_excel
|
||
|
|
||
|
# Rebuild the ~3.8K labelled sections using the 'content' field taken from the
# large normalized section dump, keeping each section's original 'topics'.
old_3800_sections = read_from_json('./data/sections_3_8K.json')
all_sections = read_from_json('./data/sections_170k_normalized.json')

# Index the large dataset once by id so each labelled section is resolved with
# a dictionary lookup instead of a full linear scan per section.
# setdefault keeps the FIRST occurrence of a duplicated id, which is what the
# previous scan-and-break loop selected. The 1-based position is stored only
# so the progress output stays the same.
# NOTE(review): assumes every item has a hashable 'id' (str/int) -- confirm.
first_match_by_id = {}
for position, item in enumerate(all_sections, start=1):
    first_match_by_id.setdefault(item['id'], (position, item))

new_3800_sections = []
for count, section in enumerate(old_3800_sections, start=1):
    print(f'3.8K data: {count}')
    section_id = section['id']

    match = first_match_by_id.get(section_id)
    if match is not None:
        position, item = match
        new_3800_sections.append({
            'id': section_id,
            'content': item['content'],
            'topics': section['topics'],
        })
        print(f'large_data: {position}')
        print('***********************************')

    # Separator printed after every labelled section, matched or not; sections
    # without a match in the large dataset are silently left out of the output.
    print('------------------------')

write_to_json(new_3800_sections, './data/new_3800_sections.json')
print('finished!')
|
||
|
|
||
|
|