"""Rebuild the 3.8K labelled sections using content from the 170K normalized corpus.

For every entry in ``./data/sections_3_8K.json`` the script looks up the entry
with the same ``id`` in ``./data/sections_170k_normalized.json`` and writes a
record holding that ``id``, the corpus entry's ``content`` and the labelled
entry's ``topics`` to ``./data/new_3800_sections.json``.  Labelled entries with
no counterpart in the corpus are skipped.
"""
import json
# from tqdm import tqdm
import time

from funcs import save_to_file_by_address, read_file_by_address, read_from_json, write_to_json
# from pandas import read_excel


def _index_first_by_id(sections):
    """Map each section ``id`` to ``(position, section)`` for its first occurrence.

    ``position`` is 1-based so it can be reported in the progress output.
    Only the first entry for a duplicated id is kept, which matches what a
    front-to-back linear search that stops at the first hit would find.
    """
    index = {}
    for position, item in enumerate(sections, start=1):
        index.setdefault(item['id'], (position, item))
    return index


# NOTE(review): both files are presumably JSON arrays of objects carrying an
# 'id' key (plus 'topics' / 'content' respectively) -- confirm against funcs.
old_3800_sections = read_from_json('./data/sections_3_8K.json')
all_sections = read_from_json('./data/sections_170k_normalized.json')

# Build the lookup once instead of rescanning the whole corpus for every
# labelled section.
sections_by_id = _index_first_by_id(all_sections)

new_3800_sections = []
for x, section in enumerate(old_3800_sections, start=1):
    print(f'3.8K data: {x}')
    section_id = section['id']

    match = sections_by_id.get(section_id)
    if match is not None:
        y, item = match
        new_3800_sections.append({
            'id': section_id,
            'content': item['content'],
            'topics': section['topics'],
        })
        # y is the 1-based position of the matching entry in the corpus.
        print(f'large_data: {y}')
        print('***********************************')

    print('------------------------')

write_to_json(new_3800_sections, './data/new_3800_sections.json')
print('finished!')