"""Extract the QA subset of sections from the topiced-sections dump.

Reads the list of QA records from ``qa_85_sections.json`` and the
line-delimited JSON dump ``topiced_sections_15k.txt`` (one JSON object per
line), keeps every dump entry whose ``id`` also appears in the QA list, and
writes the resulting ``{"id", "content"}`` records to ``qa_sections_85.json``.
Output order follows the order of the dump file.
"""
import json
import os

from elasticsearch7 import Elasticsearch

from general_functions import normalize_content
from funcs import read_from_json, write_to_json, save_to_file_by_address, read_file_by_address

# All data files live under <current working directory>/main_qa_data.
base_address = os.getcwd() + "/main_qa_data"
json_address_85sections = base_address + "/data/qa_85_sections.json"
topiced_sections_15k_address = base_address + "/data/topiced_sections_15k.txt"
new_json_address_85sections = base_address + "/data/qa_sections_85.json"

datalist_85 = read_from_json(json_address_85sections)
data_15k_text = read_file_by_address(topiced_sections_15k_address)
data_15k = data_15k_text.splitlines()

# Index the wanted ids once so each dump line costs a single O(1) lookup
# instead of a linear scan over the whole QA list.
# NOTE(review): assumes ids are hashable scalars (str/int) — confirm.
wanted_ids = {record["id"] for record in datalist_85}

sections_85 = []
for raw_line in data_15k:
    # Blank lines carry no record and would make json.loads raise.
    if not raw_line.strip():
        continue

    section = json.loads(raw_line)
    section_id = section["id"]
    if section_id not in wanted_ids:
        continue

    sections_85.append({
        "id": section_id,
        "content": section["content"],
    })
    # Progress output: running count of matched sections (1, 2, 3, ...).
    print(len(sections_85))

write_to_json(sections_85, new_json_address_85sections)

print('finished')