# ai_dataset/main_qa_data/main_sections_temp.py

from elasticsearch7 import Elasticsearch
from general_functions import normalize_content
from funcs import read_from_json, write_to_json, save_to_file_by_address, read_file_by_address
import os
import json

# All inputs and outputs are resolved relative to "<cwd>/main_qa_data", so the
# script has to be launched from the directory that contains that folder.
base_address = os.path.join(os.getcwd(), "main_qa_data")

# Selection list: only the "id" of each entry is used by the filter below.
json_address_85sections = os.path.join(
    base_address, "data", "qa_85_sections.json"
)
# Full corpus: a text file that is parsed as one JSON object per line.
topiced_sections_15k_address = os.path.join(
    base_address, "data", "topiced_sections_15k.txt"
)

# NOTE(review): read_from_json is a project helper; the code below iterates its
# result and indexes each entry with ["id"], so it is presumably a list of
# dicts -- confirm against funcs.read_from_json.
datalist_85 = read_from_json(json_address_85sections)

# The corpus is loaded in one piece and split into lines; each line is decoded
# individually by the filter loop.
data_15k_text = read_file_by_address(topiced_sections_15k_address)
data_15k = data_15k_text.splitlines()

# Ids to keep, gathered once into a set so every corpus line costs a single
# hash lookup instead of a scan over the whole selection list.
# NOTE(review): assumes ids are hashable (strings or numbers) -- confirm.
wanted_ids = {entry["id"] for entry in datalist_85}

# Matching sections, kept in corpus order and reduced to "id" and "content".
sections_85 = []
for sec in data_15k:
    # A blank line between records is not a JSON document; skip it rather
    # than letting json.loads raise and abort the whole run.
    if not sec.strip():
        continue
    section = json.loads(sec)
    id1 = section['id']
    if id1 not in wanted_ids:
        continue
    sections_85.append({
        "id": id1,
        "content": section['content'],
    })
    # Progress output: running number of matched sections (1, 2, 3, ...).
    print(len(sections_85))

# The result goes to "qa_sections_85.json", a different file from the
# "qa_85_sections.json" selection list, so the input is not overwritten.
new_json_address_85sections = os.path.join(
    base_address, "data", "qa_sections_85.json"
)
write_to_json(sections_85, new_json_address_85sections)

print('finished')