ai_dataset/main_qa_data/main_sections_temp.py

from elasticsearch7 import Elasticsearch
from general_functions import normalize_content
from funcs import read_from_json, write_to_json, save_to_file_by_address, read_file_by_address
import os
import json

# Pick out, from the line-delimited JSON file of topiced sections, every
# section whose "id" also appears in the QA sections JSON list, and write the
# matching {"id", "content"} pairs to a new JSON file.

base_address = os.getcwd() + "/main_qa_data"

json_address_85sections = base_address + "/data/qa_85_sections.json"
topiced_sections_15k_address = base_address + "/data/topiced_sections_15k.txt"
new_json_address_85sections = base_address + "/data/qa_sections_85.json"

# Entries whose "id" values define which sections to keep.
datalist_85 = read_from_json(json_address_85sections)

# Each line of this text file is parsed below as one JSON object.
data_15k_text = read_file_by_address(topiced_sections_15k_address)
data_15k = data_15k_text.splitlines()

# Build the id lookup once so every section needs a single membership test
# instead of a scan over the whole of datalist_85.
# NOTE(review): assumes ids are hashable (strings or numbers) - confirm.
wanted_ids = {line["id"] for line in datalist_85}

sections_85 = []
c = 1  # running count of matched sections, printed as progress
for sec in data_15k:
    if not sec.strip():
        # A blank line is not a JSON document; json.loads would raise on it.
        continue
    section = json.loads(sec)
    id1 = section['id']
    if id1 not in wanted_ids:
        continue
    print(c)
    c += 1
    sections_85.append({
        "id": id1,
        "content": section['content'],
    })

write_to_json(sections_85, new_json_address_85sections)

print('finished')
انتقال به گیت 2 2024-09-17 16:57:26 +00:00			`from elasticsearch7 import Elasticsearch`
			`from general_functions import normalize_content`
			`from funcs import read_from_json, write_to_json, save_to_file_by_address, read_file_by_address`
			`import os`
			`import json`

			`base_address = os.getcwd() + "/main_qa_data"`

			`json_address_85sections = base_address + "/data/qa_85_sections.json"`
			`topiced_sections_15k_address = base_address + "/data/topiced_sections_15k.txt"`
			`# dataids = read_from_json(dest_address)`

			`datalist_85 = read_from_json(json_address_85sections)`

			`data_15k_text = read_file_by_address(topiced_sections_15k_address)`
			`data_15k = data_15k_text.splitlines()`
			`final_text = ''`

			`count = 0`

			`sections_85 = []`
			`c = 1`
			`for sec in data_15k:`
			`section = json.loads(sec)`
			`id1 = section['id']`
			`for line in datalist_85:`
			`if line["id"] == id1:`
			`content = section['content']`
			`print(c)`
			`c += 1`
			`sections_85.append({`
			`"id": id1,`
			`"content": content,`
			`})`
			`break`

			`new_json_address_85sections = base_address + "/data/qa_sections_85.json"`
			`write_to_json(sections_85, new_json_address_85sections)`

			`print('finished')`