40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
|
from elasticsearch7 import Elasticsearch
|
||
|
from general_functions import normalize_content
|
||
|
from funcs import read_from_json, write_to_json, save_to_file_by_address, read_file_by_address
|
||
|
import os
|
||
|
import json
|
||
|
|
||
|
# Build a reduced copy of the "85 sections" dataset: for every record in the
# 15k topiced-sections dump whose id also appears in qa_85_sections.json,
# keep only its id and content, then write the result to qa_sections_85.json.
base_address = os.getcwd() + "/main_qa_data"

json_address_85sections = base_address + "/data/qa_85_sections.json"
topiced_sections_15k_address = base_address + "/data/topiced_sections_15k.txt"

datalist_85 = read_from_json(json_address_85sections)

# The 15k dump is parsed below as one JSON object per line.
data_15k_text = read_file_by_address(topiced_sections_15k_address)
data_15k = data_15k_text.splitlines()

# NOTE(review): final_text and count are never read in this script; they are
# kept only so the module's globals stay unchanged -- confirm and remove.
final_text = ''
count = 0

# Collect the wanted ids once so each 15k record needs a single O(1) lookup
# instead of a full scan over datalist_85.
# Assumes ids are hashable (e.g. str or int) -- TODO confirm against the data.
wanted_ids = {line["id"] for line in datalist_85}

sections_85 = []
c = 1  # 1-based progress counter, printed once per matched section
for sec in data_15k:
    # json.loads raises on an empty string, so skip blank lines in the dump.
    if not sec.strip():
        continue

    section = json.loads(sec)
    id1 = section['id']
    if id1 not in wanted_ids:
        continue

    content = section['content']
    print(c)
    c += 1
    sections_85.append({
        "id": id1,
        "content": content,
    })

new_json_address_85sections = base_address + "/data/qa_sections_85.json"
write_to_json(sections_85, new_json_address_85sections)

print('finished')
|