# In the name of God
import json
from elastic_helper import ElasticHelper
# Input/output locations. Raw strings keep the original Windows-style
# relative paths character-for-character without relying on invalid escape
# sequences such as "\d" or "\m" (a SyntaxWarning on recent Python versions).
REFERENCES_JSON_PATH = r".\data\DATASET140402_no_arefـoutput.json"
SECTIONS_ZIP_PATH = r".\data\mj_qa_section-v02.zip"
FOUND_OUTPUT_PATH = "foundfind_refs_list.json"
NOT_FOUND_OUTPUT_PATH = "not_found_ids.txt"


def index_sections_by_content(sections):
    """Map each stripped section content to the id of the first section holding it.

    Args:
        sections: iterable of dicts; each one is read as ``item['id']`` and
            ``item['source']['content']``.

    Returns:
        dict mapping stripped content -> id of the first section seen with
        that content.
    """
    content_to_id = {}
    for item in sections:
        # setdefault keeps the first id for duplicated content, i.e. the same
        # section a front-to-back scan stopping at the first hit would pick.
        content_to_id.setdefault(item['source']['content'].strip(), item['id'])
    return content_to_id


def match_references(ref_items, content_to_id):
    """Match reference items against indexed section contents.

    Each reference item is read as ``item['id']``, ``item['content']`` and
    ``item['ner']``. An item whose ``ner`` is empty is skipped entirely: it is
    reported neither as found nor as not found. Otherwise the item is found
    when its stripped content is a key of ``content_to_id``.

    A progress line is printed for every item, including skipped ones.

    Args:
        ref_items: sequence of reference dicts (``len()`` is used for the
            progress message).
        content_to_id: dict as produced by ``index_sections_by_content``.

    Returns:
        ``(found, not_found)`` where ``found`` is a list of
        ``[section_id, reference_id, content]`` and ``not_found`` is a list of
        reference ids.
    """
    found = []
    not_found = []
    total = len(ref_items)

    for position, item in enumerate(ref_items, start=1):
        ref_id, content, ner_list = item['id'], item['content'].strip(), item['ner']

        # The emptiness check happens once per item, before any lookup, so
        # ner-less items are skipped even when there are no sections at all.
        if ner_list:
            if content in content_to_id:
                found.append([content_to_id[content], ref_id, content])
                print(f"REF ID {ref_id} Found ! ... ")
            else:
                not_found.append(ref_id)

        print(f"{position} OF {total} searched ...")

    return found, not_found


def main():
    """Load both datasets, match them by content and write the two reports."""
    # The context manager closes the input file as soon as it is parsed.
    with open(REFERENCES_JSON_PATH, "r", encoding='utf8') as ref_file:
        ref_items = json.load(ref_file)

    eh_obj = ElasticHelper()
    # NOTE(review): the meaning of the second argument (True) is defined by
    # ElasticHelper.iterateJsonFile, which is not visible here; it is passed
    # through unchanged.
    sections = eh_obj.iterateJsonFile(SECTIONS_ZIP_PATH, True)
    content_to_id = index_sections_by_content(sections)

    found, not_found = match_references(ref_items, content_to_id)

    with open(FOUND_OUTPUT_PATH, "w", encoding="utf8") as f:
        json.dump(found, f, indent=4, ensure_ascii=False)

    with open(NOT_FOUND_OUTPUT_PATH, "w", encoding="utf8") as file:
        # Write the data to the file: one not-found id per line.
        file.write("".join(f"{id_}\n" for id_ in not_found))

    print("finish!")


if __name__ == "__main__":
    main()