llama/convert_json_to_text.py
2025-07-13 19:05:59 +03:30

63 lines
2.1 KiB
Python

""" این روال جهت تبدیل فایل جیسون حاوی قوانین بازنویسی شده به یک متن ایجاد شده است """
from html import escape
from datetime import datetime
import time
import os
from funcs import read_from_json, save_to_file_by_address
def normalize_sentence(sentence):
    """Return one rewritten sentence cleaned up for the plain-text output.

    All ``*`` characters are removed and surrounding whitespace is stripped.
    If the stripped text ends with ``" .."`` every ``" .."`` in it becomes
    ``". "``; otherwise, if it ends with ``".."`` every ``".."`` becomes
    ``".\\n"``. Any other text is returned as stripped.

    Args:
        sentence: A single string taken from a record's ``"result"`` list.

    Returns:
        The cleaned sentence.
    """
    text = sentence.replace("*", "").strip()
    # NOTE(review): str.replace rewrites every occurrence, not only the
    # trailing one that triggered the branch. Kept as-is to preserve the
    # existing output; confirm whether only the ending should be changed.
    if text.endswith(" .."):
        return text.replace(" ..", ". ")
    if text.endswith(".."):
        return text.replace("..", ".\n")
    return text


def render_record(record_id, sentences):
    """Return the output chunk for one record.

    The chunk is the cleaned sentences concatenated without a separator,
    followed by a newline, an ``id:[...]`` marker line and a blank line.

    Args:
        record_id: Value of the record's ``"id"`` field.
        sentences: Iterable of sentence strings (the record's ``"result"``).

    Returns:
        The text to append to the output file for this record.
    """
    body = "".join(normalize_sentence(sentence) for sentence in sentences)
    return f"{body}\nid:[{record_id}]\n\n"


def format_failure_progress(record_id, processed, total):
    """Return the progress line printed when the conversion fails.

    Args:
        record_id: Id of the record being handled when the failure occurred
            (empty string if no record had been read yet).
        processed: Number of records fully rendered before the failure.
        total: Number of records in the input; may be 0.

    Returns:
        A line of the form ``"Exception:=> <id> -> <ratio> "``.
    """
    # Guard against an empty (or not yet measured) input: the failure report
    # itself must never raise ZeroDivisionError and mask the real error.
    ratio = processed / total if total else 0.0
    return "Exception:=> %s -> %.2f " % (record_id, ratio)


def main():
    """Convert the JSON file of rewritten law sentences into one text file.

    Reads ``data/simplized_sentences_110_2.json`` under the current working
    directory, renders every record with ``render_record`` and writes the
    concatenated result to ``data/simplized_sentences_110_02.txt``. Nothing
    is written if any record fails; the error and the progress reached are
    printed instead.
    """
    base_address = os.getcwd()
    source_address = os.path.join(
        base_address, "data", "simplized_sentences_110_2.json")
    destination_address = os.path.join(
        base_address, "data", "simplized_sentences_110_02.txt")

    datalist_110 = read_from_json(source_address)
    start_time = time.time()

    current_id = ''
    processed = 0
    total = 0
    chunks = []
    try:
        # Measured inside the try so that an unexpected return value from
        # read_from_json is reported by the handler below.
        total = len(datalist_110)
        for record in datalist_110:
            current_id = record["id"]
            chunks.append(render_record(current_id, record["result"]))
            processed += 1
            print(processed)
        # Join once at the end instead of growing a string record by record.
        save_to_file_by_address(destination_address, "".join(chunks))
    except Exception as inst:
        print(type(inst))  # the exception type
        print(inst.args)   # arguments stored in .args
        print(inst)        # __str__ allows args to be printed directly,
                           # but may be overridden in exception subclasses
        print(format_failure_progress(current_id, processed, total))

    end_time = time.time()
    print(f"elapsed time: {end_time-start_time}")
    print(" *** finished! *** ")


if __name__ == "__main__":
    main()