# بسم الله

import json

"""
این سورس خروجی سورس convert_sentence_to_part رو به عنوان ورودی دریافت میکنه
و پارت های نهج البلاغه رو به صورت یک (حکمت یا خطبه یا نامه) درمیاره که هرکدوم یک key شامل پاراگراف ها دارن
"""

nahj_parts_file = "./nahj_data/all_nahj_parts.json"
output_file_path = "./nahj_data/all_nahj_CONTEXT.json"


def _paragraph_from_part(part):
    """Return the paragraph-level fields of one input part as a new dict."""
    return {
        "paragraph_id": part['part_id'],
        "number": part['number'],
        "paragraph_order": part['part_order'],
        "large_title": part['large_title'],
        "text": part['part_text'],
        "arabic_text": part['arabic_text'],
    }


def _context_from_part(part):
    """Return a new context record opened by ``part``.

    The context-level fields (id, url, interpretation link, titles, type) are
    copied from this first part, and the part itself becomes the first entry
    of the ``paragraphs`` list.
    """
    return {
        "id": part['context_id'],
        "url": part['url'],
        "interpretation_link": part['interpretation_link'],
        "title": part['title'],
        "large_title": part['large_title'],
        "type": part['type'],
        "paragraphs": [_paragraph_from_part(part)],
    }


def _group_parts_into_contexts(parts):
    """Merge consecutive parts sharing ``number`` and ``type`` into contexts.

    Only adjacent parts are merged: a part joins the current context when its
    ``number`` and ``type`` equal those of the part that opened that context;
    otherwise it opens a new context.

    Args:
        parts: iterable of part dicts as loaded from the input JSON file.

    Returns:
        A list of context dicts, each with a ``paragraphs`` list, in input
        order. Empty input yields an empty list instead of raising.
    """
    contexts = []
    number = None
    type_ = None
    for part in parts:
        # `contexts` is empty only before the first part, which always opens
        # a context regardless of the placeholder values above.
        if contexts and part['number'] == number and part['type'] == type_:
            contexts[-1]["paragraphs"].append(_paragraph_from_part(part))
            continue

        contexts.append(_context_from_part(part))
        number = part['number']
        type_ = part['type']
    return contexts


# --- 1. Load the parts from JSON ---
with open(nahj_parts_file, 'r', encoding='utf-8') as f:
    all_parts = json.load(f)

# --- 2. Group the parts into contexts ---
final_list = _group_parts_into_contexts(all_parts)

# --- 3. Write the grouped contexts back out as JSON ---
with open(output_file_path, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps the Persian/Arabic text readable in the file.
    json.dump(final_list, f, ensure_ascii=False, indent=2)