# nahj/temp.py
# 2025-08-17 20:08:45 +03:30
#
# 87 lines
# 3.6 KiB
# Python

import json
# Consolidate the wisdom ("hekmat") records stored in DATA_PATH.
#
# The file is read as a JSON list of record dicts (keys used: 'url', 'title',
# 'is-subpart', 'arabic_text', 'persian_translate', 'Interpretation') and is
# then overwritten IN PLACE with a JSON object keyed 'hekmat-<number>'.
# Because the output shape differs from the input shape, the script can only
# be run once against a given file; a second run is rejected with a TypeError.

DATA_PATH = './data/final_wisdom.json'

# Record that is dropped from the output entirely.
SKIPPED_URL = 'http://nahj.makarem.ir/wisdom/1070'

# URL of a leading record -> number of immediately following records whose
# texts are appended to it.
FOLLOWERS_TO_MERGE = {
    'http://nahj.makarem.ir/wisdom/988': 2,
    'http://nahj.makarem.ir/wisdom/1089': 2,
    'http://nahj.makarem.ir/wisdom/1107': 3,
    'http://nahj.makarem.ir/wisdom/1113': 2,
    'http://nahj.makarem.ir/wisdom/1314': 2,
    'http://nahj.makarem.ir/wisdom/1334': 2,
}

# Text fields that are concatenated (newline-separated) when records merge.
MERGED_FIELDS = ('arabic_text', 'persian_translate', 'Interpretation')

# Characters stripped from the left of a title before the number is parsed.
TITLE_PREFIX = 'حکمت'

# NOTE(review): the snippet below was already commented out. It treats
# `records` as a dict and re-keys it by 'hekmat_number', so it looks like a
# follow-up step that runs on this script's output - confirm before deleting.
#
# new2records = {}
# for item in records:
#     key = records[item]['hekmat_number']
#     new2records[key] = records[item]
# dictio = dict(sorted(new2records.items()))
# final_data = json.dumps(dictio, ensure_ascii=False, indent=4)
# with open('./data/final_wisdom2.json', 'w', encoding='utf-8') as out:
#     out.write(final_data)


def merge_records(records):
    """Build the 'hekmat-<number>' mapping from the list of raw records.

    For every record, in order:
      * the record whose URL is SKIPPED_URL is ignored;
      * a record whose URL is in FOLLOWERS_TO_MERGE gets the MERGED_FIELDS of
        the configured number of following records appended (joined with
        newlines) and is stored with 'is-subpart' set to False;
      * any other record flagged 'is-subpart' is skipped with a log line;
      * everything else is stored unchanged.

    The hekmat number is parsed from the title after stripping TITLE_PREFIX.

    Args:
        records: list of record dicts as loaded from the JSON file.

    Returns:
        dict mapping 'hekmat-<number>' to the consolidated record; a later
        record with the same number replaces an earlier one.

    Raises:
        TypeError: if `records` is not a list (e.g. the file was already
            converted by a previous run).
        IndexError: if a multi-part record has fewer followers than expected.
        ValueError: if a stored record's title does not end in an integer.
    """
    if not isinstance(records, list):
        raise TypeError(
            'expected a JSON list of records, got '
            f'{type(records).__name__}; was the file already converted?'
        )

    new_records = {}
    for i, item in enumerate(records):
        url = item['url']
        is_subpart = item['is-subpart']
        title = item['title']
        if url == SKIPPED_URL:
            continue

        follower_count = FOLLOWERS_TO_MERGE.get(url, 0)
        if follower_count:
            followers = [records[i + k] for k in range(1, follower_count + 1)]
            parts = [item] + followers
            # str() mirrors the implicit formatting the f-strings applied.
            texts = {
                field: '\n'.join(str(part[field]) for part in parts)
                for field in MERGED_FIELDS
            }
            # The leader represents the whole hekmat, so it is always kept.
            # The followers are not removed here; presumably they are flagged
            # 'is-subpart' and skipped on their own turn - verify against data.
            is_subpart = False
            for follower in followers:
                print(f"url merged =>> {follower['url']} into {url}")
        else:
            texts = {field: item[field] for field in MERGED_FIELDS}

        if is_subpart:
            print(f'jump for title: {title}')
            continue

        # lstrip() removes a *set of characters*, not a literal prefix; that
        # is sufficient here because the number is preceded by whitespace.
        hekmat_number = int(title.lstrip(TITLE_PREFIX).strip())
        new_records[f'hekmat-{hekmat_number}'] = {
            "hekmat_number": hekmat_number,
            "title": title,
            "is-subpart": is_subpart,
            "url": url,
            "arabic_text": texts['arabic_text'],
            "persian_translate": texts['persian_translate'],
            "Interpretation": texts['Interpretation'],
        }
    return new_records


def main():
    """Read DATA_PATH, consolidate its records and overwrite it in place."""
    with open(DATA_PATH, 'r', encoding='utf-8') as f:
        records = json.load(f)

    new_records = merge_records(records)

    # Serialise before opening the file for writing so that a serialisation
    # error cannot leave the input truncated.
    final_data = json.dumps(new_records, ensure_ascii=False, indent=4)
    with open(DATA_PATH, 'w', encoding='utf-8') as out:
        out.write(final_data)
    print('finished')


if __name__ == '__main__':
    main()