new json
This commit is contained in:
parent
6859643768
commit
8eb3f5e5ed
4313
data/final_wisdom.json
Normal file
4313
data/final_wisdom.json
Normal file
File diff suppressed because one or more lines are too long
3938
data/wisdom_data.json
Normal file
3938
data/wisdom_data.json
Normal file
File diff suppressed because one or more lines are too long
87
temp.py
Normal file
87
temp.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
"""Merge multi-part wisdom ("hekmat") records and re-key them by number.

The script loads a JSON *list* of records, folds the sub-part records that
directly follow certain head records into that head record, drops every
remaining sub-part record, and writes a JSON *object* keyed by
``hekmat-<number>``.
"""

import json

# NOTE(review): input and output are the same path, but the input must be a
# list while the output is a dict. A second run therefore reads the dict and
# is rejected by build_records() -- confirm this in-place overwrite is wanted.
INPUT_PATH = './data/final_wisdom.json'
OUTPUT_PATH = './data/final_wisdom.json'

# Leading word of every non-sub-part title; the number follows it.
TITLE_PREFIX = 'حکمت'

# Records with these URLs are dropped entirely.
SKIPPED_URLS = frozenset({
    'http://nahj.makarem.ir/wisdom/1070',
})

# Head record URL -> number of immediately following records merged into it.
EXTRA_PARTS = {
    'http://nahj.makarem.ir/wisdom/988': 2,
    'http://nahj.makarem.ir/wisdom/1089': 2,
    'http://nahj.makarem.ir/wisdom/1107': 3,
    'http://nahj.makarem.ir/wisdom/1113': 2,
    'http://nahj.makarem.ir/wisdom/1314': 2,
    'http://nahj.makarem.ir/wisdom/1334': 2,
}

# Text fields that are newline-joined when parts are merged.
MERGED_FIELDS = ('arabic_text', 'persian_translate', 'Interpretation')


def _parse_hekmat_number(title):
    """Return the integer that follows ``TITLE_PREFIX`` in *title*.

    Raises:
        ValueError: if what remains after the prefix is not an integer.
    """
    # removeprefix() drops the literal word; lstrip() would instead strip any
    # run of the word's individual characters.
    number_text = title.removeprefix(TITLE_PREFIX).strip()
    try:
        return int(number_text)
    except ValueError as err:
        raise ValueError(
            f'cannot parse hekmat number from title {title!r}'
        ) from err


def build_records(records):
    """Build the ``hekmat-<n>`` mapping from the raw record list.

    Args:
        records: list of dicts, each with the keys ``title``, ``is-subpart``,
            ``url``, ``arabic_text``, ``persian_translate`` and
            ``Interpretation``.

    Returns:
        dict mapping ``'hekmat-<number>'`` to the output record, in input
        order. A later record with the same number replaces an earlier one.

    Raises:
        TypeError: if *records* is not a list.
        IndexError: if a head record in ``EXTRA_PARTS`` is not followed by
            enough records to merge.
        ValueError: if a kept record's title has no parseable number.
    """
    if not isinstance(records, list):
        raise TypeError(
            'expected a JSON list of records, got '
            f'{type(records).__name__} (was the input already converted?)'
        )

    new_records = {}
    for index, item in enumerate(records):
        url = item['url']
        if url in SKIPPED_URLS:
            continue

        title = item['title']
        extra = EXTRA_PARTS.get(url, 0)
        if extra:
            parts = records[index:index + extra + 1]
            if len(parts) != extra + 1:
                raise IndexError(
                    f'{url} needs {extra} following part(s) but only '
                    f'{len(parts) - 1} remain'
                )
            # A head record is always kept, whatever its own flag says.
            is_subpart = False
            # str() keeps the formatting the f-string concatenation applied.
            texts = {
                field: '\n'.join(str(part[field]) for part in parts)
                for field in MERGED_FIELDS
            }
            for part in parts[1:]:
                print(f"url merged =>> {part['url']} into {url}")
        else:
            is_subpart = item['is-subpart']
            texts = {field: item[field] for field in MERGED_FIELDS}

        if is_subpart:
            # Sub-parts are either merged into their head above or dropped.
            print(f'jump for title: {title}')
            continue

        hekmat_number = _parse_hekmat_number(title)
        new_records[f'hekmat-{hekmat_number}'] = {
            "hekmat_number": hekmat_number,
            "title": title,
            "is-subpart": is_subpart,
            "url": url,
            "arabic_text": texts['arabic_text'],
            "persian_translate": texts['persian_translate'],
            "Interpretation": texts['Interpretation'],
        }
    return new_records


def main():
    """Read ``INPUT_PATH``, merge the records, and write ``OUTPUT_PATH``."""
    with open(INPUT_PATH, 'r', encoding='utf-8') as f:
        records = json.load(f)

    new_records = build_records(records)

    # Serialize before opening the output so that a failure here cannot
    # truncate the file (which is also the input).
    final_data = json.dumps(new_records, ensure_ascii=False, indent=4)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
        out.write(final_data)

    print('finished')


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user