new json
This commit is contained in:
parent
6859643768
commit
8eb3f5e5ed
4313
data/final_wisdom.json
Normal file
4313
data/final_wisdom.json
Normal file
File diff suppressed because one or more lines are too long
3938
data/wisdom_data.json
Normal file
3938
data/wisdom_data.json
Normal file
File diff suppressed because one or more lines are too long
87
temp.py
Normal file
87
temp.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
# One-off clean-up pass over the wisdom ("hekmat") records stored in
# ./data/final_wisdom.json.
#
# The input is a JSON list of records.  Each record carries the keys
# 'title', 'is-subpart', 'url', 'arabic_text', 'persian_translate' and
# 'Interpretation'.  The script:
#   * drops one specific record (see SKIPPED_URL),
#   * folds the records that follow certain "multi-part" records into the
#     record that precedes them (see MULTI_PART_URLS),
#   * skips every remaining record flagged as a sub-part, and
#   * writes the result back as a dict keyed by 'hekmat-<number>'.
#
# NOTE: input and output paths are the same file, so the list-shaped input is
# replaced by the dict-shaped output.  Running the script a second time on
# its own output is rejected with a TypeError (see merge_records).
import json

INPUT_PATH = './data/final_wisdom.json'
OUTPUT_PATH = './data/final_wisdom.json'

# Record that is dropped from the output unconditionally.
SKIPPED_URL = 'http://nahj.makarem.ir/wisdom/1070'

# url of a multi-part record -> number of immediately following input records
# whose texts are appended to it.
MULTI_PART_URLS = {
    'http://nahj.makarem.ir/wisdom/988': 2,
    'http://nahj.makarem.ir/wisdom/1089': 2,
    'http://nahj.makarem.ir/wisdom/1107': 3,  # 3 part
    'http://nahj.makarem.ir/wisdom/1113': 2,
    'http://nahj.makarem.ir/wisdom/1314': 2,
    'http://nahj.makarem.ir/wisdom/1334': 2,
}

# Text fields that are concatenated (newline separated) when parts are merged.
TEXT_FIELDS = ('arabic_text', 'persian_translate', 'Interpretation')

# Word that precedes the number in a record title.
TITLE_PREFIX = 'حکمت'

# Earlier scratch pass kept for reference.  It indexes `records` as a dict,
# so it presumably ran on the dict-shaped output of this script -- confirm
# before reviving it.
#
# new2records = {}
# for item in records:
#     key = records[item]['hekmat_number']
#     new2records[key] = records[item]
#
# dictio = dict(sorted(new2records.items()))
# final_data = json.dumps(dictio, ensure_ascii=False, indent=4)
# with open('./data/final_wisdom2.json', 'w', encoding='utf-8') as out:
#     out.write(final_data)


def parse_hekmat_number(title):
    """Return the integer that follows TITLE_PREFIX in *title*.

    The prefix is removed as a whole word.  ``str.lstrip`` would instead
    treat it as a set of characters to strip, which only works by accident.

    Raises:
        ValueError: if what remains after the prefix is not an integer.
    """
    return int(title.strip().removeprefix(TITLE_PREFIX).strip())


def _merged_texts(head, parts):
    """Join each text field of *head* and *parts* with newlines."""
    group = [head, *parts]
    # str() mirrors the f-string interpolation used for merging originally,
    # so non-string values are stringified instead of raising.
    return {
        field: '\n'.join(str(record[field]) for record in group)
        for field in TEXT_FIELDS
    }


def merge_records(records):
    """Build the 'hekmat-<n>' keyed dict from the list of input records.

    Args:
        records: list of record dicts in their original order.  The order
            matters: the parts of a multi-part record are taken from the
            positions directly after it.

    Returns:
        dict mapping 'hekmat-<number>' to the cleaned record.  A later record
        with the same number replaces an earlier one.

    Raises:
        TypeError: if *records* is not a list (e.g. the already-converted
            dict output was fed back in).
        IndexError: if a multi-part record is not followed by enough records.
        ValueError: if a kept record's title does not contain a number.
    """
    if not isinstance(records, list):
        raise TypeError(
            'expected a list of records, got '
            f'{type(records).__name__}; was the file already converted?'
        )

    new_records = {}
    for index, item in enumerate(records):
        url = item['url']
        is_subpart = item['is-subpart']
        title = item['title']

        if url == SKIPPED_URL:
            continue

        part_count = MULTI_PART_URLS.get(url, 0)
        if part_count:
            parts = records[index + 1:index + 1 + part_count]
            if len(parts) < part_count:
                raise IndexError(
                    f'{url} needs {part_count} following records, '
                    f'found {len(parts)}'
                )
            # The head of a multi-part group is always kept, whatever its
            # own 'is-subpart' flag says.
            is_subpart = False
            texts = _merged_texts(item, parts)
            for part in parts:
                print(f"url merged =>> {part['url']} into {item['url']}")
        else:
            texts = {field: item[field] for field in TEXT_FIELDS}

        if is_subpart:
            # Sub-parts were either merged into their head above or are
            # intentionally left out of the output.
            print(f'jump for title: {title}')
            continue

        hekmat_number = parse_hekmat_number(title)
        new_records[f'hekmat-{hekmat_number}'] = {
            "hekmat_number": hekmat_number,
            "title": title,
            "is-subpart": is_subpart,
            "url": url,
            "arabic_text": texts['arabic_text'],
            "persian_translate": texts['persian_translate'],
            "Interpretation": texts['Interpretation'],
        }

    return new_records


def main():
    """Load the records, merge them and write the result back to disk."""
    with open(INPUT_PATH, 'r', encoding='utf-8') as f:
        records = json.load(f)

    new_records = merge_records(records)

    # Serialize before opening the output for writing: the output path is the
    # input path, so a failure during serialization must not truncate it.
    final_data = json.dumps(new_records, ensure_ascii=False, indent=4)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
        out.write(final_data)

    print('finished')


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user