"""Merge multi-part hekmat records and re-key them by hekmat number.

Reads a JSON list of records from ``./data/final_wisdom.json``, folds the
records that follow a multi-part entry into that entry, drops sub-part and
skipped records, and writes the result back to the same path as a JSON
object keyed ``hekmat-<number>``.
"""
import json

DATA_PATH = './data/final_wisdom.json'

# Literal prefix that precedes the number in every record title.
TITLE_PREFIX = 'حکمت'

# Records with these URLs are left out of the output entirely.
SKIPPED_URLS = frozenset({'http://nahj.makarem.ir/wisdom/1070'})

# Entries whose text continues in the records that immediately follow them,
# mapped to how many following records are folded into the entry.
MULTI_PART_URLS = {
    'http://nahj.makarem.ir/wisdom/988': 2,
    'http://nahj.makarem.ir/wisdom/1089': 2,
    'http://nahj.makarem.ir/wisdom/1107': 3,
    'http://nahj.makarem.ir/wisdom/1113': 2,
    'http://nahj.makarem.ir/wisdom/1314': 2,
    'http://nahj.makarem.ir/wisdom/1334': 2,
}


def parse_hekmat_number(title):
    """Return the integer that follows the ``حکمت`` prefix in *title*.

    Surrounding whitespace is ignored.  The prefix is removed as a literal
    string (``str.removeprefix``), not as a set of characters.

    Raises:
        ValueError: if what remains after the prefix is not an integer.
    """
    return int(title.strip().removeprefix(TITLE_PREFIX).strip())


def _merged_text(item, parts, field):
    """Return *field* of *item*, newline-joined with the same field of *parts*."""
    if not parts:
        # Single-part entry: pass the stored value through untouched.
        return item[field]
    return '\n'.join(str(record[field]) for record in (item, *parts))


def merge_wisdom_records(records):
    """Build the ``hekmat-<number>`` mapping from the list of *records*.

    Args:
        records: list of dicts, each with the keys ``url``, ``is-subpart``,
            ``title``, ``arabic_text``, ``persian_translate`` and
            ``Interpretation``.

    Returns:
        A dict, in input order, mapping ``hekmat-<number>`` to the merged
        record.  A later record with the same number replaces an earlier one.

    Raises:
        IndexError: if a multi-part entry is not followed by enough records.
        ValueError: if a kept record's title does not contain a number.
    """
    new_records = {}
    for i, item in enumerate(records):
        url = item['url']
        is_subpart = item['is-subpart']
        title = item['title']

        if url in SKIPPED_URLS:
            continue

        part_count = MULTI_PART_URLS.get(url, 0)
        # Explicit indexing (not slicing) so truncated input fails loudly.
        parts = [records[i + offset] for offset in range(1, part_count + 1)]
        if parts:
            # A multi-part parent is always emitted as a full entry.
            is_subpart = False

        arabic_text = _merged_text(item, parts, 'arabic_text')
        persian_translate = _merged_text(item, parts, 'persian_translate')
        interpretation = _merged_text(item, parts, 'Interpretation')
        for part in parts:
            print(f"url merged =>> {part['url']} into {item['url']}")

        if is_subpart:
            # Sub-parts were (or will be) folded into their parent entry.
            print(f'jump for title: {title}')
            continue

        hekmat_number = parse_hekmat_number(title)
        new_records[f'hekmat-{hekmat_number}'] = {
            "hekmat_number": hekmat_number,
            "title": title,
            "is-subpart": is_subpart,
            "url": url,
            "arabic_text": arabic_text,
            "persian_translate": persian_translate,
            "Interpretation": interpretation,
        }
    return new_records


def main():
    """Rewrite ``./data/final_wisdom.json`` in place with the merged records."""
    with open(DATA_PATH, 'r', encoding='utf-8') as f:
        records = json.load(f)

    # The output overwrites the input, so a second run would load a dict.
    # Fail with a clear message instead of an opaque indexing error.
    if not isinstance(records, list):
        raise TypeError(
            f'{DATA_PATH} must contain a JSON list of records, '
            f'got {type(records).__name__}'
        )

    new_records = merge_wisdom_records(records)

    # Serialize fully before opening for write so a failure cannot leave a
    # truncated file behind.
    final_data = json.dumps(new_records, ensure_ascii=False, indent=4)
    with open(DATA_PATH, 'w', encoding='utf-8') as out:
        out.write(final_data)

    print('finished')


if __name__ == '__main__':
    main()