majles_refs/base_process.py
2025-05-21 21:04:05 +03:30

57 lines
1.8 KiB
Python

import pandas as pd
from text_normalizer import normalizer
import json
"""word_enter_charachters = {"^p","'^l'"}"""
def read_excel_file(file_path):
    """
    Read an Excel file and return its rows as a list of dictionaries.

    The first three sheet columns (stored as ``radif``, ``qanon_title`` and
    ``related``) are forward-filled: when one of them normalizes to an empty
    string, the value from the previously built record is reused. On the
    first row there is no previous record, so an empty value is kept as
    ``""`` instead of raising ``IndexError``.

    Args:
        file_path: Path of the workbook, passed straight to ``pd.read_excel``.

    Returns:
        A list with one dict per sheet row, holding the keys ``id`` (the
        row's DataFrame index label), ``radif``, ``qanon_title``,
        ``related``, ``status``, ``reasons`` and ``description``.
    """
    data = pd.read_excel(file_path)
    bime_json_list = []
    for row in data.itertuples():
        # itertuples() yields the index label first, then the sheet columns
        # in order, so row[1] is the first column of the sheet.
        radif = normalizer.normalize_qanon(row[1]).replace('.', '')
        qanon_title = (
            normalizer.normalize_qanon(row[2])
            .replace('.', '')
            .replace('#!#', ' ')
            .strip()
        )
        related = normalizer.normalize_qanon(row[3]).replace('.', '')

        # Empty cells inherit from the record built for the preceding row
        # (presumably merged/blank cells in the sheet -- confirm with the
        # data owner). Skipped on the first row, where nothing exists yet.
        if bime_json_list:
            previous = bime_json_list[-1]
            if radif == "":
                radif = previous['radif']
            if qanon_title == "":
                qanon_title = previous['qanon_title']
            if related == "":
                related = previous['related']

        bime_json_list.append({
            "id": row[0],
            "radif": radif,
            "qanon_title": qanon_title,
            "related": related,
            # The '#!#' marker is dropped in status and turned into a
            # newline in reasons.
            "status": normalizer.normalize_qanon(row[4]).replace('#!#', ''),
            "reasons": normalizer.normalize_qanon(row[5]).replace('#!#', '\n'),
            "description": normalizer.normalize_qanon(row[6]),
        })
    return bime_json_list
def write_to_json(data_dict, output_path):
    """
    Dump ``data_dict`` to ``output_path`` as a UTF-8 encoded JSON document.

    The output is indented by four spaces and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        data_dict: Any JSON-serializable object.
        output_path: Destination file path; an existing file is overwritten.

    Returns:
        ``True`` once the file has been written.
    """
    dump_options = {'indent': 4, 'ensure_ascii': False}
    with open(output_path, mode='w', encoding='utf-8') as sink:
        json.dump(data_dict, sink, **dump_options)
    return True
if __name__ == '__main__':
    # Script entry point: convert the workbook to JSON, then report completion.
    write_to_json(
        read_excel_file('./data/bime.xlsx'),
        './data/bime.json',
    )
    print('Done!')