"""Small file helpers (text, Excel, JSON) and a date-format detector."""

import json
import os
import re

from pandas import read_excel

# Default locations used by the parameterless helpers below.
_DEFAULT_INPUT_PATH = './data/DATASET_2.txt'
_DEFAULT_OUTPUT_PATH = './data/DATASET_3.txt'

# Matches either parenthesis character.
_PARENTHESES_PATTERN = re.compile(r"\(|\)")

# Characters that are normalised to '/' before looking for a date.
# NOTE(review): mapping the Persian letter 'ر' to '/' is presumably meant to
# repair a mistyped/OCR-ed separator - confirm against the data source.
_DATE_SEPARATOR_TABLE = str.maketrans({'.': '/', '،': '/', 'ر': '/'})

# Compiled once at import time instead of on every call. The pattern text is
# unchanged: <1-12> [/] <1-31> [/] <year>, where the year is 128x, 13xx,
# 140x-147x or (second alternative only) any two digits.
# NOTE(review): the separators are optional and the second alternative accepts
# a two-digit year, so a bare run of digits such as "1399" also matches.
_DATE_PATTERN = re.compile(
    r'\b(?:'
    r'(?:1[0-2]|0?[1-9])/?(?:3[01]|[12][0-9]|0?[1-9])/?(?:14[0-7][0-9]|13[0-9][0-9]|128[0-9])'
    r'|'
    r'(?:1[0-2]|0?[1-9])/?(?:3[01]|[12][0-9]|0?[1-9])/?(?:14[0-7][0-9]|13[0-9][0-9]|128[0-9]|[0-9]{2})'
    r')\b'
)


def remove_signs() -> str:
    """Return the default input file's text with all parentheses removed."""
    text = read_file()
    return _PARENTHESES_PATTERN.sub('', text)


def read_file() -> str:
    """Return the content of the default input file.

    Behaves like :func:`read_file_by_address` for ``./data/DATASET_2.txt``.
    """
    return read_file_by_address(_DEFAULT_INPUT_PATH)


def read_file_by_address(file_address) -> str:
    """Return the UTF-8 text stored in ``file_address``.

    Failing to open the file raises as usual (e.g. ``FileNotFoundError``).
    If the file opens but reading or decoding it fails, an empty string is
    returned instead, so callers get a best-effort result.
    """
    with open(file_address, 'r', encoding='utf-8') as file:
        try:
            return file.read()
        except (OSError, UnicodeError):
            # Best effort: unreadable content is treated as empty text.
            return ''


def save_to_file(result) -> None:
    """Append ``result`` to the default output file (./data/DATASET_3.txt)."""
    save_to_file_by_address(_DEFAULT_OUTPUT_PATH, result)


def save_to_file_by_address(file_address, text) -> None:
    """Append ``text`` to ``file_address``, creating the file if needed."""
    # The ``with`` block closes the file; no explicit close() is required.
    with open(file_address, 'a', encoding='utf-8') as file:
        file.write(text)


def read_from_excel(file_address, column_name):
    """Read an Excel file and return the column named ``column_name``."""
    data = read_excel(file_address)
    return data[column_name]


def add_columndata_to_excel(file_address, column_name, columndata) -> None:
    """Add (or overwrite) a column in an Excel file and save it in place.

    The sheet is loaded, ``columndata`` is assigned to ``column_name`` and the
    result is written back to ``file_address`` without the index column.
    """
    data = read_excel(file_address)
    data[column_name] = columndata
    data.to_excel(file_address, index=False)


def write_to_json(dict, file_address) -> None:
    """Serialise ``dict`` as indented JSON and append it to ``file_address``.

    Non-ASCII characters are written as-is (``ensure_ascii=False``).

    NOTE: the file is opened in append mode, so calling this more than once
    for the same path concatenates several JSON documents, which
    ``json.load`` cannot parse as a single document.
    """
    # The parameter name shadows the builtin ``dict``; it is kept because it
    # is part of the function's keyword interface.
    json_data = json.dumps(dict, indent=2, ensure_ascii=False)
    with open(file_address, 'a', encoding='utf-8') as file:
        file.write(json_data)


def read_from_json(file_address) -> list:
    """Load a JSON file and return its top-level items as a list.

    For a JSON array this is the list of elements; for a JSON object it is
    the list of keys (the result of iterating the loaded value).
    """
    with open(file_address, 'r', encoding='utf-8') as file:
        loaded_data = json.load(file)
    return list(loaded_data)


def separated_date_format_finder(date_ner) -> bool:
    """Return True if ``date_ner`` contains something shaped like a date.

    The characters '.', '،' and 'ر' are first replaced with '/', then the
    text is searched with the module-level date pattern.
    """
    normalised = date_ner.translate(_DATE_SEPARATOR_TABLE)
    return _DATE_PATTERN.search(normalised) is not None


# dataset = remove_signs()
# save_to_file(dataset)

if __name__ == "__main__":
    # Scratch code kept for reference:
    # file_address = "./Flair_NER/data/test_sections.json"
    # column_name = "content"
    # data = read_from_excel(file_address, column_name)
    # sections_text = []
    # sections = []
    # dest_address = "./Flair_NER/data/test_sections.json"
    # for item in data:
    #     sections.append({"content" : item, "real_keywords":"", "ai_keywords":""})
    # write_to_json(sections, dest_address)
    # for section in sections:
    #     write_to_json(section, dest_address)
    # data_rows = read_from_json(file_address)
    # print(len(data_rows))
    # for data in data_rows:
    #     h = data
    pass