"""Text, Excel and JSON file helpers plus a loose date-pattern check."""

import json
import os
import re

from pandas import DataFrame, read_excel

# Default dataset locations used by ``read_file`` and ``save_to_file``.
_DEFAULT_INPUT_PATH = './data/DATASET_2.txt'
_DEFAULT_OUTPUT_PATH = './data/DATASET_3.txt'

# Two 1-2 digit numbers (1-12, then 1-31) followed by a year in 1280-1479
# or any two digits.  Every '/' is optional, so an unbroken run of four or
# more digits can match as well.  Compiled once at import time instead of
# on every call.
_DATE_REGEX = re.compile(
    r'\b(?:(?:1[0-2]|0?[1-9])/?(?:3[01]|[12][0-9]|0?[1-9])/?(?:14[0-7][0-9]|13[0-9][0-9]|128[0-9])|(?:1[0-2]|0?[1-9])/?(?:3[01]|[12][0-9]|0?[1-9])/?(?:14[0-7][0-9]|13[0-9][0-9]|128[0-9]|[0-9]{2}))\b'
)


def remove_signs() -> str:
    """Return the default dataset text with every '(' and ')' removed."""
    text = read_file()
    return re.sub(r"[()]", '', text)


def read_file() -> str:
    """Return the contents of the default dataset file.

    Behaves like ``read_file_by_address`` for ``./data/DATASET_2.txt``.
    """
    return read_file_by_address(_DEFAULT_INPUT_PATH)


def read_file_by_address(file_address) -> str:
    """Return the UTF-8 text stored at ``file_address``.

    A failure while reading or decoding the content yields an empty string.
    A failure to open the file (for example a missing path) still raises.
    """
    with open(file_address, 'r', encoding='utf-8') as file:
        try:
            return file.read()
        except (OSError, UnicodeError):
            # Best effort: unreadable content is reported as empty text.
            return ''


def save_to_file(result) -> None:
    """Append ``result`` to the default output file ``./data/DATASET_3.txt``."""
    save_to_file_by_address(_DEFAULT_OUTPUT_PATH, result)


def save_to_file_by_address(file_address, text) -> None:
    """Append ``text`` to ``file_address`` as UTF-8, creating the file if needed."""
    # The ``with`` block closes the file; no explicit close() is required.
    with open(file_address, 'a', encoding='utf-8') as file:
        file.write(text)


def read_from_excel(file_address, column_name):
    """Read the Excel file at ``file_address`` and return column ``column_name``."""
    data = read_excel(file_address)
    return data[column_name]


def add_columndata_to_excel(file_address, column_name, columndata) -> None:
    """Set column ``column_name`` to ``columndata`` and rewrite the Excel file.

    The workbook is read with ``read_excel``, the column is assigned, and the
    result is written back to the same path without the index column.
    """
    data = read_excel(file_address)
    data[column_name] = columndata
    data.to_excel(file_address, index=False)


def write_to_excel(data_dict, file_name_and_address) -> bool:
    """Build a DataFrame from ``data_dict`` and save it as an Excel file.

    Returns ``True`` once the file has been written.
    """
    frame = DataFrame(data_dict)
    frame.to_excel(file_name_and_address, index=False)
    return True


def write_to_json(dict, file_address) -> bool:
    """Serialize ``dict`` to ``file_address`` as indented, non-ASCII-escaped JSON.

    The parameter keeps its historical name ``dict`` so existing keyword
    callers continue to work.  Returns ``True`` once the file has been written.
    """
    json_data = json.dumps(dict, indent=2, ensure_ascii=False)
    with open(file_address, 'w', encoding='utf-8') as file:
        file.write(json_data)
    return True


def read_from_json(file_address) -> list:
    """Load JSON from ``file_address`` and return its top-level items as a list.

    Iterating the loaded value means a top-level JSON array yields its
    elements, while a top-level JSON object yields only its keys.
    """
    with open(file_address, 'r', encoding='utf-8') as file:
        loaded_data = json.load(file)
    return list(loaded_data)


def separated_date_format_finder(date_ner) -> bool:
    """Return ``True`` if ``date_ner`` contains a date-like digit group.

    The characters '.', '،' and 'ر' are first replaced with '/', then the
    text is searched with the module-level date pattern.
    """
    normalized = date_ner.replace('.', '/')
    normalized = normalized.replace('،', '/')
    normalized = normalized.replace('ر', '/')
    return _DATE_REGEX.search(normalized) is not None


if __name__ == "__main__":
    pass