"""Fine-tune a flair transformer sequence tagger for NER and write a result report."""

import datetime
from pathlib import Path
from typing import Optional

from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.embeddings import TransformerDocumentEmbeddings

# Original author's note next to this value: "0.65e-4 - 0.4e-4"
# (presumably the range of learning rates tried so far -- confirm).
LEARNING_RATE = 0.65e-4
MINI_BATCH_SIZE = 8
MAX_EPOCHS = 100

# Horizontal rule written twice at the end of every report entry.
_REPORT_RULE = '------------------------------------------------------------------------------------'


def save_to_file_by_address(file_address, content):
    """Append ``content`` to the UTF-8 text file at ``file_address``.

    The file is created when it does not exist yet; its parent directory
    must already exist.
    """
    with open(file_address, 'a+', encoding='utf-8') as file:
        file.write(content)


def digit_correct(input_num):
    """Return ``input_num`` as a string, prefixed with ``'0'`` when it is below 10.

    Not used by this module any more (the timestamp is built with
    ``strftime``); kept because other modules may still import it.
    """
    if input_num < 10:
        return f'0{input_num}'
    return str(input_num)


def _describe_error(error):
    """Return printable text for ``error``, even when it carries no message."""
    # str(error) is empty for exceptions raised without arguments; fall back
    # to repr so that at least the exception type is shown.
    return str(error) or repr(error)


def _build_model_title(model, moment):
    """Build the output folder name: ``<date>--<time>--<model>`` with ``/`` replaced."""
    stamp = moment.strftime('%Y-%m-%d--%H-%M-%S')
    # Model ids such as "org/name" contain a slash, which must not end up in
    # a single folder name.
    return f"{stamp}--{model}".replace('/', '--')


def _build_report(model, training_result, evaluate_result, finished_at):
    """Assemble the text block that is appended to ``test-result.txt``."""
    # NOTE(review): the label says "F1 Score", but the value written is
    # whatever trainer.fine_tune() returned -- confirm what that object is.
    train_result = (
        '************************************************\n\n'
        '##### TRAIN RESULT #####\n'
        f'F1 Score: {training_result}\n'
        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
    )
    hyperparameters = (
        f"LEARNING_RATE: {LEARNING_RATE} - MINI_BATCH_SIZE: {MINI_BATCH_SIZE}"
        f" - MAX_EPOCHS: {MAX_EPOCHS}"
    )
    return (
        f'Model Name: {model}\n'
        f'Fine-Tune Parameters: {hyperparameters}\n'
        f'{train_result}\n'
        f'{evaluate_result}\n\n'
        f'Fine_Tune time: {finished_at}\n'
        f'{_REPORT_RULE}\n'
        f'{_REPORT_RULE}\n\n'
    )


def main_train(model: str) -> Optional[bool]:
    """Fine-tune ``model`` for the NER task and record the outcome.

    Steps: load the column corpus from ``./data/``, build a
    ``SequenceTagger`` on top of transformer word embeddings, fine-tune it
    into ``./taggers/<model_title>``, then (best effort) plot the training
    log, run a quick inference test and evaluate the final model. A summary
    is appended to ``./taggers/<model_title>/test-result.txt``.

    Args:
        model (str): name of the model that is going to be trained.

    Returns:
        ``True`` when the run reaches the end; ``None`` when creating the
        tagger, creating the trainer or the fine-tuning itself fails (the
        error is printed).
    """
    started_at = datetime.datetime.now()
    # The final model folder is named after the current date and time.
    model_title = _build_model_title(model, started_at)
    output_dir = f"./taggers/{model_title}"

    print(f'\nMODEL:: {model}\n')

    # define dataset columns
    columns = {0: 'text', 1: 'ner'}

    # directory of data files
    data_folder = './data/'

    # Set the dataset file name here. Only a train file is given; no
    # test_file / dev_file is passed (see the flair ColumnCorpus docs for
    # how the missing splits are handled).
    corpus = ColumnCorpus(
        data_folder,
        columns,
        train_file='DATASET140402_no_aref2.txt',  # "qavanin", 36K tokens (author's note)
    )

    # tag to predict
    tag_type = 'ner'

    # make tag dictionary from the corpus
    tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)

    embeddings = TransformerWordEmbeddings(
        model=model,
        layers="-1",
        subtoken_pooling="first",
        fine_tune=True,
        use_context=True,
        # NOTE(review): presumably makes the loader read TensorFlow weights --
        # confirm this matches the checkpoint format of every model used.
        from_tf=True,
        allow_long_sentences=True,
    )
    print('model read successfully !')

    try:
        tagger = SequenceTagger(
            hidden_size=256,
            embeddings=embeddings,
            tag_dictionary=tag_dictionary,
            tag_type=tag_type,
            use_crf=False,
            use_rnn=False,
            reproject_embeddings=False,
        )
    except Exception as error:
        print(_describe_error(error))
        return None
    print('tagger initialized successfully !')

    try:
        trainer = ModelTrainer(tagger, corpus)
    except Exception as error:
        print(_describe_error(error))
        return None

    # begin training data
    try:
        training_result = trainer.fine_tune(
            output_dir,
            learning_rate=LEARNING_RATE,
            mini_batch_size=MINI_BATCH_SIZE,
            max_epochs=MAX_EPOCHS,
        )
    except Exception as error:
        print(_describe_error(error))
        return None
    print('training finished successfully!')

    # Plot the training log to evaluate the process (best effort). The
    # return value gets its own name so it cannot overwrite the training
    # result that is written to the report below.
    try:
        from train_log_plotter import plot_diagram
        plot_result = plot_diagram(model_title)
        print(plot_result[1])
    except Exception as error:
        print('log diagram failed due to error!')
        print(_describe_error(error))

    print('fine-tune operation finished')
    # Timestamp taken right after fine-tuning; this is a point in time, not
    # an elapsed duration.
    operation_time = datetime.datetime.now()
    print(f'operation_time: {operation_time}')

    # ###################################################
    # Test the trained model on a fixed input (best effort).
    print(' Try to test trained model! ')
    try:
        from inference import inference_main
        inference_main(output_dir, '')
    except Exception as error:
        print(' Testing model Error! ')
        print(_describe_error(error))
    print(' Testing model finished! ')

    # ###################################################
    # Evaluate the trained model; a failure is recorded in the report
    # instead of aborting the run.
    try:
        from evaluate_model import do_evaluate
        print(' Try to evaluating the trained model! ')
        evaluate_result = do_evaluate(f"{output_dir}/final-model.pt")
        print(' Evaluating finished! ')
    except Exception as error:
        print('do_evaluate function failed')
        evaluate_result = (
            f"do_evaluate function failed!\nerror massage:\n{_describe_error(error)}"
        )

    final_result = _build_report(model, training_result, evaluate_result, operation_time)
    save_to_file_by_address(f'{output_dir}/test-result.txt', final_result)

    return True


models = ["HooshvareLab/bert-fa-base-uncased-ner-peyma"]

if __name__ == "__main__":
    # iterate models to train
    for model_name in models:
        if not model_name:
            continue

        print(f" ... try to TRAIN ** {model_name} ** Model ... ")

        # Catch Exception only, so Ctrl-C / SystemExit still stop the whole
        # run instead of silently moving on to the next model.
        try:
            succeeded = main_train(model_name)
            if succeeded:
                print(f'TRAIN **{model_name}** Finished successfully')
        except Exception as error:
            print(f" !!! TRAIN **{model_name}** Model ERROR !!! ")
            print(_describe_error(error))

    print('All Models Training Process Finished!')