From c21d1bd22a4ebff0c10d3069539dea595021fbaf Mon Sep 17 00:00:00 2001 From: init_mahdi Date: Sun, 17 Aug 2025 15:23:20 +0330 Subject: [PATCH] insert some comments in code --- requierments.txt | 3 ++- train.py | 37 +++++++++++++++---------------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/requierments.txt b/requierments.txt index 3f09d8d..46bf30a 100644 --- a/requierments.txt +++ b/requierments.txt @@ -1,2 +1,3 @@ pip install numpy = 1.24.3 -pip show hazm = 0.10.0 \ No newline at end of file +pip show hazm = 0.10.0 +pip show flair == 0.10 diff --git a/train.py b/train.py index 91037c2..e8d91b7 100644 --- a/train.py +++ b/train.py @@ -42,21 +42,20 @@ def main_train(model): """ آموزش مدل برای تسک NER - :model نام مدلی که قرار است آموزش داده شود + Args: + model(str): نام مدلی که قرار است آموزش داده شود """ time = datetime.datetime.now() model_title = f"{time.year}-{digit_correct(time.month)}-{digit_correct(time.day)}--{digit_correct(time.hour)}-{digit_correct(time.minute)}-{digit_correct(time.second)}--{model}".replace('/','--') print(f'\nMODEL:: {model}\n') - #!pip install 'flair==0.10' - - # define columns + # define dataset columns columns = {0 : 'text', 1 : 'ner'} - # directory where the data resides + # directory of data files data_folder = './data/' - # initializing the corpuscorpus = ColumnCorpus(data_folder, columns, train_file='peyma_train.txt', sequence_length=512) - #اسم دیتاست اینجا تنظیم شود + + #آدرس فایل دیتاست اینجا تنظیم شود corpus = ColumnCorpus(data_folder, columns, #train_file = 'peyma_train.txt') train_file = 'DATASET140402_no_aref2.txt', # qavanin 36K tokens @@ -70,8 +69,6 @@ def main_train(model): # make tag dictionary from the corpus tag_dictionary = corpus.make_label_dictionary(label_type=tag_type) - #xlm-roberta-large - # embeddings = TransformerWordEmbeddings(model='HooshvareLab/distilbert-fa-zwnj-base-ner', embeddings = TransformerWordEmbeddings(model= model, layers="-1", subtoken_pooling="first", @@ -97,23 +94,17 @@ def main_train(model): except Exception as e: print(str(e.args[0])) return - - - from flair.trainers import ModelTrainer + + print('tagger initialized successfully !') + try: - trainer = ModelTrainer(tagger, corpus) - #resources/taggers/sota-ner-flert - # trainer.fine_tune('./taggers', - # learning_rate=2.0e-6, - # mini_batch_size=16, - # # mini_batch_chunk_size=1, # remove this parameter to speed up computation if you have a big GPU - # max_epochs=20 - # ) + except Exception as e: print(str(e.args[0])) return - + + # begin training data try: result = trainer.fine_tune(f"./taggers/{model_title}", learning_rate= learning_rate, @@ -123,7 +114,9 @@ def main_train(model): except Exception as e: print(str(e.args[0])) return - + print(f'training finished successfully!') + + # plot training log to evaluate process try: from train_log_plotter import plot_diagram plot_diagram(model_title)