Insert some comments in the code
This commit is contained in:
parent
7a0ab2b445
commit
c21d1bd22a
|
@ -1,2 +1,3 @@
|
|||
pip install numpy==1.24.3
|
||||
pip install hazm==0.10.0
|
||||
pip install flair==0.10
|
||||
|
|
29
train.py
29
train.py
|
@ -42,21 +42,20 @@ def main_train(model):
|
|||
"""
|
||||
آموزش مدل برای تسک NER
|
||||
|
||||
:model نام مدلی که قرار است آموزش داده شود
|
||||
Args:
|
||||
model(str): نام مدلی که قرار است آموزش داده شود
|
||||
"""
|
||||
|
||||
time = datetime.datetime.now()
|
||||
model_title = f"{time.year}-{digit_correct(time.month)}-{digit_correct(time.day)}--{digit_correct(time.hour)}-{digit_correct(time.minute)}-{digit_correct(time.second)}--{model}".replace('/','--')
|
||||
print(f'\nMODEL:: {model}\n')
|
||||
|
||||
#!pip install 'flair==0.10'
|
||||
|
||||
# define columns
|
||||
# define dataset columns
|
||||
columns = {0 : 'text', 1 : 'ner'}
|
||||
# directory where the data resides
|
||||
# directory of data files
|
||||
data_folder = './data/'
|
||||
# initializing the corpuscorpus = ColumnCorpus(data_folder, columns, train_file='peyma_train.txt', sequence_length=512)
|
||||
#اسم دیتاست اینجا تنظیم شود
|
||||
|
||||
#آدرس فایل دیتاست اینجا تنظیم شود
|
||||
corpus = ColumnCorpus(data_folder, columns,
|
||||
#train_file = 'peyma_train.txt')
|
||||
train_file = 'DATASET140402_no_aref2.txt', # qavanin 36K tokens
|
||||
|
@ -70,8 +69,6 @@ def main_train(model):
|
|||
# make tag dictionary from the corpus
|
||||
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)
|
||||
|
||||
#xlm-roberta-large
|
||||
# embeddings = TransformerWordEmbeddings(model='HooshvareLab/distilbert-fa-zwnj-base-ner',
|
||||
embeddings = TransformerWordEmbeddings(model= model,
|
||||
layers="-1",
|
||||
subtoken_pooling="first",
|
||||
|
@ -98,22 +95,16 @@ def main_train(model):
|
|||
print(str(e.args[0]))
|
||||
return
|
||||
|
||||
print('tagger initialized successfully !')
|
||||
|
||||
from flair.trainers import ModelTrainer
|
||||
try:
|
||||
|
||||
trainer = ModelTrainer(tagger, corpus)
|
||||
#resources/taggers/sota-ner-flert
|
||||
# trainer.fine_tune('./taggers',
|
||||
# learning_rate=2.0e-6,
|
||||
# mini_batch_size=16,
|
||||
# # mini_batch_chunk_size=1, # remove this parameter to speed up computation if you have a big GPU
|
||||
# max_epochs=20
|
||||
# )
|
||||
|
||||
except Exception as e:
|
||||
print(str(e.args[0]))
|
||||
return
|
||||
|
||||
# begin training data
|
||||
try:
|
||||
result = trainer.fine_tune(f"./taggers/{model_title}",
|
||||
learning_rate= learning_rate,
|
||||
|
@ -123,7 +114,9 @@ def main_train(model):
|
|||
except Exception as e:
|
||||
print(str(e.args[0]))
|
||||
return
|
||||
print(f'training finished successfully!')
|
||||
|
||||
# plot training log to evaluate process
|
||||
try:
|
||||
from train_log_plotter import plot_diagram
|
||||
plot_diagram(model_title)
|
||||
|
|
Loading…
Reference in New Issue
Block a user