Compare commits
9 Commits
3141a929a0
...
a0df99bc0e
Author | SHA1 | Date | |
---|---|---|---|
a0df99bc0e | |||
3e4e55c93f | |||
9c24a9746a | |||
1bbc3dd13a | |||
8e87550213 | |||
9734482ca3 | |||
93142f70e9 | |||
d6f3f95c37 | |||
7b9537dd55 |
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,3 +3,6 @@ final-model.pt
|
||||||
pytorch_model.bin
|
pytorch_model.bin
|
||||||
sentencepiece.bpe.model
|
sentencepiece.bpe.model
|
||||||
tokenizer.json
|
tokenizer.json
|
||||||
|
/__pycache__
|
||||||
|
.gitignore
|
||||||
|
log
|
||||||
|
|
35375
data/DATASET140402_no_aref2.txt
Normal file
35375
data/DATASET140402_no_aref2.txt
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -3516,3 +3516,125 @@ Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
Span[224:225]: "مشهد"/LOC2 /%/ 1.0
|
Span[224:225]: "مشهد"/LOC2 /%/ 1.0
|
||||||
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
|
||||||
|
|
||||||
|
|
||||||
|
**************************************************
|
||||||
|
############################################################
|
||||||
|
Model Name: HooshvareLab--bert-base-parsbert-armanner-uncased--2025-7-20--23-45-26
|
||||||
|
Found Entity Count: 18
|
||||||
|
inference time: 2025-07-21 17:03:32.727174
|
||||||
|
############################################################
|
||||||
|
Span[3:6]: "کلیه دستگاههای اجرائی"/ORG2 /%/ 1.0
|
||||||
|
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
||||||
|
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
||||||
|
Span[47:49]: "نیروهای مسلح"/ORG /%/ 1.0
|
||||||
|
Span[49:50]: "جمهوری"/LOC /%/ 0.92
|
||||||
|
Span[50:52]: "اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[137:147]: "نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 0.98
|
||||||
|
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
||||||
|
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
||||||
|
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[183:187]: "سازمان جوانان هلال احمر"/ORG /%/ 1.0
|
||||||
|
Span[199:202]: "قانون هوای پاک"/REF /%/ 1.0
|
||||||
|
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
||||||
|
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[220:222]: "محمد سرمدی"/PER /%/ 1.0
|
||||||
|
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
||||||
|
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
||||||
|
**************************************************
|
||||||
|
|
||||||
|
**************************************************
|
||||||
|
############################################################
|
||||||
|
Model Name: HooshvareLab--bert-base-parsbert-armanner-uncased--2025-7-20--23-45-26
|
||||||
|
Found Entity Count: 18
|
||||||
|
inference time: 2025-07-21 17:37:56.924980
|
||||||
|
############################################################
|
||||||
|
Span[3:6]: "کلیه دستگاههای اجرائی"/ORG2 /%/ 1.0
|
||||||
|
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
||||||
|
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
||||||
|
Span[47:49]: "نیروهای مسلح"/ORG /%/ 1.0
|
||||||
|
Span[49:50]: "جمهوری"/LOC /%/ 0.92
|
||||||
|
Span[50:52]: "اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[137:147]: "نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 0.98
|
||||||
|
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
||||||
|
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
||||||
|
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[183:187]: "سازمان جوانان هلال احمر"/ORG /%/ 1.0
|
||||||
|
Span[199:202]: "قانون هوای پاک"/REF /%/ 1.0
|
||||||
|
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
||||||
|
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[220:222]: "محمد سرمدی"/PER /%/ 1.0
|
||||||
|
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
||||||
|
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
||||||
|
**************************************************
|
||||||
|
|
||||||
|
**************************************************
|
||||||
|
############################################################
|
||||||
|
Model Name: HooshvareLab--bert-base-parsbert-peymaner-uncased--2025-7-21--0-40-1
|
||||||
|
Found Entity Count: 15
|
||||||
|
inference time: 2025-07-21 17:39:02.757138
|
||||||
|
############################################################
|
||||||
|
Span[3:6]: "کلیه دستگاههای اجرائی"/ORG2 /%/ 1.0
|
||||||
|
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
||||||
|
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
||||||
|
Span[47:52]: "نیروهای مسلح جمهوری اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[136:138]: "روز نوزدهم"/EVENT /%/ 0.89
|
||||||
|
Span[144:145]: "نود"/EVENT /%/ 0.92
|
||||||
|
Span[146:147]: "سه"/EVENT /%/ 0.78
|
||||||
|
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
||||||
|
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
||||||
|
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
||||||
|
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
||||||
|
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
||||||
|
**************************************************
|
||||||
|
|
||||||
|
**************************************************
|
||||||
|
############################################################
|
||||||
|
Model Name: HooshvareLab--bert-fa-base-uncased-ner-arman--2025-7-21--0-12-43
|
||||||
|
Found Entity Count: 17
|
||||||
|
inference time: 2025-07-21 17:40:54.675849
|
||||||
|
############################################################
|
||||||
|
Span[5:6]: "اجرائی"/ORG2 /%/ 1.0
|
||||||
|
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
||||||
|
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
||||||
|
Span[47:49]: "نیروهای مسلح"/ORG /%/ 1.0
|
||||||
|
Span[49:52]: "جمهوری اسلامی ایران"/LOC /%/ 0.98
|
||||||
|
Span[61:64]: "جمهوری اسلامی ایران"/LOC /%/ 1.0
|
||||||
|
Span[136:147]: "روز نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 0.94
|
||||||
|
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
||||||
|
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
||||||
|
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[183:187]: "سازمان جوانان هلال احمر"/ORG /%/ 1.0
|
||||||
|
Span[199:200]: "قانون"/REF /%/ 1.0
|
||||||
|
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
||||||
|
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[220:222]: "محمد سرمدی"/PER /%/ 1.0
|
||||||
|
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
||||||
|
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
||||||
|
**************************************************
|
||||||
|
|
||||||
|
**************************************************
|
||||||
|
############################################################
|
||||||
|
Model Name: HooshvareLab--bert-fa-base-uncased-ner-peyma--2025-7-20--23-18-3
|
||||||
|
Found Entity Count: 14
|
||||||
|
inference time: 2025-07-21 17:42:01.419288
|
||||||
|
############################################################
|
||||||
|
Span[4:6]: "دستگاههای اجرائی"/ORG2 /%/ 1.0
|
||||||
|
Span[11:15]: "قانون مدیریت خدمات کشوری"/REF /%/ 1.0
|
||||||
|
Span[31:35]: "قانون محاسبات عمومی کشور"/REF /%/ 1.0
|
||||||
|
Span[47:52]: "نیروهای مسلح جمهوری اسلامی ایران"/ORG /%/ 1.0
|
||||||
|
Span[59:64]: "نیروی انتظامی جمهوری اسلامی ایران"/ORG /%/ 0.95
|
||||||
|
Span[120:124]: "قانون خانواده و جوانی"/REF /%/ 0.98
|
||||||
|
Span[136:147]: "روز نوزدهم دی ماه سال یکهزار وسیصد و نود و سه"/EVENT /%/ 1.0
|
||||||
|
Span[166:168]: "مجلس ملی"/ORG /%/ 1.0
|
||||||
|
Span[175:177]: "شورای نگهبان"/ORG /%/ 1.0
|
||||||
|
Span[178:181]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[204:207]: "مجلس شورای اسلامی"/ORG /%/ 1.0
|
||||||
|
Span[208:211]: "سازمان محیط زیست"/ORG /%/ 1.0
|
||||||
|
Span[224:225]: "مشهد"/LOC /%/ 1.0
|
||||||
|
Span[226:227]: "سمنان"/LOC /%/ 1.0
|
||||||
|
**************************************************
|
||||||
|
|
|
@ -22,5 +22,7 @@ Detailed Results: {result.detailed_results}
|
||||||
return result_text
|
return result_text
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
result = do_evaluate()
|
model_folder = "2025-07-21--17-51-49--HooshvareLab--bert-fa-base-uncased-ner-peyma"
|
||||||
print(do_evaluate())
|
model_path = f"./taggers/{model_folder}/final-model.pt"
|
||||||
|
result = do_evaluate(model_path)
|
||||||
|
print(result)
|
||||||
|
|
10
inference.py
10
inference.py
|
@ -237,7 +237,15 @@ def inference_main(trained_model,input_sentence):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print("do inf ... ")
|
print("do inf ... ")
|
||||||
result = inference_main('',read_file())
|
result = inference_main('HooshvareLab--bert-fa-base-uncased-ner-peyma--2025-7-20--23-18-3',read_file())
|
||||||
print(result)
|
print(result)
|
||||||
|
|
||||||
|
"""
|
||||||
|
HooshvareLab--bert-base-parsbert-armanner-uncased--2025-7-20--23-45-26
|
||||||
|
HooshvareLab--bert-base-parsbert-ner-uncased--2025-7-20--22-51-20
|
||||||
|
HooshvareLab--bert-base-parsbert-peymaner-uncased--2025-7-21--0-40-1
|
||||||
|
HooshvareLab--bert-fa-base-uncased-ner-arman--2025-7-21--0-12-43
|
||||||
|
HooshvareLab--bert-fa-base-uncased-ner-peyma--2025-7-20--23-18-3
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
40
train.py
40
train.py
|
@ -33,6 +33,11 @@ from flair.embeddings import TransformerDocumentEmbeddings
|
||||||
#model = "HooshvareLab/bert-base-parsbert-peymaner-uncased" # جالب نبود!
|
#model = "HooshvareLab/bert-base-parsbert-peymaner-uncased" # جالب نبود!
|
||||||
#model = "HooshvareLab/bert-base-parsbert-armanner-uncased" # جالب نبود!
|
#model = "HooshvareLab/bert-base-parsbert-armanner-uncased" # جالب نبود!
|
||||||
|
|
||||||
|
def digit_correct(input_num):
|
||||||
|
if input_num <10:
|
||||||
|
return f'0{input_num}'
|
||||||
|
return str(input_num)
|
||||||
|
|
||||||
def main_train(model):
|
def main_train(model):
|
||||||
"""
|
"""
|
||||||
آموزش مدل برای تسک NER
|
آموزش مدل برای تسک NER
|
||||||
|
@ -41,7 +46,7 @@ def main_train(model):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
time = datetime.datetime.now()
|
time = datetime.datetime.now()
|
||||||
model_title = f"{model}--{time.year}-{time.month}-{time.day}--{time.hour}-{time.minute}-{time.second}".replace('/','--')
|
model_title = f"{time.year}-{digit_correct(time.month)}-{digit_correct(time.day)}--{digit_correct(time.hour)}-{digit_correct(time.minute)}-{digit_correct(time.second)}--{model}".replace('/','--')
|
||||||
print(f'\nMODEL:: {model}\n')
|
print(f'\nMODEL:: {model}\n')
|
||||||
|
|
||||||
#!pip install 'flair==0.10'
|
#!pip install 'flair==0.10'
|
||||||
|
@ -54,11 +59,11 @@ def main_train(model):
|
||||||
#اسم دیتاست اینجا تنظیم شود
|
#اسم دیتاست اینجا تنظیم شود
|
||||||
corpus = ColumnCorpus(data_folder, columns,
|
corpus = ColumnCorpus(data_folder, columns,
|
||||||
#train_file = 'peyma_train.txt')
|
#train_file = 'peyma_train.txt')
|
||||||
train_file = 'DATASET140402_no_aref.txt', # qavanin 36K tokens
|
train_file = 'DATASET140402_no_aref2.txt', # qavanin 36K tokens
|
||||||
test_file = 'test_ds_new.txt',) # test 110 sections - 6.7K
|
# test_file = 'test_ds_new.txt', # test 110 sections - 6.7K
|
||||||
#dev_file = 'dev split 2.txt'
|
#dev_file = 'dev split 2.txt'
|
||||||
#max_sentence_length=500
|
#max_sentence_length=500
|
||||||
#)
|
)
|
||||||
|
|
||||||
# tag to predict
|
# tag to predict
|
||||||
tag_type = 'ner'
|
tag_type = 'ner'
|
||||||
|
@ -159,16 +164,18 @@ def main_train(model):
|
||||||
F1 Score: {result}
|
F1 Score: {result}
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'''
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'''
|
||||||
|
|
||||||
# final_result = f"""Model Name: {model}
|
hyperparameters = f"""learning_rate: {learning_rate} - mini_batch_size: {mini_batch_size} - max_epochs: {max_epochs}"""
|
||||||
# Fine-Tune Parameters: {hyperparameters}
|
|
||||||
# {train_result}
|
final_result = f"""Model Name: {model}
|
||||||
# {evaluate_result}\n
|
Fine-Tune Parameters: {hyperparameters}
|
||||||
# Fine_Tune time: {operation_time}
|
{train_result}
|
||||||
# ------------------------------------------------------------------------------------
|
{evaluate_result}\n
|
||||||
# ------------------------------------------------------------------------------------\n
|
Fine_Tune time: {operation_time}
|
||||||
# """
|
------------------------------------------------------------------------------------
|
||||||
# save_to_file_by_address('./data/train_log.txt', final_result)
|
------------------------------------------------------------------------------------\n
|
||||||
# print(' Saving results finished! ')
|
"""
|
||||||
|
save_to_file_by_address('./data/train_log.txt', final_result)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
models = """
|
models = """
|
||||||
|
@ -177,8 +184,11 @@ HooshvareLab/bert-fa-base-uncased-ner-peyma
|
||||||
HooshvareLab/bert-base-parsbert-armanner-uncased
|
HooshvareLab/bert-base-parsbert-armanner-uncased
|
||||||
HooshvareLab/bert-fa-base-uncased-ner-arman
|
HooshvareLab/bert-fa-base-uncased-ner-arman
|
||||||
HooshvareLab/bert-base-parsbert-peymaner-uncased
|
HooshvareLab/bert-base-parsbert-peymaner-uncased
|
||||||
HooshvareLab/distilbert-fa-zwnj-base-ner
|
|
||||||
"""
|
"""
|
||||||
|
models = """
|
||||||
|
HooshvareLab/bert-fa-base-uncased-ner-peyma
|
||||||
|
"""
|
||||||
|
# HooshvareLab/distilbert-fa-zwnj-base-ner
|
||||||
models_with_error= """
|
models_with_error= """
|
||||||
nicolauduran45/affilgood-ner-multilingual-v2 - error
|
nicolauduran45/affilgood-ner-multilingual-v2 - error
|
||||||
Amirmerfan/bert-base-uncased-persian-ner-50k-base - error
|
Amirmerfan/bert-base-uncased-persian-ner-50k-base - error
|
||||||
|
|
Loading…
Reference in New Issue
Block a user