43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
"""
|
||
سورس اجرای پردازش های مختلف روی اجزای قانونی
|
||
شامل: کلاسیفیکیشن، تشخیص موجودیت های نامدار، استخراج بردار کلمات، استخراج کلیدواژه ها و ساده‌سازی متن
|
||
"""
|
||
from p1_classifier import do_classify
|
||
from p2_ner_recognizer import do_ner_recognize
|
||
from p3_words_embedder import do_word_embedder
|
||
from p4_keyword_extractor import do_keyword_extract
|
||
from p5_simplifier import do_simplify
|
||
|
||
from elastic_helper import ElasticHelper
|
||
|
||
def get_sections(sections_path: str = "/home/gpu/data_11/14040423/mj_qa_section.zip"):
    """Load the sections that the NLP pipeline will process.

    Args:
        sections_path: Location of the sections archive handed to
            ``ElasticHelper.iterateJsonFile``. Defaults to the path that
            used to be hard-coded in this function, so existing
            zero-argument callers behave exactly as before.

    Returns:
        Whatever ``ElasticHelper.iterateJsonFile`` returns for that path.
        NOTE(review): presumably an iterable of section records -- confirm
        against ``elastic_helper``.
    """
    eh_obj = ElasticHelper()
    # The second positional argument is forwarded unchanged from the original
    # call; its meaning is defined by ElasticHelper -- TODO confirm.
    return eh_obj.iterateJsonFile(sections_path, True)
|
||
|
||
def main():
    """Run every NLP stage, in order, over the loaded sections.

    The value returned by each stage is passed as the input of the next
    one. A confirmation message is printed once the last stage returns.
    """
    # Fetch the sections that the NLP stages operate on.
    data = get_sections()

    # Ordered pipeline:
    #   1. classify
    #   2. named-entity recognition
    #   3. word embedding
    #   4. keyword extraction
    #   5. simplify
    pipeline = (
        do_classify,
        do_ner_recognize,
        do_word_embedder,
        do_keyword_extract,
        do_simplify,
    )
    for stage in pipeline:
        data = stage(data)

    print('all nlp processes finished successfully!')
|
||
|
||
|
||
|