"""
|
||
سورس اجرای پردازش های مختلف روی اجزای قانونی
|
||
شامل: کلاسیفیکیشن، تشخیص موجودیت های نامدار، استخراج بردار کلمات، استخراج کلیدواژه ها و سادهسازی(بازنمایی) متن
|
||
"""
|
||
from p1_classifier import do_classify
from p2_ner_recognizer import do_ner_recognize
# from p3_words_embedder import do_word_embedder
# from p4_keyword_extractor import do_keyword_extract
# from p5_simplifier import do_simplify

from elastic_helper import ElasticHelper


def get_sections():
    # Zipped JSON dump of the law sections (presumably an Elasticsearch export)
    sections_path = "/home/gpu/data_11/14040423/mj_qa_section.zip"
    eh_obj = ElasticHelper()
    # Read the stored sections back from the zipped dump
    sections = eh_obj.iterateJsonFile(sections_path, True)
    return sections
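
# NOTE (assumption): judging by the debug snippet kept in main() below, each
# record yielded by get_sections() is dict-like with at least an 'id' and a
# 'source' field, e.g.:
#
#   for item in get_sections():
#       print(item['id'], item['source'])
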
def main():

    # Get the sections to run the NLP processes on
    sections = get_sections()

    # Debug helper (kept commented out): restrict the run to a single section,
    # selected by id, so a change can be tested quickly on one record.
    # dictsections = {}
    # for item in sections:
    #     if not item['id'] == 'qs2180272':
    #         continue
    #     dictsections[item['id']] = item['source']
    #     break
    # sections = dictsections

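    # NOTE (assumption): each do_* step below takes the sections collection and
    # returns it again with that step's results attached; the exact shape of what
    # each step adds is defined in the corresponding p*_ module, not here.
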
    # 1. classify
    sections = do_classify(sections)

    # 2. ner_recognize
    sections = do_ner_recognize(sections)

    # 3. word embedder
    # sections = do_word_embedder(sections)

    # 4. keyword extract
    # result_kw = do_keyword_extract(sections)

    # 5. simplify
    # result_simp = do_simplify(sections)

    print('all nlp processes finished successfully!')


if __name__ == "__main__":
    main()