# Source: Flair_NER/services.py (listed by the file viewer as 71 lines, 2.4 KiB, Python)

import requests
import re
# Other endpoints on the same host, kept for reference only (currently unused):
# url = "http://192.168.23.114:3110/hazm/chunk_tree"
# url = "http://192.168.23.114:3110/hazm/chunk_org"
# url = "http://192.168.23.114:3110/hazm/dependency_parse"
# url = "http://192.168.23.114:3110/laws/effective-laws"
# url = "http://192.168.23.114:3110/hazm/pos_tag_data_maker"
def pos_tagger(
    input_string,
    *,
    url="http://192.168.23.114:3110/hazm/pos_tag_tag",
    timeout=30,
):
    """Send ``input_string`` to the POS-tagging service and locate its tags.

    Args:
        input_string: The sentence to tag; posted as ``{"text": input_string}``.
        url: Endpoint of the tagging service. Defaults to the address that
            was previously hard-coded in this function.
        timeout: Seconds to wait for the service (passed straight to
            ``requests.post``). Pass ``None`` to wait indefinitely, which is
            what this function did before the parameter existed.

    Returns:
        A ``(status_code, pos_obj)`` tuple. On HTTP 200 ``pos_obj`` is the
        list built by ``find_pos_values_in_text``; for any other status it
        is an empty dict (kept as-is for backward compatibility).

    Raises:
        requests.exceptions.RequestException: If the service cannot be
            reached or does not answer within ``timeout``.
    """
    payload = {"text": input_string}
    # Without a timeout an unresponsive service would block the caller forever.
    response = requests.post(url, json=payload, timeout=timeout)

    if response.status_code != 200:
        return response.status_code, {}

    pos_obj = find_pos_values_in_text(input_string, response.json())
    return response.status_code, pos_obj
def find_pos_values_in_text(text, pos_values):
    """Map each POS-tagged token to its whitespace-token position in ``text``.

    Tokens are located left to right: every search starts where the previous
    match ended, so a token that occurs several times in the sentence is
    assigned to its own occurrence rather than always to the first one.

    Args:
        text: The sentence that was tagged.
        pos_values: Mapping with a ``"pos_tags"`` entry holding a sequence of
            ``(token, tag)`` pairs (assumed to be in sentence order).

    Returns:
        A list of dicts, one per token that could be found in ``text``:

        * ``pos_value``: the token, stripped of surrounding whitespace.
        * ``pos_start_token`` / ``pos_end_token``: half-open range of indices
          into ``text.split()`` covering the token. Punctuation glued to a
          word (e.g. a trailing ``"."``) maps to that word's index.
        * ``pos_key``: the tag, stripped of surrounding whitespace.

        Tokens that are empty after stripping, or that do not occur in
        ``text`` at all, are skipped.

    Raises:
        KeyError: If ``pos_values`` has no ``"pos_tags"`` entry.
    """
    pos_obj = []
    cursor = 0  # character offset just past the previous in-order match

    for row_item in pos_values["pos_tags"]:
        pos_value = row_item[0].strip()
        pos_type = row_item[1]
        if not pos_value:
            continue

        # Find this token in the text, preferring the next occurrence after
        # the tokens already consumed.
        char_start = text.find(pos_value, cursor)
        if char_start != -1:
            cursor = char_start + len(pos_value)
        else:
            # Out-of-order token: fall back to the first occurrence anywhere
            # and leave the cursor untouched so later tokens are unaffected.
            char_start = text.find(pos_value)
            if char_start == -1:
                continue
        char_end = char_start + len(pos_value)

        # The first and last characters of a stripped token are never
        # whitespace, so counting the whitespace tokens in the prefix that
        # ends on each of them yields the index of the token containing it.
        pos_start_token = len(text[:char_start + 1].split()) - 1
        pos_end_token = len(text[:char_end].split())

        pos_obj.append({
            'pos_value': pos_value,
            'pos_start_token': pos_start_token,
            'pos_end_token': pos_end_token,
            'pos_key': pos_type.strip(),
        })

    return pos_obj
if __name__ == "__main__":
    # Manual smoke run: send one sample sentence to the tagging service.
    # The returned (status_code, pos_obj) tuple is intentionally discarded.
    sample_sentence = "تقاضای رأی به عدم کفایت سیاسی رییس جمهور در مجلس قابل طرح است."
    pos_tagger(sample_sentence)