"""Client for the ``/hazm/pos_tag_tag`` part-of-speech tagging HTTP endpoint.

``pos_tagger`` posts a text to the service, and ``find_pos_values_in_text``
maps every returned (token, tag) pair onto the whitespace-separated tokens of
that text.
"""
import re

import requests

# Alternative endpoints on the same host, kept from the original file for
# reference:
# url = "http://192.168.23.114:3110/hazm/chunk_tree"
# url = "http://192.168.23.114:3110/hazm/chunk_org"
# url = "http://192.168.23.114:3110/hazm/dependency_parse"
# url = "http://192.168.23.114:3110/laws/effective-laws"
# url = "http://192.168.23.114:3110/hazm/pos_tag_data_maker"

POS_TAG_URL = "http://192.168.23.114:3110/hazm/pos_tag_tag"

# Upper bound (seconds) for the HTTP call; without one, ``requests`` waits
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30


def pos_tagger(input_string, *, url=POS_TAG_URL, timeout=REQUEST_TIMEOUT_SECONDS):
    """Send *input_string* to the POS-tagging service and align its answer.

    Args:
        input_string: Text to tag; sent as ``{"text": input_string}``.
        url: Endpoint to post to. Defaults to ``POS_TAG_URL``.
        timeout: Timeout passed to ``requests.post`` (seconds, or a
            ``(connect, read)`` tuple).

    Returns:
        A ``(status_code, result)`` tuple. For status 200, ``result`` is the
        list built by ``find_pos_values_in_text``. For any other status it is
        an empty dict.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            *timeout* elapses.
        KeyError: If a 200 response carries no ``"pos_tags"`` key.
    """
    response = requests.post(url, json={"text": input_string}, timeout=timeout)
    if response.status_code != 200:
        # The failure payload stays an empty dict (not a list) so existing
        # callers keep seeing the value they always received.
        return response.status_code, {}
    pos_obj = find_pos_values_in_text(input_string, response.json())
    return response.status_code, pos_obj


def _token_index_at(text, offset):
    """Return the index of the whitespace-separated token containing *offset*.

    When *offset* is the first character of a token (or lies in whitespace),
    this is the number of tokens that precede it. When it falls in the middle
    of a token, e.g. punctuation glued to the preceding word, the index of
    that enclosing token is returned.
    """
    tokens_before = len(text[:offset].split())
    starts_mid_token = offset > 0 and not text[offset - 1].isspace()
    # ``split`` counted the partially covered token as a whole one; undo that.
    return tokens_before - 1 if starts_mid_token else tokens_before


def find_pos_values_in_text(text, pos_values):
    """Locate every tagged token of *pos_values* inside *text*.

    Tokens are searched left to right: each search resumes where the previous
    match ended, so a word that occurs several times is mapped to its
    successive occurrences instead of always to the first one. If a token is
    not found after the previous match, the whole text is searched (the
    original behaviour). Tokens that do not occur in *text* at all are
    skipped.

    Args:
        text: The text that was tagged.
        pos_values: Mapping with a ``"pos_tags"`` entry holding rows whose
            first item is the token string and whose second item is its tag.

    Returns:
        A list of dicts, one per located token, with the keys
        ``'pos_value'`` (stripped token), ``'pos_start_token'`` and
        ``'pos_end_token'`` (half-open range of indices into
        ``text.split()``) and ``'pos_key'`` (stripped tag).

    Raises:
        KeyError: If *pos_values* has no ``"pos_tags"`` key.
    """
    pos_obj = []
    cursor = 0  # character offset just past the most recent in-order match
    for row_item in pos_values["pos_tags"]:
        pos_value = row_item[0]
        pos_type = row_item[1]

        # Find this token in the text, preferring the first occurrence after
        # the previously matched token.
        pattern = re.compile(re.escape(pos_value))
        match = pattern.search(text, cursor) or pattern.search(text)
        if match is None:
            continue
        # Never move backwards when the fallback matched an earlier spot.
        cursor = max(cursor, match.end())

        # Ignore leading whitespace of the value when deciding which token
        # the match starts in (the value is stripped in the output as well).
        leading_ws = len(pos_value) - len(pos_value.lstrip())
        pos_start_token = _token_index_at(text, match.start() + leading_ws)
        pos_end_token = pos_start_token + len(pos_value.split())

        pos_obj.append({
            'pos_value': pos_value.strip(),
            'pos_start_token': pos_start_token,
            'pos_end_token': pos_end_token,
            'pos_key': pos_type.strip(),
        })
    return pos_obj


if __name__ == "__main__":
    my_text = "تقاضای رأی به عدم کفایت سیاسی رییس جمهور در مجلس قابل طرح است."
    pos_tagger(my_text)