"""Upload a BIO-tagged NER dataset to the repository API in bulk.

The input file holds one token per line in the form ``<token> <tag>`` where the
tag is ``O``, ``B-<label>`` or ``I-<label>``.  A blank line ends a section.
Every section becomes one document (see ``createIndex``) and documents are
POSTed to ``url`` in batches.

Where to find the token in the browser dev tools:
Application > Local Storage > id_token
"""
from decimal import Decimal
import json
import os

# api.tavasi.ir token and url.
# Fetch a fresh token from the address below and replace this value when it
# expires.
# NOTE(review): this is a credential committed to source control; consider
# loading it from the environment or a secrets store instead.
TOKEN = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE3MjQ5Mzk3NTksImp0aSI6IktwbjlBdEV1ZGh2WngxZUFXOFRRRUFrcmpcL01xa2JmWiIsImlzcyI6bnVsbCwiZXhwIjoxNzI2MjM5NzU4LCJhdWQiOm51bGwsImRhdGEiOnsiaWQiOjQwLCJmaXJzdF9uYW1lIjoiXHUwNjM5XHUwNjQ1XHUwNjI3XHUwNjMxIiwibGFzdF9uYW1lIjoiXHUwNjJjXHUwNjQ4XHUwNmE5XHUwNjI3XHUwNjMxIiwiZW1haWwiOiJham9rYXI5MUB5YWhvby5jb20iLCJ1c2VybmFtZSI6ImFqb2thciIsInVzZXJfbGV2ZWwiOjF9fQ.r_A8IMQdN50ZM8oPmIq31Pz-phPDJyfQYLmbsLjv4Ao'
url = "https://api.tavasi.ir/repo/dataset/multi/add/qasection_test/ner"

# Mortaza server (192.168.23.160) alternative.  Fetch a token from that server
# and swap both values to target it:
# TOKEN = '<id_token issued by 192.168.23.160>'
# url = "http://192.168.23.160:8080/repo/dataset/multi/add/qasection_test/ner"

ACCEPT = "application/json"
HEADERS = {"Authorization": TOKEN, "Accept": ACCEPT}
headers = HEADERS

# Relative to the current working directory: run the script from the directory
# that contains ``impoert_data``.
address = './impoert_data/data/sample_dataset.txt'

# A batch is sent as soon as it holds more than this many documents.
BULK_SIZE = 100

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 120


class JSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``Decimal`` values as floats."""

    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)


def createIndex(content, result_token, section_order):
    """Build the document sent to the API for one section.

    Args:
        content: Text of the section (tokens joined by spaces).
        result_token: List of entity dicts produced by ``appendResultToken``.
        section_order: 1-based position of the section; stored as a string.

    Returns:
        The document as a plain dict.
    """
    result_objects = [
        {
            "task": "ner",
            "key": "qavanin_ner",
            "label": "خروجی تیم همتا",
            "values": result_token,
        }
    ]
    return {
        "content": content,
        "domain": "پیکره قوانین آزمایشی",
        "ref_id": "",
        "ref_url": "",
        "result_objects": result_objects,
        "order": str(section_order),
    }


def appendResultToken(text, key, begin, tokenNumber, result_token):
    """Close the entity that is currently open, if there is one.

    Args:
        text: Surface text collected for the open entity.
        key: Label of the open entity; empty when no entity is open.
        begin: Index of the entity's first token.
        tokenNumber: Index of the first token *after* the entity; the stored
            ``end`` is ``tokenNumber - 1``.
        result_token: List the finished entity is appended to (mutated).

    Returns:
        ``(key, begin, end, result_token)`` with ``key``, ``begin`` and ``end``
        reset to ``''``, ``-1`` and ``-1`` so the caller can start afresh.
    """
    # Label normalisation (e.g. 'org' -> 'ORG', 'pers' -> 'PER') is currently
    # disabled: keys are stored exactly as they appear in the dataset.
    if key:
        result_token.append({
            "begin": begin,
            "end": tokenNumber - 1,
            "result_key": key,
            "text": text,
        })
    return '', -1, -1, result_token


def iterSections(lines):
    """Parse BIO-tagged lines and yield one document per non-empty section.

    Args:
        lines: Iterable of input lines, each ``<token> <tag>`` or blank.

    Yields:
        ``(lines_read, document)`` where ``lines_read`` is the number of input
        lines consumed so far and ``document`` comes from ``createIndex``.
    """
    key = ''
    begin = -1
    text = ''
    content = ''
    token_number = -1
    result_token = []
    section_order = 0
    lines_read = 0

    for line_index, line in enumerate(lines):
        print('line: ' + str(line_index))
        lines_read = line_index + 1

        if not line.strip():
            # Blank line: the section is over.  Empty sections (repeated blank
            # lines, empty input) are not emitted.
            if content:
                key, begin, _, result_token = appendResultToken(
                    text, key, begin, token_number + 1, result_token)
                section_order += 1
                yield lines_read, createIndex(
                    content, result_token, section_order)
            key, begin, text, content = '', -1, '', ''
            token_number = -1
            result_token = []
            continue

        parts = line.split()
        if len(parts) != 2:
            # Malformed line: it adds nothing to `content`, so it must not
            # consume a token index either.
            continue

        word, tag = parts
        token_number += 1
        content += ' ' + word

        if tag.startswith('I-'):
            text += ' ' + word
            continue

        if tag == 'O' or tag.startswith('B-'):
            # Either tag ends whatever entity was open before this token.
            key, begin, _, result_token = appendResultToken(
                text, key, begin, token_number, result_token)

        if tag.startswith('B-'):
            text = word
            begin = token_number
            # Strip only the prefix; labels may themselves contain "B-".
            key = tag[len('B-'):]

    if content:
        # The input does not end with a blank line, so the last entity may
        # reach the final token: close it one past that token.
        key, begin, _, result_token = appendResultToken(
            text, key, begin, token_number + 1, result_token)
        section_order += 1
        yield lines_read, createIndex(content, result_token, section_order)


def postBulk(bulk_data):
    """POST a batch of documents to ``url`` and print the response.

    Args:
        bulk_data: List of documents built by ``createIndex``.

    Returns:
        The ``requests.Response`` object.
    """
    # Imported here so the parsing helpers stay importable without the
    # third-party HTTP client installed.
    import requests

    payload = json.dumps(bulk_data, cls=JSONEncoder)
    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=REQUEST_TIMEOUT)
    print(response)
    return response


def main():
    """Read the dataset at ``address`` and upload it in batches."""
    # Open the text file.
    with open(address, 'r', encoding='utf-8') as file:
        input_text = file.read()

    # Convert the text to a list of lines.
    lines = input_text.strip().split('\n')

    bulk_data = []
    bulk_count = 1
    for count, data in iterSections(lines):
        bulk_data.append(data)
        bulk_count += 1
        if len(bulk_data) > BULK_SIZE:
            print('=' * 30)
            print('count ' + str(count))
            postBulk(bulk_data)
            bulk_data = []
            bulk_count = 1

    if bulk_data:
        print(bulk_count)
        postBulk(bulk_data)

    print("***************** end *****************")


if __name__ == "__main__":
    main()