"""Bulk-upload keyword annotations for law-text sections to the Tavasi API.

Reads section texts from an Excel spreadsheet, wraps each section in a
"qasection/keyword" payload, and POSTs the records to the dataset bulk-add
endpoint in batches.
"""

import json
from decimal import Decimal

# NOTE(review): a live bearer token is hard-coded in source control — rotate
# it and load it from an environment variable or a secrets store instead.
TOKEN = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE3MTg3ODk5OTEsImp0aSI6IlNGaWVOcWIxeEFzZ252QmtvUkxXWU9UbXR2VTNvT3R6IiwiaXNzIjoiaHR0cHM6XC9cL2NwLnRhdmFzaS5pciIsImV4cCI6MTcyMDA4OTk5MCwiYXVkIjoiaHR0cHM6XC9cL2NwLnRhdmFzaS5pciIsImRhdGEiOnsiaWQiOjEsImZpcnN0X25hbWUiOiJcdTA2MjhcdTA2MzFcdTA2NDZcdTA2MjdcdTA2NDVcdTA2NDcgXHUwNjQ2XHUwNjQ4XHUwNmNjXHUwNjMzIiwibGFzdF9uYW1lIjoiXHUwNjQxXHUwNjQ2XHUwNmNjIiwiZW1haWwiOiJkZXZAZ21haWwuY29tIiwidXNlcm5hbWUiOiJkZXYiLCJ1c2VyX2xldmVsIjoyfX0.7DzFqHLee3ZI7EnZYjy5ChtVWhT3QJvBNUbLUdPssSo'
ACCEPT = "application/json"
HEADERS = {"Authorization": TOKEN, "Accept": ACCEPT}

url = "https://api.tavasi.ir/repo/dataset/multi/add/qasection/keyword"
headers = HEADERS

# Input spreadsheet and the column that holds the section texts.
file_address = './new_law_excel.xlsx'
column_name = "content"


class JSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``Decimal`` values as plain floats."""

    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)


def createIndex(content, extracted_keywords):
    """Build one bulk-upload record for *content*.

    Parameters
    ----------
    content : str
        The section text being annotated.
    extracted_keywords : list
        Keyword values produced for the section (currently always empty —
        no extraction model is wired in yet).

    Returns
    -------
    dict
        A record in the shape expected by the bulk-add endpoint.
    """
    result_objects = [
        {
            "task": "keyword",
            "key": "lama3-8b",
            "label": "لاما3 فارسی شده",
            "values": extracted_keywords,
        }
    ]
    return {
        "content": content,
        "domain": "مقررات",
        "ref_id": "",
        "ref_url": "",
        "result_objects": result_objects,
    }


def _post_batch(batch):
    """Serialise *batch* with JSONEncoder and POST it to the endpoint."""
    # Local import keeps this module importable (e.g. to reuse createIndex)
    # in environments where requests is not installed.
    import requests

    payload = json.dumps(batch, cls=JSONEncoder)
    # timeout added so a stalled server cannot hang the script forever
    response = requests.post(url, headers=headers, data=payload, timeout=60)
    print(response.text)


def main():
    """Read every section from the spreadsheet and upload them in batches."""
    # read_from_excel is presumably defined/imported elsewhere in this
    # project — TODO confirm it is in scope before running.
    contents_list = list(read_from_excel(file_address, column_name))

    bulk_data = []
    bulk_count = 1  # 1-based running counter, mirrors the original log output
    count = 0
    for section in contents_list:
        count += 1
        extracted_keywords = []  # placeholder until keyword extraction is wired in
        bulk_data.append(createIndex(section, extracted_keywords))
        bulk_count += 1
        # Flush once more than 10 records have accumulated (original threshold).
        if len(bulk_data) > 10:
            print('=' * 10)
            print('count' + str(count))
            _post_batch(bulk_data)
            bulk_data = []
            bulk_count = 1

    # Flush whatever is left after the last full batch.
    if bulk_data:
        print(bulk_count)
        _post_batch(bulk_data)

    print("***************** end ")


if __name__ == "__main__":
    main()