ارسال کلیدواژه های 11 هزارتایی به aitools

This commit is contained in:
ajokar 2024-09-28 16:47:38 +03:30
parent b38c9d1444
commit d1aee6adeb
3 changed files with 137695 additions and 5039 deletions

View File

@@ -14,9 +14,9 @@ headers = HEADERS
address = os.getcwd()
if "import_data" in address:
-address += "/data/clean_sections_kw_15k.json"
+address += "/data/clean_sections_kw_11k.json"
else:
-address += "/import_data/data/clean_sections_kw_15k.json"
+address += "/import_data/data/clean_sections_kw_11k.json"
# open .json file
lines = read_from_json(address)
@@ -58,7 +58,7 @@ def createIndex(id, content, result_objects):
output = {
"id" : id,
"content": content,
-"domain": "استخراج کلیدواژه 15 هزارتایی",
+"domain": "استخراج کلیدواژه 11 هزارتایی",
"ref_id": "",
"ref_url": "",
"result_objects": result_objects,
@@ -128,7 +128,12 @@ for i, line in enumerate(lines):
result_objects = []
llam_prompt_kws = line["keywords"]
# values = extract_keywords(llam_prompt_kw)
-values = llam_prompt_kws
+values = []
+for item in llam_prompt_kws:
+values.append({
+"text": item,
+})
result_objects.append(
{
"task": "keyword",
@@ -154,7 +159,7 @@ for i, line in enumerate(lines):
bulk_data.append(data)
bulk_count += 1
-if bulk_data.__len__() > 500:
+if bulk_data.__len__() > 1000:
print("=" * 30)
print("count " + str(count))
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long