ارسال کلیدواژه های 11 هزارتایی به aitools
This commit is contained in:
parent
b38c9d1444
commit
d1aee6adeb
|
@ -14,9 +14,9 @@ headers = HEADERS
|
||||||
|
|
||||||
address = os.getcwd()
|
address = os.getcwd()
|
||||||
if "import_data" in address:
|
if "import_data" in address:
|
||||||
address += "/data/clean_sections_kw_15k.json"
|
address += "/data/clean_sections_kw_11k.json"
|
||||||
else:
|
else:
|
||||||
address += "/import_data/data/clean_sections_kw_15k.json"
|
address += "/import_data/data/clean_sections_kw_11k.json"
|
||||||
|
|
||||||
# open .json file
|
# open .json file
|
||||||
lines = read_from_json(address)
|
lines = read_from_json(address)
|
||||||
|
@ -58,7 +58,7 @@ def createIndex(id, content, result_objects):
|
||||||
output = {
|
output = {
|
||||||
"id" : id,
|
"id" : id,
|
||||||
"content": content,
|
"content": content,
|
||||||
"domain": "استخراج کلیدواژه 15 هزارتایی",
|
"domain": "استخراج کلیدواژه 11 هزارتایی",
|
||||||
"ref_id": "",
|
"ref_id": "",
|
||||||
"ref_url": "",
|
"ref_url": "",
|
||||||
"result_objects": result_objects,
|
"result_objects": result_objects,
|
||||||
|
@ -128,7 +128,12 @@ for i, line in enumerate(lines):
|
||||||
result_objects = []
|
result_objects = []
|
||||||
llam_prompt_kws = line["keywords"]
|
llam_prompt_kws = line["keywords"]
|
||||||
# values = extract_keywords(llam_prompt_kw)
|
# values = extract_keywords(llam_prompt_kw)
|
||||||
values = llam_prompt_kws
|
|
||||||
|
values = []
|
||||||
|
for item in llam_prompt_kws:
|
||||||
|
values.append({
|
||||||
|
"text": item,
|
||||||
|
})
|
||||||
result_objects.append(
|
result_objects.append(
|
||||||
{
|
{
|
||||||
"task": "keyword",
|
"task": "keyword",
|
||||||
|
@ -154,7 +159,7 @@ for i, line in enumerate(lines):
|
||||||
bulk_data.append(data)
|
bulk_data.append(data)
|
||||||
|
|
||||||
bulk_count += 1
|
bulk_count += 1
|
||||||
if bulk_data.__len__() > 500:
|
if bulk_data.__len__() > 1000:
|
||||||
print("=" * 30)
|
print("=" * 30)
|
||||||
print("count " + str(count))
|
print("count " + str(count))
|
||||||
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!
|
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!
|
||||||
|
|
137098
import_data/data/clean_sections_kw_11k.json
Normal file
137098
import_data/data/clean_sections_kw_11k.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user