Compare commits

...

2 Commits

6 changed files with 234418 additions and 5039 deletions

View File

@ -14,9 +14,9 @@ headers = HEADERS
address = os.getcwd()
if "import_data" in address:
address += "/data/clean_sections_kw_15k.json"
address += "/data/clean_sections_kw_11k.json"
else:
address += "/import_data/data/clean_sections_kw_15k.json"
address += "/import_data/data/clean_sections_kw_11k.json"
# open .json file
lines = read_from_json(address)
@ -58,7 +58,7 @@ def createIndex(id, content, result_objects):
output = {
"id" : id,
"content": content,
"domain": "استخراج کلیدواژه 15 هزارتایی",
"domain": "استخراج کلیدواژه 11 هزارتایی",
"ref_id": "",
"ref_url": "",
"result_objects": result_objects,
@ -128,7 +128,12 @@ for i, line in enumerate(lines):
result_objects = []
llam_prompt_kws = line["keywords"]
# values = extract_keywords(llam_prompt_kw)
values = llam_prompt_kws
values = []
for item in llam_prompt_kws:
values.append({
"text": item,
})
result_objects.append(
{
"task": "keyword",
@ -154,7 +159,7 @@ for i, line in enumerate(lines):
bulk_data.append(data)
bulk_count += 1
if bulk_data.__len__() > 500:
if bulk_data.__len__() > 1000:
print("=" * 30)
print("count " + str(count))
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

Binary file not shown.