Compare commits
2 Commits
b38c9d1444
...
6bdac3f01e
Author | SHA1 | Date | |
---|---|---|---|
6bdac3f01e | |||
d1aee6adeb |
|
@ -14,9 +14,9 @@ headers = HEADERS
|
||||||
|
|
||||||
address = os.getcwd()
|
address = os.getcwd()
|
||||||
if "import_data" in address:
|
if "import_data" in address:
|
||||||
address += "/data/clean_sections_kw_15k.json"
|
address += "/data/clean_sections_kw_11k.json"
|
||||||
else:
|
else:
|
||||||
address += "/import_data/data/clean_sections_kw_15k.json"
|
address += "/import_data/data/clean_sections_kw_11k.json"
|
||||||
|
|
||||||
# open .json file
|
# open .json file
|
||||||
lines = read_from_json(address)
|
lines = read_from_json(address)
|
||||||
|
@ -58,7 +58,7 @@ def createIndex(id, content, result_objects):
|
||||||
output = {
|
output = {
|
||||||
"id" : id,
|
"id" : id,
|
||||||
"content": content,
|
"content": content,
|
||||||
"domain": "استخراج کلیدواژه 15 هزارتایی",
|
"domain": "استخراج کلیدواژه 11 هزارتایی",
|
||||||
"ref_id": "",
|
"ref_id": "",
|
||||||
"ref_url": "",
|
"ref_url": "",
|
||||||
"result_objects": result_objects,
|
"result_objects": result_objects,
|
||||||
|
@ -128,7 +128,12 @@ for i, line in enumerate(lines):
|
||||||
result_objects = []
|
result_objects = []
|
||||||
llam_prompt_kws = line["keywords"]
|
llam_prompt_kws = line["keywords"]
|
||||||
# values = extract_keywords(llam_prompt_kw)
|
# values = extract_keywords(llam_prompt_kw)
|
||||||
values = llam_prompt_kws
|
|
||||||
|
values = []
|
||||||
|
for item in llam_prompt_kws:
|
||||||
|
values.append({
|
||||||
|
"text": item,
|
||||||
|
})
|
||||||
result_objects.append(
|
result_objects.append(
|
||||||
{
|
{
|
||||||
"task": "keyword",
|
"task": "keyword",
|
||||||
|
@ -154,7 +159,7 @@ for i, line in enumerate(lines):
|
||||||
bulk_data.append(data)
|
bulk_data.append(data)
|
||||||
|
|
||||||
bulk_count += 1
|
bulk_count += 1
|
||||||
if bulk_data.__len__() > 500:
|
if bulk_data.__len__() > 1000:
|
||||||
print("=" * 30)
|
print("=" * 30)
|
||||||
print("count " + str(count))
|
print("count " + str(count))
|
||||||
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!
|
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!
|
||||||
|
|
137098
import_data/data/clean_sections_kw_11k.json
Normal file
137098
import_data/data/clean_sections_kw_11k.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
87372
ner_dataset/data/DATASET ORIGINAL_480Kـoutput.json
Normal file
87372
ner_dataset/data/DATASET ORIGINAL_480Kـoutput.json
Normal file
File diff suppressed because one or more lines are too long
9351
ner_dataset/data/DATASET140402_no_arefـoutput.json
Normal file
9351
ner_dataset/data/DATASET140402_no_arefـoutput.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
ner_dataset/data/all_sections_classes_new_140405.zip
Normal file
BIN
ner_dataset/data/all_sections_classes_new_140405.zip
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user