Compare commits
2 Commits
b38c9d1444
...
6bdac3f01e
Author | SHA1 | Date | |
---|---|---|---|
6bdac3f01e | |||
d1aee6adeb |
|
@ -14,9 +14,9 @@ headers = HEADERS
|
|||
|
||||
address = os.getcwd()
|
||||
if "import_data" in address:
|
||||
address += "/data/clean_sections_kw_15k.json"
|
||||
address += "/data/clean_sections_kw_11k.json"
|
||||
else:
|
||||
address += "/import_data/data/clean_sections_kw_15k.json"
|
||||
address += "/import_data/data/clean_sections_kw_11k.json"
|
||||
|
||||
# open .json file
|
||||
lines = read_from_json(address)
|
||||
|
@ -58,7 +58,7 @@ def createIndex(id, content, result_objects):
|
|||
output = {
|
||||
"id" : id,
|
||||
"content": content,
|
||||
"domain": "استخراج کلیدواژه 15 هزارتایی",
|
||||
"domain": "استخراج کلیدواژه 11 هزارتایی",
|
||||
"ref_id": "",
|
||||
"ref_url": "",
|
||||
"result_objects": result_objects,
|
||||
|
@ -128,7 +128,12 @@ for i, line in enumerate(lines):
|
|||
result_objects = []
|
||||
llam_prompt_kws = line["keywords"]
|
||||
# values = extract_keywords(llam_prompt_kw)
|
||||
values = llam_prompt_kws
|
||||
|
||||
values = []
|
||||
for item in llam_prompt_kws:
|
||||
values.append({
|
||||
"text": item,
|
||||
})
|
||||
result_objects.append(
|
||||
{
|
||||
"task": "keyword",
|
||||
|
@ -154,7 +159,7 @@ for i, line in enumerate(lines):
|
|||
bulk_data.append(data)
|
||||
|
||||
bulk_count += 1
|
||||
if bulk_data.__len__() > 500:
|
||||
if bulk_data.__len__() > 1000:
|
||||
print("=" * 30)
|
||||
print("count " + str(count))
|
||||
payload = json.dumps(bulk_data, cls=JSONEncoder) # Works!
|
||||
|
|
137098
import_data/data/clean_sections_kw_11k.json
Normal file
137098
import_data/data/clean_sections_kw_11k.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
87372
ner_dataset/data/DATASET ORIGINAL_480Kـoutput.json
Normal file
87372
ner_dataset/data/DATASET ORIGINAL_480Kـoutput.json
Normal file
File diff suppressed because one or more lines are too long
9351
ner_dataset/data/DATASET140402_no_arefـoutput.json
Normal file
9351
ner_dataset/data/DATASET140402_no_arefـoutput.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
ner_dataset/data/all_sections_classes_new_140405.zip
Normal file
BIN
ner_dataset/data/all_sections_classes_new_140405.zip
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user