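# Tally the 'code-ai' label classes in the mj_qa_section dump: read records via
# ElasticHelper, count each label with Counter, print the sorted counts, and
# append them to ./data/alldata_classes.txt.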
import json  # used only by the commented-out fullpath_dataset branch below
from collections import Counter

from elastic_helper import ElasticHelper

# Iterate over the records of the zipped JSON dump via the Elastic helper.
eh_obj = ElasticHelper()
path = "/home/gpu/data_11/mj_qa_section.zip"
data = eh_obj.iterateJsonFile(path, True)

# Collect the 'code-ai' label from every record; count records missing one.
classes = []
failed = 0

for i, item in enumerate(data):
    try:
        class_ = item['source']['code-ai']['label']
        print(i + 1)
        classes.append(class_)
    except (KeyError, TypeError):
        failed += 1
        # with open('./data/alldata_classes.txt', 'a', encoding='utf-8') as file:
        #     file.write(str(source))
        # exit(1)

# Alternative input (disabled): load the classes from a pre-built JSON file.
# with open('./data/fullpath_dataset.json', 'r', encoding='utf-8') as file:
#     data = json.load(file)
# classes = [itm['domain_name'] for itm in data]

# Count the occurrences of each class and sort them by frequency, descending.
classes_count = Counter(classes)
sorted_elements = sorted(classes_count.items(), key=lambda x: x[1], reverse=True)

# Build a "class -> count" report, echo it, and append it to the output file.
text = ''
for k, v in sorted_elements:
    text += f'{k} -> {v}\n'
    print(f'{k} -> {v}')

# print(text)
with open('./data/alldata_classes.txt', 'a', encoding='utf-8') as file:
    file.write(text)

# print(classes_count)
print('*****************************************')
print(f'len(classes_count): {len(classes_count)}')
print(f'failed: {failed}')