"""Count how often each 'code-ai' label occurs in a JSON dump.

The script iterates over the records yielded by
``ElasticHelper.iterateJsonFile``, reads
``item['source']['code-ai']['label']`` from each record, tallies the labels,
prints the tally (most frequent first) and appends the same report to
``./data/alldata_classes.txt``.
"""
from elastic_helper import ElasticHelper
import json
from collections import Counter

eh_obj = ElasticHelper()
path = "/home/gpu/data_11/mj_qa_section.zip"
# NOTE(review): the meaning of the second positional argument (True) is
# defined by ElasticHelper.iterateJsonFile and is not visible here -- confirm.
data = eh_obj.iterateJsonFile(path, True)

classes = []
failed = 0
for position, item in enumerate(data, start=1):
    try:
        class_ = item['source']['code-ai']['label']
    except (LookupError, TypeError):
        # Record has no label at the expected path (missing key, or a level
        # that is not subscriptable). Count it and continue; unlike a bare
        # ``except`` this still lets Ctrl-C and real bugs surface.
        failed += 1
        continue
    # Progress output: 1-based position of each record that had a label.
    print(position)
    classes.append(class_)

classes_count = Counter(classes)
# most_common() sorts by count, descending, keeping first-seen order on ties.
sorted_elements = classes_count.most_common()

report_lines = [f'{k} -> {v}\n' for k, v in sorted_elements]
for line in report_lines:
    print(line, end='')
text = ''.join(report_lines)

# Opened in append mode: re-running the script adds another copy of the
# report to the file instead of replacing the previous one.
with open('./data/alldata_classes.txt', 'a', encoding='utf-8') as out_file:
    out_file.write(text)

print('*****************************************')
print(f'len(classes_count): {len(classes_count)}')
print(f'failed: {failed}')