find large sections which send to window
This commit is contained in:
parent
0e856fd797
commit
3a23cb6e66
|
@ -1 +1,104 @@
|
||||||
qs832374 >> No text is provided to clean
|
qs214614 >> No text is provided to clean
|
||||||
|
qs214630 >> No text is provided to clean
|
||||||
|
qs999303 >> No text is provided to clean
|
||||||
|
qs688916 >> No text is provided to clean
|
||||||
|
qs213422 >> No text is provided to clean
|
||||||
|
qs894830 >> No text is provided to clean
|
||||||
|
qs625850 >> No text is provided to clean
|
||||||
|
qs762340 >> No text is provided to clean
|
||||||
|
qs1816282 >> No text is provided to clean
|
||||||
|
qs668053 >> No text is provided to clean
|
||||||
|
qs692739 >> No text is provided to clean
|
||||||
|
qs218353 >> No text is provided to clean
|
||||||
|
qs2065818 >> No text is provided to clean
|
||||||
|
qs286455 >> No text is provided to clean
|
||||||
|
qs573113 >> No text is provided to clean
|
||||||
|
qs688971 >> No text is provided to clean
|
||||||
|
qs1051386 >> No text is provided to clean
|
||||||
|
qs1118015 >> No text is provided to clean
|
||||||
|
qs776897 >> No text is provided to clean
|
||||||
|
qs1115771 >> No text is provided to clean
|
||||||
|
298683e9ae8dffe3_10901111 >> No text is provided to clean
|
||||||
|
298683e9ae8dffe3_1090111 >> No text is provided to clean
|
||||||
|
qs1102704 >> No text is provided to clean
|
||||||
|
qs768498 >> No text is provided to clean
|
||||||
|
qs988057 >> No text is provided to clean
|
||||||
|
qs830281 >> No text is provided to clean
|
||||||
|
qs953637 >> No text is provided to clean
|
||||||
|
qs975560 >> No text is provided to clean
|
||||||
|
qs214537 >> No text is provided to clean
|
||||||
|
qs2516016 >> No text is provided to clean
|
||||||
|
qs1115812 >> No text is provided to clean
|
||||||
|
qs832536 >> No text is provided to clean
|
||||||
|
qs3437676 >> No text is provided to clean
|
||||||
|
qs1102796 >> No text is provided to clean
|
||||||
|
qs799402 >> No text is provided to clean
|
||||||
|
qs866692 >> No text is provided to clean
|
||||||
|
qs761253 >> No text is provided to clean
|
||||||
|
qs1044009 >> No text is provided to clean
|
||||||
|
qs773634 >> No text is provided to clean
|
||||||
|
qs1102868 >> No text is provided to clean
|
||||||
|
qs1102870 >> No text is provided to clean
|
||||||
|
qs1843826 >> No text is provided to clean
|
||||||
|
qs1102735 >> No text is provided to clean
|
||||||
|
qs693032 >> No text is provided to clean
|
||||||
|
qs877648 >> No text is provided to clean
|
||||||
|
qs987743 >> No text is provided to clean
|
||||||
|
qs1128475 >> No text is provided to clean
|
||||||
|
qs3349198 >> No text is provided to clean
|
||||||
|
qs621263 >> No text is provided to clean
|
||||||
|
qs218787 >> No text is provided to clean
|
||||||
|
qs218788 >> No text is provided to clean
|
||||||
|
qs2515800 >> No text is provided to clean
|
||||||
|
qs218789 >> No text is provided to clean
|
||||||
|
qs218791 >> No text is provided to clean
|
||||||
|
qs218792 >> No text is provided to clean
|
||||||
|
qs786853 >> No text is provided to clean
|
||||||
|
qs516658 >> No text is provided to clean
|
||||||
|
qs214636 >> No text is provided to clean
|
||||||
|
qs1115772 >> No text is provided to clean
|
||||||
|
qs552406 >> No text is provided to clean
|
||||||
|
qs236126 >> No text is provided to clean
|
||||||
|
qs286506 >> No text is provided to clean
|
||||||
|
qs286511 >> No text is provided to clean
|
||||||
|
qs94717 >> No text is provided to clean
|
||||||
|
qs719521 >> No text is provided to clean
|
||||||
|
qs1117997 >> No text is provided to clean
|
||||||
|
qs286490 >> No text is provided to clean
|
||||||
|
qs1988696 >> No text is provided to clean
|
||||||
|
qs2516015 >> No text is provided to clean
|
||||||
|
qs214546 >> No text is provided to clean
|
||||||
|
qs507915 >> No text is provided to clean
|
||||||
|
qs980157 >> No text is provided to clean
|
||||||
|
qs2651573 >> No text is provided to clean
|
||||||
|
qs621745 >> No text is provided to clean
|
||||||
|
qs957628 >> No text is provided to clean
|
||||||
|
qs725920 >> No text is provided to clean
|
||||||
|
qs832412 >> No text is provided to clean
|
||||||
|
qs1113187 >> No text is provided to clean
|
||||||
|
qs621180 >> No text is provided to clean
|
||||||
|
qs1092296 >> No text is provided to clean
|
||||||
|
qs987744 >> No text is provided to clean
|
||||||
|
qs990251 >> No text is provided to clean
|
||||||
|
qs956440 >> No text is provided to clean
|
||||||
|
qs998707 >> No text is provided to clean
|
||||||
|
qs431837 >> No text is provided to clean
|
||||||
|
qs1120063 >> No text is provided to clean
|
||||||
|
qs2516020 >> No text is provided to clean
|
||||||
|
qs214569 >> No text is provided to clean
|
||||||
|
qs214577 >> No text is provided to clean
|
||||||
|
qs680f6f37b77af_21 >> No text is provided to clean
|
||||||
|
qs2128754 >> No text is provided to clean
|
||||||
|
qs925414 >> No text is provided to clean
|
||||||
|
774683546f75f603 >> No text is provided to clean
|
||||||
|
298683e9ae8dffe3_109011 >> No text is provided to clean
|
||||||
|
qs1113973 >> No text is provided to clean
|
||||||
|
qs286498 >> No text is provided to clean
|
||||||
|
qs286501 >> No text is provided to clean
|
||||||
|
qs217778 >> No text is provided to clean
|
||||||
|
qs761370 >> No text is provided to clean
|
||||||
|
qs289962 >> No text is provided to clean
|
||||||
|
qs236391 >> No text is provided to clean
|
||||||
|
qs218594 >> No text is provided to clean
|
||||||
|
qs236285 >> No text is provided to clean
|
||||||
|
qs1113974 >> No text is provided to clean
|
||||||
|
|
3273
data/large_sections.txt
Normal file
3273
data/large_sections.txt
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
|
@ -147,10 +147,7 @@ qanon_title_list = []
|
||||||
new_sections_dict = {}
|
new_sections_dict = {}
|
||||||
selectedids = []
|
selectedids = []
|
||||||
for index, item in enumerate(sections):
|
for index, item in enumerate(sections):
|
||||||
if index < 2900:
|
|
||||||
continue
|
|
||||||
if index > 10000:
|
|
||||||
break
|
|
||||||
id = item['id']
|
id = item['id']
|
||||||
|
|
||||||
source = item['source']
|
source = item['source']
|
||||||
|
@ -216,9 +213,6 @@ for index, item in enumerate(sections):
|
||||||
with open('./data/all_sections_classes_new_140405.json', 'w', encoding='utf-8') as output_file:
|
with open('./data/all_sections_classes_new_140405.json', 'w', encoding='utf-8') as output_file:
|
||||||
json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
|
json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
|
||||||
output_file.write(json_data)
|
output_file.write(json_data)
|
||||||
# with open('./data/all_sections_classes_tttttesttttt.json', 'w', encoding='utf-8') as output_file:
|
|
||||||
# json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
|
|
||||||
# output_file.write(json_data)
|
|
||||||
|
|
||||||
print(f'end: {datetime.datetime.now()}')
|
print(f'end: {datetime.datetime.now()}')
|
||||||
print('finished!')
|
print('finished!')
|
||||||
|
|
28
temp.py
28
temp.py
|
@ -85,5 +85,29 @@ def classified_sections():
|
||||||
return large_not_classified
|
return large_not_classified
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
result = classified_sections()
|
|
||||||
print(len(result))
|
with open('./data/all_sections_classes_new_140405.json', 'r', encoding='utf-8') as _file:
|
||||||
|
sections = json.load(_file)
|
||||||
|
|
||||||
|
# region large sections which send to window
|
||||||
|
faults = []
|
||||||
|
for item in sections:
|
||||||
|
itm = sections[item]
|
||||||
|
try:
|
||||||
|
best = itm['best-class']['score']
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
if best > 1:
|
||||||
|
print(best)
|
||||||
|
faults.append((item,best))
|
||||||
|
|
||||||
|
faults_text = ''
|
||||||
|
for item in faults:
|
||||||
|
faults_text += ''.join(item[0]) + '\n'
|
||||||
|
with open('./data/large_sections.txt', 'a+') as file:
|
||||||
|
file.write(faults_text.strip())
|
||||||
|
|
||||||
|
# endregion
|
||||||
|
|
||||||
|
# result = classified_sections()
|
||||||
|
# print(len(result))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user