find large sections which send to window
This commit is contained in:
parent
0e856fd797
commit
3a23cb6e66
|
@ -1 +1,104 @@
|
|||
qs832374 >> No text is provided to clean
|
||||
qs214614 >> No text is provided to clean
|
||||
qs214630 >> No text is provided to clean
|
||||
qs999303 >> No text is provided to clean
|
||||
qs688916 >> No text is provided to clean
|
||||
qs213422 >> No text is provided to clean
|
||||
qs894830 >> No text is provided to clean
|
||||
qs625850 >> No text is provided to clean
|
||||
qs762340 >> No text is provided to clean
|
||||
qs1816282 >> No text is provided to clean
|
||||
qs668053 >> No text is provided to clean
|
||||
qs692739 >> No text is provided to clean
|
||||
qs218353 >> No text is provided to clean
|
||||
qs2065818 >> No text is provided to clean
|
||||
qs286455 >> No text is provided to clean
|
||||
qs573113 >> No text is provided to clean
|
||||
qs688971 >> No text is provided to clean
|
||||
qs1051386 >> No text is provided to clean
|
||||
qs1118015 >> No text is provided to clean
|
||||
qs776897 >> No text is provided to clean
|
||||
qs1115771 >> No text is provided to clean
|
||||
298683e9ae8dffe3_10901111 >> No text is provided to clean
|
||||
298683e9ae8dffe3_1090111 >> No text is provided to clean
|
||||
qs1102704 >> No text is provided to clean
|
||||
qs768498 >> No text is provided to clean
|
||||
qs988057 >> No text is provided to clean
|
||||
qs830281 >> No text is provided to clean
|
||||
qs953637 >> No text is provided to clean
|
||||
qs975560 >> No text is provided to clean
|
||||
qs214537 >> No text is provided to clean
|
||||
qs2516016 >> No text is provided to clean
|
||||
qs1115812 >> No text is provided to clean
|
||||
qs832536 >> No text is provided to clean
|
||||
qs3437676 >> No text is provided to clean
|
||||
qs1102796 >> No text is provided to clean
|
||||
qs799402 >> No text is provided to clean
|
||||
qs866692 >> No text is provided to clean
|
||||
qs761253 >> No text is provided to clean
|
||||
qs1044009 >> No text is provided to clean
|
||||
qs773634 >> No text is provided to clean
|
||||
qs1102868 >> No text is provided to clean
|
||||
qs1102870 >> No text is provided to clean
|
||||
qs1843826 >> No text is provided to clean
|
||||
qs1102735 >> No text is provided to clean
|
||||
qs693032 >> No text is provided to clean
|
||||
qs877648 >> No text is provided to clean
|
||||
qs987743 >> No text is provided to clean
|
||||
qs1128475 >> No text is provided to clean
|
||||
qs3349198 >> No text is provided to clean
|
||||
qs621263 >> No text is provided to clean
|
||||
qs218787 >> No text is provided to clean
|
||||
qs218788 >> No text is provided to clean
|
||||
qs2515800 >> No text is provided to clean
|
||||
qs218789 >> No text is provided to clean
|
||||
qs218791 >> No text is provided to clean
|
||||
qs218792 >> No text is provided to clean
|
||||
qs786853 >> No text is provided to clean
|
||||
qs516658 >> No text is provided to clean
|
||||
qs214636 >> No text is provided to clean
|
||||
qs1115772 >> No text is provided to clean
|
||||
qs552406 >> No text is provided to clean
|
||||
qs236126 >> No text is provided to clean
|
||||
qs286506 >> No text is provided to clean
|
||||
qs286511 >> No text is provided to clean
|
||||
qs94717 >> No text is provided to clean
|
||||
qs719521 >> No text is provided to clean
|
||||
qs1117997 >> No text is provided to clean
|
||||
qs286490 >> No text is provided to clean
|
||||
qs1988696 >> No text is provided to clean
|
||||
qs2516015 >> No text is provided to clean
|
||||
qs214546 >> No text is provided to clean
|
||||
qs507915 >> No text is provided to clean
|
||||
qs980157 >> No text is provided to clean
|
||||
qs2651573 >> No text is provided to clean
|
||||
qs621745 >> No text is provided to clean
|
||||
qs957628 >> No text is provided to clean
|
||||
qs725920 >> No text is provided to clean
|
||||
qs832412 >> No text is provided to clean
|
||||
qs1113187 >> No text is provided to clean
|
||||
qs621180 >> No text is provided to clean
|
||||
qs1092296 >> No text is provided to clean
|
||||
qs987744 >> No text is provided to clean
|
||||
qs990251 >> No text is provided to clean
|
||||
qs956440 >> No text is provided to clean
|
||||
qs998707 >> No text is provided to clean
|
||||
qs431837 >> No text is provided to clean
|
||||
qs1120063 >> No text is provided to clean
|
||||
qs2516020 >> No text is provided to clean
|
||||
qs214569 >> No text is provided to clean
|
||||
qs214577 >> No text is provided to clean
|
||||
qs680f6f37b77af_21 >> No text is provided to clean
|
||||
qs2128754 >> No text is provided to clean
|
||||
qs925414 >> No text is provided to clean
|
||||
774683546f75f603 >> No text is provided to clean
|
||||
298683e9ae8dffe3_109011 >> No text is provided to clean
|
||||
qs1113973 >> No text is provided to clean
|
||||
qs286498 >> No text is provided to clean
|
||||
qs286501 >> No text is provided to clean
|
||||
qs217778 >> No text is provided to clean
|
||||
qs761370 >> No text is provided to clean
|
||||
qs289962 >> No text is provided to clean
|
||||
qs236391 >> No text is provided to clean
|
||||
qs218594 >> No text is provided to clean
|
||||
qs236285 >> No text is provided to clean
|
||||
qs1113974 >> No text is provided to clean
|
||||
|
|
3273
data/large_sections.txt
Normal file
3273
data/large_sections.txt
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
|
@ -147,10 +147,7 @@ qanon_title_list = []
|
|||
new_sections_dict = {}
|
||||
selectedids = []
|
||||
for index, item in enumerate(sections):
|
||||
if index < 2900:
|
||||
continue
|
||||
if index > 10000:
|
||||
break
|
||||
|
||||
id = item['id']
|
||||
|
||||
source = item['source']
|
||||
|
@ -216,9 +213,6 @@ for index, item in enumerate(sections):
|
|||
with open('./data/all_sections_classes_new_140405.json', 'w', encoding='utf-8') as output_file:
|
||||
json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
|
||||
output_file.write(json_data)
|
||||
# with open('./data/all_sections_classes_tttttesttttt.json', 'w', encoding='utf-8') as output_file:
|
||||
# json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
|
||||
# output_file.write(json_data)
|
||||
|
||||
print(f'end: {datetime.datetime.now()}')
|
||||
print('finished!')
|
||||
|
|
28
temp.py
28
temp.py
|
@ -85,5 +85,29 @@ def classified_sections():
|
|||
return large_not_classified
|
||||
|
||||
if __name__ == '__main__':
|
||||
result = classified_sections()
|
||||
print(len(result))
|
||||
|
||||
with open('./data/all_sections_classes_new_140405.json', 'r', encoding='utf-8') as _file:
|
||||
sections = json.load(_file)
|
||||
|
||||
# region large sections which send to window
|
||||
faults = []
|
||||
for item in sections:
|
||||
itm = sections[item]
|
||||
try:
|
||||
best = itm['best-class']['score']
|
||||
except:
|
||||
continue
|
||||
if best > 1:
|
||||
print(best)
|
||||
faults.append((item,best))
|
||||
|
||||
faults_text = ''
|
||||
for item in faults:
|
||||
faults_text += ''.join(item[0]) + '\n'
|
||||
with open('./data/large_sections.txt', 'a+') as file:
|
||||
file.write(faults_text.strip())
|
||||
|
||||
# endregion
|
||||
|
||||
# result = classified_sections()
|
||||
# print(len(result))
|
||||
|
|
Loading…
Reference in New Issue
Block a user