find large sections which send to window

This commit is contained in:
init_mahdi 2025-08-20 13:56:00 +03:30
parent 0e856fd797
commit 3a23cb6e66
5 changed files with 154581 additions and 1445 deletions

View File

@ -1 +1,104 @@
qs832374 >> No text is provided to clean qs214614 >> No text is provided to clean
qs214630 >> No text is provided to clean
qs999303 >> No text is provided to clean
qs688916 >> No text is provided to clean
qs213422 >> No text is provided to clean
qs894830 >> No text is provided to clean
qs625850 >> No text is provided to clean
qs762340 >> No text is provided to clean
qs1816282 >> No text is provided to clean
qs668053 >> No text is provided to clean
qs692739 >> No text is provided to clean
qs218353 >> No text is provided to clean
qs2065818 >> No text is provided to clean
qs286455 >> No text is provided to clean
qs573113 >> No text is provided to clean
qs688971 >> No text is provided to clean
qs1051386 >> No text is provided to clean
qs1118015 >> No text is provided to clean
qs776897 >> No text is provided to clean
qs1115771 >> No text is provided to clean
298683e9ae8dffe3_10901111 >> No text is provided to clean
298683e9ae8dffe3_1090111 >> No text is provided to clean
qs1102704 >> No text is provided to clean
qs768498 >> No text is provided to clean
qs988057 >> No text is provided to clean
qs830281 >> No text is provided to clean
qs953637 >> No text is provided to clean
qs975560 >> No text is provided to clean
qs214537 >> No text is provided to clean
qs2516016 >> No text is provided to clean
qs1115812 >> No text is provided to clean
qs832536 >> No text is provided to clean
qs3437676 >> No text is provided to clean
qs1102796 >> No text is provided to clean
qs799402 >> No text is provided to clean
qs866692 >> No text is provided to clean
qs761253 >> No text is provided to clean
qs1044009 >> No text is provided to clean
qs773634 >> No text is provided to clean
qs1102868 >> No text is provided to clean
qs1102870 >> No text is provided to clean
qs1843826 >> No text is provided to clean
qs1102735 >> No text is provided to clean
qs693032 >> No text is provided to clean
qs877648 >> No text is provided to clean
qs987743 >> No text is provided to clean
qs1128475 >> No text is provided to clean
qs3349198 >> No text is provided to clean
qs621263 >> No text is provided to clean
qs218787 >> No text is provided to clean
qs218788 >> No text is provided to clean
qs2515800 >> No text is provided to clean
qs218789 >> No text is provided to clean
qs218791 >> No text is provided to clean
qs218792 >> No text is provided to clean
qs786853 >> No text is provided to clean
qs516658 >> No text is provided to clean
qs214636 >> No text is provided to clean
qs1115772 >> No text is provided to clean
qs552406 >> No text is provided to clean
qs236126 >> No text is provided to clean
qs286506 >> No text is provided to clean
qs286511 >> No text is provided to clean
qs94717 >> No text is provided to clean
qs719521 >> No text is provided to clean
qs1117997 >> No text is provided to clean
qs286490 >> No text is provided to clean
qs1988696 >> No text is provided to clean
qs2516015 >> No text is provided to clean
qs214546 >> No text is provided to clean
qs507915 >> No text is provided to clean
qs980157 >> No text is provided to clean
qs2651573 >> No text is provided to clean
qs621745 >> No text is provided to clean
qs957628 >> No text is provided to clean
qs725920 >> No text is provided to clean
qs832412 >> No text is provided to clean
qs1113187 >> No text is provided to clean
qs621180 >> No text is provided to clean
qs1092296 >> No text is provided to clean
qs987744 >> No text is provided to clean
qs990251 >> No text is provided to clean
qs956440 >> No text is provided to clean
qs998707 >> No text is provided to clean
qs431837 >> No text is provided to clean
qs1120063 >> No text is provided to clean
qs2516020 >> No text is provided to clean
qs214569 >> No text is provided to clean
qs214577 >> No text is provided to clean
qs680f6f37b77af_21 >> No text is provided to clean
qs2128754 >> No text is provided to clean
qs925414 >> No text is provided to clean
774683546f75f603 >> No text is provided to clean
298683e9ae8dffe3_109011 >> No text is provided to clean
qs1113973 >> No text is provided to clean
qs286498 >> No text is provided to clean
qs286501 >> No text is provided to clean
qs217778 >> No text is provided to clean
qs761370 >> No text is provided to clean
qs289962 >> No text is provided to clean
qs236391 >> No text is provided to clean
qs218594 >> No text is provided to clean
qs236285 >> No text is provided to clean
qs1113974 >> No text is provided to clean

3273
data/large_sections.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -147,10 +147,7 @@ qanon_title_list = []
new_sections_dict = {} new_sections_dict = {}
selectedids = [] selectedids = []
for index, item in enumerate(sections): for index, item in enumerate(sections):
if index < 2900:
continue
if index > 10000:
break
id = item['id'] id = item['id']
source = item['source'] source = item['source']
@ -216,9 +213,6 @@ for index, item in enumerate(sections):
with open('./data/all_sections_classes_new_140405.json', 'w', encoding='utf-8') as output_file: with open('./data/all_sections_classes_new_140405.json', 'w', encoding='utf-8') as output_file:
json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False) json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
output_file.write(json_data) output_file.write(json_data)
# with open('./data/all_sections_classes_tttttesttttt.json', 'w', encoding='utf-8') as output_file:
# json_data = json.dumps(new_sections_dict, indent=4, ensure_ascii=False)
# output_file.write(json_data)
print(f'end: {datetime.datetime.now()}') print(f'end: {datetime.datetime.now()}')
print('finished!') print('finished!')

28
temp.py
View File

@ -85,5 +85,29 @@ def classified_sections():
return large_not_classified return large_not_classified
def find_large_sections(sections):
    """Return ``(section_id, score)`` pairs whose best-class score exceeds 1.

    Args:
        sections: Mapping of section id to that section's record. A record
            is expected to expose ``record['best-class']['score']``.

    Returns:
        A list of ``(section_id, score)`` tuples, in the mapping's iteration
        order, for every record whose score is greater than 1. Records that
        have no readable ``best-class``/``score`` entry are skipped.
    """
    faults = []
    for section_id, section in sections.items():
        try:
            best = section['best-class']['score']
        except (KeyError, TypeError):
            # Missing key, or a record / 'best-class' value that is not a
            # mapping: there is no score to inspect for this section.
            continue
        if best > 1:
            faults.append((section_id, best))
    return faults


def append_section_ids(path, section_ids):
    """Append ``section_ids`` to the text file at ``path``, one id per line.

    Every id is written with its own trailing newline, so repeated runs never
    glue the first id of a run onto the last id of the previous one. If the
    existing file does not end with a newline, one is inserted before the
    new ids.

    Args:
        path: Destination file; created when it does not exist.
        section_ids: Iterable of section id strings to append.
    """
    import os

    lines = [f'{section_id}\n' for section_id in section_ids]

    # Detect an existing file whose last line is unterminated.
    needs_separator = False
    if lines:
        try:
            with open(path, 'rb') as existing:
                existing.seek(0, os.SEEK_END)
                if existing.tell() > 0:
                    existing.seek(-1, os.SEEK_END)
                    needs_separator = existing.read(1) != b'\n'
        except FileNotFoundError:
            pass

    with open(path, 'a', encoding='utf-8') as file:
        if needs_separator:
            file.write('\n')
        file.writelines(lines)


if __name__ == '__main__':
    with open('./data/all_sections_classes_new_140405.json', 'r', encoding='utf-8') as _file:
        sections = json.load(_file)

    # region large sections that are sent to the window
    faults = find_large_sections(sections)
    for _, best in faults:
        print(best)
    append_section_ids(
        './data/large_sections.txt',
        [section_id for section_id, _ in faults],
    )
    # endregion