add files

This commit is contained in:
ajokar 2025-07-30 18:56:17 +03:30
parent 393aace89c
commit 0c822ffa61
9 changed files with 70857 additions and 2 deletions

5114
data/errors.txt Normal file

File diff suppressed because it is too large Load Diff

65718
data/recent_sections.json Normal file

File diff suppressed because one or more lines are too long

6
do_nlp_processes.py Normal file
View File

@ -0,0 +1,6 @@
import p1_classifier
import p2_keyword_extractor
import p3_ner_recognizer
import p4_simplifier
import p5_words_embedder

View File

@ -10,23 +10,40 @@ sections = eh_obj.iterateJsonFile(path, True)
# Cut-off passed to get_data_from_date() when this module runs as a script.
update_time = datetime.datetime(year=1403, month=10, day=5)
def get_data_from_date(date: datetime.datetime) -> dict:
    """Collect the sections whose ``ts_date`` is strictly later than *date*.

    Iterates the module-level ``sections`` iterable. Every item must provide
    ``item['id']`` and ``item['source']['ts_date']``, where ``ts_date`` is a
    ``'year/month/day'`` string. Values that cannot be converted to a
    ``datetime.datetime`` are recorded as ``'<ts_date> - <id>'`` and skipped;
    the sorted records are written (overwriting) to ``./data/errors.txt``.

    Args:
        date: Cut-off; only sections dated strictly after it are returned.

    Returns:
        A dict mapping each selected section id to its ``source`` entry.

    Raises:
        KeyError: If an item lacks ``'id'``, ``'source'`` or ``'ts_date'``.
    """
    errors = []
    recent_sections = {}
    # Start at zero so the printed total equals the number of matches.
    counter = 0
    for item in sections:
        section_id = item['id']
        source = item['source']
        ts_date = source['ts_date']
        try:
            # Only the first three '/'-separated fields are used.
            year, month, day = (int(part) for part in ts_date.split('/')[:3])
            # NOTE(review): the values look like Jalali (Solar Hijri) dates,
            # but datetime validates against the Gregorian calendar, so a day
            # such as 1403/02/31 ends up in errors.txt -- confirm intended.
            ts_date_standard = datetime.datetime(year, month, day)
        except (AttributeError, TypeError, ValueError, OverflowError):
            # Malformed or out-of-range date: log it and move on.
            errors.append(f'{ts_date} - {section_id}')
            continue
        if ts_date_standard > date:
            recent_sections[section_id] = source
            counter += 1
            print(ts_date)

    errors.sort()
    with open('./data/errors.txt', 'w', encoding='utf-8') as file:
        # One record per line, each terminated by a newline.
        file.writelines(f'{entry}\n' for entry in errors)
    print(f'new sections count: {counter}')
    return recent_sections
if __name__ == '__main__':
    # Select every section newer than the cut-off and persist the result.
    recent_sections = get_data_from_date(update_time)
    with open('./data/recent_sections.json', 'w', encoding='utf-8') as file:
        # json.dump needs the target file object; ensure_ascii=False keeps
        # non-ASCII text readable in the written file.
        json.dump(recent_sections, file, ensure_ascii=False, indent=4)
    print('finished!')

0
p1_classifier.py Normal file
View File

0
p2_keyword_extractor.py Normal file
View File

0
p3_ner_recognizer.py Normal file
View File

0
p4_simplifier.py Normal file
View File

0
p5_words_embedder.py Normal file
View File