add files
This commit is contained in:
parent
393aace89c
commit
0c822ffa61
5114
data/errors.txt
Normal file
5114
data/errors.txt
Normal file
File diff suppressed because it is too large
Load Diff
65718
data/recent_sections.json
Normal file
65718
data/recent_sections.json
Normal file
File diff suppressed because one or more lines are too long
6
do_nlp_processes.py
Normal file
6
do_nlp_processes.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
import p1_classifier
|
||||
import p2_keyword_extractor
|
||||
import p3_ner_recognizer
|
||||
import p4_simplifier
|
||||
import p5_words_embedder
|
||||
|
|
@ -10,23 +10,40 @@ sections = eh_obj.iterateJsonFile(path, True)
|
|||
# Cut-off date handed to get_data_from_date() when this file runs as a script.
# NOTE(review): the year 1403 looks like a Jalali (Solar Hijri) year stored in
# a Gregorian datetime object - confirm the intended calendar.
update_time = datetime.datetime(1403,10,5)
|
||||
|
||||
def get_data_from_date(date):
    """Collect the sections whose ``ts_date`` is strictly later than *date*.

    Iterates the module-level ``sections`` iterable. Every item must provide
    ``item['id']`` and ``item['source']['ts_date']``, where ``ts_date`` is
    expected to be a ``'year/month/day'`` string.

    Items whose date cannot be parsed are skipped; they are recorded, sorted,
    one per line as ``'<ts_date> - <id>'`` in ``./data/errors.txt`` (the file
    is rewritten on every call).

    Args:
        date: ``datetime.datetime`` cut-off to compare each section against.

    Returns:
        dict mapping each matching section id to its ``source`` dict.
    """
    errors = []
    recent_sections = {}
    # Start at zero so the printed total equals the number of matches.
    counter = 0

    for item in sections:
        section_id = item['id']
        source = item['source']
        ts_date = source['ts_date']

        # NOTE(review): datetime validates against the Gregorian calendar. If
        # ts_date is a Jalali date (the caller's cut-off year suggests so),
        # valid values such as month 2 / day 31 land in errors.txt - confirm.
        try:
            parts = ts_date.split('/')
            ts_date_standard = datetime.datetime(
                int(parts[0]), int(parts[1]), int(parts[2])
            )
        except (AttributeError, IndexError, TypeError, ValueError, OverflowError):
            # Malformed or missing date: remember it and move on.
            errors.append(f'{ts_date} - {section_id}')
            continue

        if ts_date_standard > date:
            recent_sections[section_id] = source
            counter += 1
            print(ts_date)

    errors.sort()
    errors_text = ''.join(f'{entry}\n' for entry in errors)
    with open('./data/errors.txt', 'w', encoding='utf-8') as file:
        file.write(errors_text)

    print(f'new sections count: {counter}')
    return recent_sections
|
||||
|
||||
if __name__ == '__main__':
    # Select every section dated after the module-level cut-off.
    recent_sections = get_data_from_date(update_time)

    # Serialize before opening the file so a serialization failure cannot
    # truncate an existing output file. ensure_ascii=False keeps non-ASCII
    # text readable instead of \u-escaped.
    data = json.dumps(recent_sections, ensure_ascii=False, indent=4)
    with open('./data/recent_sections.json', 'w', encoding='utf-8') as file:
        file.write(data)

    print('finished!')
|
||||
|
|
0
p1_classifier.py
Normal file
0
p1_classifier.py
Normal file
0
p2_keyword_extractor.py
Normal file
0
p2_keyword_extractor.py
Normal file
0
p3_ner_recognizer.py
Normal file
0
p3_ner_recognizer.py
Normal file
0
p4_simplifier.py
Normal file
0
p4_simplifier.py
Normal file
0
p5_words_embedder.py
Normal file
0
p5_words_embedder.py
Normal file
Loading…
Reference in New Issue
Block a user