53 lines
1.5 KiB
Python
53 lines
1.5 KiB
Python
from elastic_helper import ElasticHelper
|
||
import datetime
|
||
import json
|
||
|
||
# Helper object used to read records out of the zipped JSON dump.
eh_obj = ElasticHelper()
path = "/home/gpu/data_11/14040423/mj_qa_section.zip"
# Iterable over the records stored in the archive at ``path``.
sections = eh_obj.iterateJsonFile(path, True)

# Cutoff date: data dated after this point has not yet been through the
# various processing steps.
# NOTE(review): the value looks like a Jalali (Solar Hijri) date stored in a
# Gregorian ``datetime`` object - confirm.
update_time = datetime.datetime(year=1403, month=10, day=5)
|
||
|
||
def _parse_ymd(text):
    """Return ``(year, month, day)`` parsed from a ``'Y/M/D'`` string.

    Only the first three ``/``-separated fields are used. The range check is
    calendar-agnostic (month 1-12, day 1-31) so that it never rejects a day
    merely because the Gregorian calendar lacks it.

    Raises:
        AttributeError, TypeError: if ``text`` is not a string.
        IndexError: if fewer than three fields are present.
        ValueError: if a field is not an integer or is out of range.
    """
    parts = text.split('/')
    year, month, day = int(parts[0]), int(parts[1]), int(parts[2])
    if not (1 <= year <= 9999 and 1 <= month <= 12 and 1 <= day <= 31):
        raise ValueError(f'date out of range: {text!r}')
    return year, month, day


def get_data_from_date(date, records=None, errors_path='./data/errors.txt'):
    """Collect the sections whose ``ts_date`` is strictly later than ``date``.

    Each record's ``source['ts_date']`` is read as a ``year/month/day`` string
    and compared field by field with ``date``. The values are deliberately not
    converted to ``datetime`` objects: ``datetime`` enforces Gregorian month
    lengths, so a day such as ``1404/02/31`` would raise and be reported as an
    error.
    NOTE(review): the dates in this dataset look like Jalali (Solar Hijri)
    dates, where months 1-6 have 31 days - confirm.

    Args:
        date: Cutoff; any object with ``year``, ``month`` and ``day``
            attributes (e.g. ``datetime.datetime``). Only those three fields
            are used.
        records: Iterable of dicts with an ``'id'`` key and a ``'source'``
            dict containing ``'ts_date'``. Defaults to the module-level
            ``sections`` iterable.
        errors_path: File that receives one ``"<ts_date> - <id>"`` line for
            every record whose date could not be parsed, in sorted order.
            The file is rewritten on every call and its parent directory is
            created when missing.

    Returns:
        dict mapping each selected section id to its ``source`` dict.

    Raises:
        KeyError: if a record lacks ``'id'``, ``'source'`` or ``'ts_date'``.
    """
    import os  # local import so the block does not depend on file-level imports

    if records is None:
        records = sections

    cutoff = (date.year, date.month, date.day)
    errors = []
    recent_sections = {}

    for item in records:
        section_id = item['id']
        source = item['source']
        ts_date = source['ts_date']
        try:
            ts_key = _parse_ymd(ts_date)
        except (AttributeError, IndexError, TypeError, ValueError):
            # Unparseable date: remember it for the error report and move on.
            errors.append(f'{ts_date} - {section_id}')
            continue

        if ts_key > cutoff:
            recent_sections[section_id] = source
            print(ts_date)

    errors.sort()
    # Make sure the target directory exists so a long scan is not lost to a
    # FileNotFoundError at the very end.
    os.makedirs(os.path.dirname(errors_path) or '.', exist_ok=True)
    with open(errors_path, 'w', encoding='utf-8') as file:
        file.writelines(f'{line}\n' for line in errors)

    # Report the number of sections actually returned.
    print(f'new sections count: {len(recent_sections)}')
    return recent_sections
|
||
|
||
if __name__ == '__main__':
    # Select every section dated after the configured cutoff.
    recent_sections = get_data_from_date(update_time)

    # Store the selection as indented UTF-8 JSON, leaving non-ASCII text
    # unescaped.
    with open('./data/recent_sections.json', 'w', encoding='utf-8') as file:
        file.write(json.dumps(recent_sections, ensure_ascii=False, indent=4))

    print('finished!')
|
||
|
||
|
||
|