"""Collect the sections whose ``ts_date`` is later than a cutoff date.

The script iterates the items of a zipped JSON dump through
``ElasticHelper.iterateJsonFile``, keeps every item whose
``source['ts_date']`` is strictly after ``update_time`` and writes the result
to ``./data/recent_sections.json``.  Items whose ``ts_date`` cannot be parsed
are listed in ``./data/errors.txt``.
"""
import datetime
import json
import os

from elastic_helper import ElasticHelper

eh_obj = ElasticHelper()
path = "/home/gpu/data_11/14040423/mj_qa_section.zip"
# NOTE(review): the meaning of the second positional argument (True) is
# defined by ElasticHelper.iterateJsonFile, which is not visible here.
sections = eh_obj.iterateJsonFile(path, True)

# Cutoff date: data dated after this has not yet gone through the various
# processing steps.
update_time = datetime.datetime(1403, 10, 5)


def _parse_ts_date(ts_date):
    """Parse a ``'YYYY/MM/DD'`` string into a ``(year, month, day)`` tuple.

    Only the first three ``/``-separated fields are used.

    Raises:
        AttributeError: if ``ts_date`` has no ``split`` method (e.g. ``None``).
        IndexError: if fewer than three fields are present.
        ValueError: if a field is not an integer or is out of range.
    """
    parts = ts_date.split('/')
    year, month, day = int(parts[0]), int(parts[1]), int(parts[2])
    # NOTE(review): the dates look like Solar Hijri (Jalali) dates -- the
    # cutoff year is 1403 -- so they are deliberately NOT validated through
    # datetime.datetime, which applies Gregorian month lengths and would
    # reject valid days such as 1403/02/30 or 1403/04/31.  Confirm the
    # calendar with the data owner.
    if year < 1 or not 1 <= month <= 12 or not 1 <= day <= 31:
        raise ValueError(f'date out of range: {ts_date}')
    return year, month, day


def get_data_from_date(date):
    """Return the sections dated strictly after ``date``.

    Reads the module-level ``sections`` iterable; each item must provide
    ``item['id']`` and ``item['source']['ts_date']``.

    Args:
        date: cutoff; any object with ``year``, ``month`` and ``day``
            attributes (e.g. ``datetime.datetime``).  Only the calendar day
            is compared.

    Returns:
        dict mapping section id to its ``source`` for every section whose
        ``ts_date`` is later than ``date``.

    Side effects:
        Prints the ``ts_date`` of every kept section and the final count,
        and writes the sorted list of unparseable dates to
        ``./data/errors.txt``.
    """
    errors = []
    recent_sections = {}
    # Start from zero so the printed value is the real number of matches.
    counter = 0
    cutoff = (date.year, date.month, date.day)

    for item in sections:
        section_id = item['id']
        source = item['source']
        ts_date = source['ts_date']
        try:
            parsed_date = _parse_ts_date(ts_date)
        except (AttributeError, IndexError, TypeError, ValueError):
            # Best effort: record the bad value and keep going.
            errors.append(f'{ts_date} - {section_id}')
            continue

        # Tuple comparison orders dates chronologically without relying on
        # Gregorian calendar rules.
        if parsed_date > cutoff:
            recent_sections[section_id] = source
            counter += 1
            print(ts_date)

    errors.sort()
    errors_path = './data/errors.txt'
    # Create the output directory up front so a long run is not lost to a
    # missing folder.
    os.makedirs(os.path.dirname(errors_path), exist_ok=True)
    with open(errors_path, 'w', encoding='utf-8') as file:
        file.writelines(f'{entry}\n' for entry in errors)

    print(f'new sections count: {counter}')
    return recent_sections


if __name__ == '__main__':
    recent_sections = get_data_from_date(update_time)
    output_path = './data/recent_sections.json'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(recent_sections, file, ensure_ascii=False, indent=4)
    print('finished!')