ai_dataset/import_data/section_topic_dataset.py

98 lines
2.6 KiB
Python
Raw Permalink Normal View History

2024-09-17 16:45:41 +00:00
import json
import requests
from config_base import *
import requests
from bs4 import BeautifulSoup
from html import escape
import json
from decimal import Decimal
import datetime
from config_base import *
from general_functions import *
from funcs import write_to_json
# Temporary.
# This program was run once, temporarily, to correct existing data. If the data
# migration is performed again from scratch, this step will happen automatically.
# Open the database handles used by the rest of the script; create_cursor is not
# defined in this file (presumably provided by one of the star imports above) and
# its result is unpacked as (cursor, connection).
cursor, cnxn = create_cursor()
# Does roughly the same job as json.loads from dynamodb-json:
# https://pypi.org/project/dynamodb-json/
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally accepts ``decimal.Decimal`` values."""

    def default(self, obj):
        """Return ``float(obj)`` for a Decimal; defer anything else to the base encoder."""
        if not isinstance(obj, Decimal):
            # The base implementation raises TypeError for unsupported types.
            return super().default(obj)
        return float(obj)
# Note: for regulations there was no case that was in table form.
TOPIC_QUERY = '''SELECT [ID],[CODE],[CAPTION]
FROM [Qavanin].[dbo].[lwTopic]
'''

# Manual corrections, keyed by the topic ID as read from lwTopic. Each value is
# the (topic_id, topic_code, caption) triple written to the output instead of
# the row's own values. The reasons for these specific remaps are not recorded
# in this file. Captions are reproduced exactly, including trailing spaces.
TOPIC_OVERRIDES = {
    96: (68, "426", "آمار و سرشماری"),
    61: (60, "418", "بورس اوراق بهادار و اسناد خزانه "),
    126: (149, "104/51", "وظایف وزارت جهاد کشاورزی"),
    122: (150, "104/52", "وظایف صنایع و معادن"),
    41: (185, "304/1", "اتومبیل های دولتی"),
}


def apply_topic_override(topic_id, topic_code, caption):
    """Return the corrected (topic_id, topic_code, caption) triple for a topic.

    Topics listed in TOPIC_OVERRIDES are replaced wholesale; every other topic
    is returned unchanged. The ID is always a plain int and the code a plain
    str. The previous inline assignments ended in stray commas
    (``topic_id = 68,``), which silently produced 1-tuples.
    """
    return TOPIC_OVERRIDES.get(topic_id, (topic_id, topic_code, caption))


def iter_rows(cur):
    """Yield rows from ``cur.fetchone()`` until it returns a falsy value."""
    while True:
        row = cur.fetchone()
        if not row:
            return
        yield row


def build_topic_list(rows, normalize=None):
    """Convert database rows into the list of topic dicts to be exported.

    Args:
        rows: iterable of objects exposing ``ID``, ``CODE`` and ``CAPTION``
            attributes.
        normalize: callable applied to each caption string. Defaults to
            ``normalize_content``, which this file obtains from one of its
            star imports.

    Returns:
        A list of ``{"topic_id", "topic_code", "caption"}`` dicts, one per row,
        in the order the rows were supplied.
    """
    if normalize is None:
        # Looked up at call time so the helper works with any injected callable.
        normalize = normalize_content
    topics = []
    for row in rows:
        # Normalisation runs before the override, so override captions are
        # emitted verbatim (same order as the original script).
        topic_id, topic_code, caption = apply_topic_override(
            int(row.ID),
            str(row.CODE).strip(),
            normalize(str(row.CAPTION)),
        )
        topics.append({
            "topic_id": topic_id,
            "topic_code": topic_code,
            "caption": caption,
        })
    return topics


def main():
    """Export all lwTopic rows (with manual corrections) to a JSON file.

    Uses the module-level ``cursor`` / ``cnxn`` handles and always closes them,
    even when reading the rows fails.
    """
    try:
        cursor.execute(TOPIC_QUERY)
        started_at = datetime.datetime.now()
        print("start time: " + str(started_at))
        topic_list = build_topic_list(iter_rows(cursor))
    finally:
        cursor.close()
        cnxn.close()

    # NOTE(review): "impoert_data" looks like a typo for "import_data"; left
    # unchanged because the real directory name cannot be confirmed from here.
    address = "./impoert_data/data/section_topic.json"
    result = write_to_json(topic_list, address)
    if result:
        print("all done correctly!")
    else:
        print("operation failed!")
    print()
    finished_at = datetime.datetime.now()
    # Count the rows actually exported; the old loop counter also counted the
    # final, empty fetch and therefore reported one row too many.
    print("update rows count: " + str(len(topic_list)))
    print("finished time: " + str(finished_at))
    print("process time: " + str(finished_at - started_at))
    if result:
        # Only claim success when the write reported success.
        print('*********** end success ***********')


if __name__ == "__main__":
    main()