import pandas as pd from sqlalchemy import create_engine from sqlalchemy.exc import OperationalError import re import requests import json from decimal import Decimal # توکن و هدر برای ارسال درخواست به API TOKEN = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE3MTg3MTY3MTMsImp0aSI6Im1aY0MwSEdIV3dxb1ppWVwvb2VqMlwvT2FWc3FTOFIwSTkiLCJpc3MiOiJodHRwczpcL1wvY3AudGF2YXNpLmlyIiwiZXhwIjoxNzIwMDE2NzEyLCJhdWQiOiJodHRwczpcL1wvY3AudGF2YXNpLmlyIiwiZGF0YSI6eyJpZCI6NywiZmlyc3RfbmFtZSI6Ilx1MDY0NVx1MDYzNVx1MDYzN1x1MDY0MVx1MDZjYyIsImxhc3RfbmFtZSI6Ilx1MDYyOFx1MDY0N1x1MDYyZlx1MDYyN1x1MDY0Nlx1MDZjYyIsImVtYWlsIjoiamFtdXNiMjc0NzRAZ21haWwuY29tIiwidXNlcm5hbWUiOiJtYmVoZGFuaSIsInVzZXJfbGV2ZWwiOjF9fQ.NhxbjdXMCEb_ninOBKpzbUsaAmxva1zpShuesXrVpEQ' ACCEPT = "application/json" HEADERS = {"Authorization": TOKEN, "Accept": ACCEPT} # اطلاعات اتصال به پایگاه داده db_host = 'DESKTOP-0STSURA\\MUSTAFA' db_name = 'Qavanin' db_driver = 'ODBC Driver 17 for SQL Server' db_trusted_connection = 'yes' # ایجاد یک رشته اتصال (connection string) connection_string = f"mssql+pyodbc://@{db_host}/{db_name}?driver={db_driver}&Trusted_Connection={db_trusted_connection}" # ایجاد یک engine با استفاده از SQLAlchemy engine = create_engine(connection_string) # بررسی اتصال به پایگاه داده try: with engine.connect() as connection: print("اتصال به سرور موفقیت‌آمیز بود.") except OperationalError as e: print(f"خطا در اتصال به سرور: {e}") raise # اجرای کوئری SQL و خواندن داده‌ها به یک DataFrame query = """ SELECT (select Top(1) gl5.SECTIONTEXT from lwSectionLog gl5 where gl1.[F_LWSECTIONLOGID_EFFECTED]=gl5.ID ) as effect_prev_text, gl2.[SECTIONTEXT] as effected_text FROM [Qavanin].[dbo].[lwSectionChange] gl1 LEFT JOIN lwSection gl2 on gl2.ID=gl1.[F_LWSECTIONID_EFFECTED] LEFT JOIN lwLaw gl4 on gl1.F_LWLAWID_EFFECTIVE=gl4.ID WHERE gl4.ISLAW = 1 AND gl1.[F_LWSECTIONID_EFFECTED] is not null ORDER BY gl1.[F_LWSECTIONID_EFFECTED], gl4.APPROVEDATE """ df = pd.read_sql(query, engine) # تابع برای حذف کاراکترهای غیرمجاز def remove_illegal_chars(value): if isinstance(value, str): return re.sub(r'[\000-\010]|[\013-\014]|[\016-\037]', '', value) return value df = df.applymap(remove_illegal_chars) df = df[['effect_prev_text', 'effected_text']] num_rows = len(df) num_chunks = 15 chunk_size = num_rows // num_chunks + (1 if num_rows % num_chunks != 0 else 0) BASE_URL = "https://api.tavasi.ir/repo/dataset/multi/add/qasection/keyword" class JSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, Decimal): return float(obj) return json.JSONEncoder.default(self, obj) def isNeedHtml(html): if "/)*(?P' + exceptTags + ')(?P[^>]+)*>' html = re.sub(reg1, '', html) return html def createIndex(content, values): result_objects = [{ "task": "diff", "key": "qasection_ref_state_diff", "label": "تغییرات در قانون", "values": [{"text": values, "score": 4}] # مثال امتیاز تشابه }] output = { "content": content, "domain": "تغییرات قانون", "ref_id": "", "ref_url": "", "task": "diff", "result_objects": result_objects, } return output def send_data_to_api(data_list): bulk_data = [] for item in data_list: content = item["effect_prev_text"] values = item["effected_text"] # بررسی خالی نبودن content و values if not content or not values: continue # بررسی وجود تگ HTML خاص if isNeedHtml(content) or isNeedHtml(values): continue # حذف تگ‌های HTML از content و values content = removeHtmlTags(content) values = removeHtmlTags(values) data = createIndex(content, values) bulk_data.append(data) if len(bulk_data) > 10: payload = json.dumps(bulk_data, cls=JSONEncoder) response = requests.post(BASE_URL, headers=HEADERS, data=payload) bulk_data = [] if bulk_data: payload = json.dumps(bulk_data, cls=JSONEncoder) response = requests.post(BASE_URL, headers=HEADERS, data=payload) # for i, line in mainList : for i in range(num_chunks): start_row = i * chunk_size end_row = start_row + chunk_size chunk_df = df.iloc[start_row:end_row] data_list = [] for index, row in chunk_df.iterrows(): data_list.append({ "effect_prev_text": row['effect_prev_text'], "effected_text": row['effected_text'] }) send_data_to_api(data_list) print("تمام داده‌ها با موفقیت ارسال شدند.")