import pandas as pd from sqlalchemy import create_engine from sqlalchemy.exc import OperationalError import re import requests import json from decimal import Decimal # توکن و هدر برای ارسال درخواست به API TOKEN = 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE3MTg3MTY3MTMsImp0aSI6Im1aY0MwSEdIV3dxb1ppWVwvb2VqMlwvT2FWc3FTOFIwSTkiLCJpc3MiOiJodHRwczpcL1wvY3AudGF2YXNpLmlyIiwiZXhwIjoxNzIwMDE2NzEyLCJhdWQiOiJodHRwczpcL1wvY3AudGF2YXNpLmlyIiwiZGF0YSI6eyJpZCI6NywiZmlyc3RfbmFtZSI6Ilx1MDY0NVx1MDYzNVx1MDYzN1x1MDY0MVx1MDZjYyIsImxhc3RfbmFtZSI6Ilx1MDYyOFx1MDY0N1x1MDYyZlx1MDYyN1x1MDY0Nlx1MDZjYyIsImVtYWlsIjoiamFtdXNiMjc0NzRAZ21haWwuY29tIiwidXNlcm5hbWUiOiJtYmVoZGFuaSIsInVzZXJfbGV2ZWwiOjF9fQ.NhxbjdXMCEb_ninOBKpzbUsaAmxva1zpShuesXrVpEQ' ACCEPT = "application/json" HEADERS = {"Authorization": TOKEN, "Accept": ACCEPT} # اطلاعات اتصال به پایگاه داده db_host = 'DESKTOP-0STSURA\\MUSTAFA' db_name = 'Qavanin' db_driver = 'ODBC Driver 17 for SQL Server' db_trusted_connection = 'yes' # ایجاد یک رشته اتصال (connection string) connection_string = f"mssql+pyodbc://@{db_host}/{db_name}?driver={db_driver}&Trusted_Connection={db_trusted_connection}" # ایجاد یک engine با استفاده از SQLAlchemy engine = create_engine(connection_string) # بررسی اتصال به پایگاه داده try: with engine.connect() as connection: print("اتصال به سرور موفقیتآمیز بود.") except OperationalError as e: print(f"خطا در اتصال به سرور: {e}") raise # اجرای کوئری SQL و خواندن دادهها به یک DataFrame query = """ SELECT (select Top(1) gl5.SECTIONTEXT from lwSectionLog gl5 where gl1.[F_LWSECTIONLOGID_EFFECTED]=gl5.ID ) as effect_prev_text, gl2.[SECTIONTEXT] as effected_text FROM [Qavanin].[dbo].[lwSectionChange] gl1 LEFT JOIN lwSection gl2 on gl2.ID=gl1.[F_LWSECTIONID_EFFECTED] LEFT JOIN lwLaw gl4 on gl1.F_LWLAWID_EFFECTIVE=gl4.ID WHERE gl4.ISLAW = 1 AND gl1.[F_LWSECTIONID_EFFECTED] is not null ORDER BY gl1.[F_LWSECTIONID_EFFECTED], gl4.APPROVEDATE """ df = pd.read_sql(query, engine) # تابع برای حذف کاراکترهای غیرمجاز def remove_illegal_chars(value): if isinstance(value, str): return re.sub(r'[\000-\010]|[\013-\014]|[\016-\037]', '', value) return value df = df.applymap(remove_illegal_chars) df = df[['effect_prev_text', 'effected_text']] num_rows = len(df) num_chunks = 15 chunk_size = num_rows // num_chunks + (1 if num_rows % num_chunks != 0 else 0) BASE_URL = "https://api.tavasi.ir/repo/dataset/multi/add/qasection/keyword" class JSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, Decimal): return float(obj) return json.JSONEncoder.default(self, obj) def isNeedHtml(html): if "