# In the name of God
from transformers import AutoTokenizer
from bidi.algorithm import get_display
from huggingface_hub import HfApi
import matplotlib.pyplot as plt
from datetime import date
import arabic_reshaper
from fpdf import FPDF
import random
import sqlite3
import string
import os

text = 'جمهوری موافقتنامه معاملات قانون بودجه اساسی قضائی بینالمللی تأسیس منطقهای لازمالاجراء دامپروری راهآهن کمیسیونهای جدیدالاحداث مسئول فرآورده زائد اسقاط پنجساله'

list1 = ["ID", "model_id", "url", "downloads", "private", "author", "tags", "tag_dataset",
         "tag_base_model", "tag_license", "tag_region", "pipeline_tag", "Likes", "languages",
         "library", "datasets", "license", "just_persian", "deleted", "date_added", "last_modified"]

cnt = sqlite3.connect(".\\db\\persian_nlp_model.db")
c = cnt.cursor()
today = date.today()
d1 = today.strftime("%d-%m-%Y")
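
# The queries below build SQL with f-strings, which breaks on quotes inside
# values and is open to SQL injection. A minimal sketch of the safer
# parameterized form (an illustration only, not used by the code below;
# `row` is a hypothetical 21-tuple matching the column order in list1):
#
# def insert_model_row(row):
#     placeholders = ",".join("?" * len(list1))
#     c.execute(f"INSERT INTO PersianNlp ({','.join(list1)}) VALUES ({placeholders})", row)
#     cnt.commit()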


# Use this block only the first time, when the models table has to be created
# c.execute("""CREATE TABLE PersianNlp(
#     ID INT PRIMARY KEY,
#     model_id TEXT,
#     url TEXT,
#     downloads INT,
#     private TEXT,
#     author TEXT,
#     tags TEXT,
#     tag_dataset TEXT,
#     tag_base_model TEXT,
#     tag_license TEXT,
#     tag_region TEXT,
#     pipeline_tag TEXT,
#     Likes INT,
#     languages TEXT,
#     library TEXT,
#     datasets TEXT,
#     license TEXT,
#     just_persian TEXT,
#     deleted TEXT,
#     date_added TEXT,
#     last_modified TEXT
# );""")


# Use this block to create the download-count history table
# c.execute("""CREATE TABLE downloadCountHistory(
#     ID INT PRIMARY KEY,
#     key_id INT,
#     downloads INT,
#     date TEXT
# );""")


# Common NLP tasks to search for
nlp_task_list = [
    "text-classification",
    "token-classification",
    "question-answering",
    "summarization",
    "translation",
    "text-generation",
    "fill-mask",
    "zero-shot-classification",
    "feature-extraction",
    "sentence-similarity",
    "text2text-generation",
    "conversational"
]


def generate_random_id(length=10, chars=string.ascii_letters + string.digits):
    """
    Generate a random ID of the given length from the given characters.

    :param length: length of the ID (default: 10)
    :param chars: string of allowed characters (default: upper/lowercase ASCII letters + digits)
    :return: the random ID string
    """
    return ''.join(random.choice(chars) for _ in range(length))
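
# For example, generate_random_id() might return something like "a8B3kZ01qX",
# and generate_random_id(12, string.digits) a 12-character numeric string.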


# Helper for preparing Persian text for FPDF output
def process_text_for_fpdf(text):
    # Step 1: reshape the letters (correct contextual joining forms)
    reshaped_text = arabic_reshaper.reshape(text)
    # Step 2: reorder the text for right-to-left display
    bidi_text = get_display(reshaped_text)
    return bidi_text
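
# For example, process_text_for_fpdf("سلام دنیا") returns the same words with
# joined glyph forms, reordered so FPDF renders them right-to-left correctly.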


def find_growth_slope(number_list):
    # Walk backwards from the newest value over the trailing monotonic run
    # (all-decreasing or all-increasing) and compute the percentage change
    # between the newest value and the start of that run. Returns the
    # percentage and how many positions back the run starts.
    try:
        n = -1
        index = 0
        last_num = number_list[-1]   # start of the trailing run (updated below)
        first_num = number_list[-1]  # newest value

        if number_list[-1] <= number_list[-2]:
            for num in range(len(number_list) - 1):
                if number_list[n - num] <= number_list[n - num - 1]:
                    last_num = number_list[n - num - 1]
                    index = n - num - 1
                else:
                    break

        if number_list[-1] >= number_list[-2] and last_num >= first_num:
            for num in range(len(number_list) - 1):
                if number_list[n - num] >= number_list[n - num - 1]:
                    last_num = number_list[n - num - 1]
                    index = n - num - 1
                else:
                    break
        percentage_growth = ((first_num - last_num) / last_num) * 100

    except Exception:  # e.g. fewer than two samples, or a zero baseline
        percentage_growth = 0

    return percentage_growth, abs(index)


# find_growth_slope([15,16,17,16,10,8])
# x = find_growth_slope([1,2,3,4,5,6,7,8,10])
# c = find_growth_slope([8,8,9,10,8,8,7])
# v = find_growth_slope([8,8,7,5,4,6,7,7])
# print("finish!")


def persian_model_finder(nlp_tasks, idx):
    today = date.today()
    download_date = today.strftime("%d/%m/%Y")
    idX = idx  # pass in the last ID already in the database so new rows do not clash with existing IDs
    api = HfApi()
    all_persian_nlp_models_data = []
    seen_model_ids = set()  # guards against adding duplicate models
    new_seen_ids = set()

    # First pass: collect the IDs of every matching model currently on the Hub,
    # so database entries that have disappeared can be flagged as deleted later.
    for task in nlp_tasks:

        models_for_task = api.list_models(
            language="fa",
            task=task,
            sort="downloads",
            direction=-1,  # descending (from most downloads to fewest)
            limit=None     # no cap: fetch every matching model; set a number to limit it
        )

        for model_info in models_for_task:
            new_seen_ids.add(model_info.id)

    print("Searching for and extracting Persian NLP model information...")

    # Second pass: filter and iterate over the models. For each NLP task we
    # search the Persian models and store any model not already in the database.
    # (No per-task limit is set here; with limit=None the API iterator pages
    # through all results.)

    try:
        allModel = c.execute('''SELECT *
                                FROM PersianNlp''')

        idX += 1

        for model in allModel:
            seen_model_ids.add(model[1])
            idX += 1
    except Exception:
        print("database not found!")

    for task in nlp_tasks:

        print(f"  Searching for task: {task} (language: Persian)...")
        models_for_task = api.list_models(
            language="fa",
            task=task,
            sort="downloads",
            direction=-1,  # descending (from most downloads to fewest)
            limit=None
        )

        for model_info in models_for_task:
            if model_info.id not in seen_model_ids:
                idX += 1
                model_ = api.model_info(model_info.id)  # full details for this model ID
                lastModified = model_.last_modified
                # Build a dictionary from the model card, which holds the model's
                # metadata (guarded: some models have no card data at all).
                card_data_dict = model_.card_data.to_dict() if model_.card_data else {}
                model_data = {
                    "model_id": model_info.id,
                    "url": f"https://huggingface.co/{model_info.id}",
                    "downloads": model_info.downloads,
                    "private": model_info.private,
                    "author": model_info.author,
                    "tags": model_info.tags,  # includes languages, tasks, libraries, ...
                    "tag_dataset": "-",
                    "tag_base_model": "-",
                    "tag_license": "-",
                    "tag_region": "-",
                    "pipeline_tag": model_info.pipeline_tag,  # the model's main task, as assigned by the Hub
                    "Likes": model_info.likes,
                    # the next four fields are read from the model-card dictionary
                    "languages": card_data_dict.get('language', 'N/A'),  # supported languages
                    "library": card_data_dict.get('library', 'N/A'),    # libraries used
                    "datasets": card_data_dict.get('datasets', 'N/A'),  # datasets used
                    "license": card_data_dict.get('license', 'N/A'),
                    "just_persian": "False",
                    "deleted": "False",
                    "date_added": f"{download_date}",
                    "last_modified": lastModified.strftime("%d-%m-%Y")
                }

                if model_data["library"] == 'N/A':  # some model cards store the library under 'library_name' instead
                    model_data["library"] = card_data_dict.get('library_name', 'N/A')
                # The condition below flags models that are Persian-only
                if (len(model_data["languages"]) == 2 and "multilingual" in model_data["languages"]) or \
                   (len(model_data["languages"]) == 2 and "persian" in model_data["languages"]) or \
                   (len(model_data["languages"]) == 2 and "farsi" in model_data["languages"]) or \
                   (len(model_data["languages"]) == 2 and "fas" in model_data["languages"]) or \
                   (len(model_data["languages"]) == 2 and model_data["languages"] == "fa") or \
                   model_data["languages"] == "persian" or \
                   model_data["languages"] == "farsi" or \
                   model_data["languages"] == "fas" or \
                   model_data["languages"] == "pes" or \
                   len(model_data["languages"]) == 1:
                    model_data["just_persian"] = "True"

                for value in model_data["tags"]:

                    if "dataset:" in value:
                        if isinstance(model_data["tag_dataset"], str):
                            model_data["tag_dataset"] = []  # replace the "-" placeholder with a list
                        model_data["tag_dataset"].append(str(value).replace("dataset:", ""))

                    if "base_model:" in value:
                        if isinstance(model_data["tag_base_model"], str):
                            model_data["tag_base_model"] = []  # replace the "-" placeholder with a list
                        model_data["tag_base_model"].append(str(value).replace("base_model:", ""))

                    if "region:" in value:
                        model_data["tag_region"] = str(value).replace("region:", "")

                    if "license:" in value:
                        model_data["tag_license"] = str(value).replace("license:", "")

                # all_persian_nlp_models_data.append(model_data)
                # NOTE: values are interpolated straight into the SQL string here;
                # see the parameterized sketch near the top of the file.
                c.execute(f"""INSERT INTO PersianNlp (ID,model_id,url,downloads,private,author,tags,tag_dataset,tag_base_model,tag_license,tag_region,pipeline_tag,Likes,languages,library,datasets,license,just_persian,deleted,date_added,last_modified)
                              VALUES ({idX},"{model_data["model_id"]}","{model_data["url"]}",{model_data["downloads"]},"{model_data["private"]}","{model_data["author"]}","{model_data["tags"]}","{model_data["tag_dataset"]}","{model_data["tag_base_model"]}","{model_data["tag_license"]}","{model_data["tag_region"]}","{model_data["pipeline_tag"]}",{model_data["Likes"]},"{model_data["languages"]}","{model_data["library"]}","{model_data["datasets"]}","{model_data["license"]}","{model_data["just_persian"]}","{model_data["deleted"]}","{model_data["date_added"]}","{model_data['last_modified']}");""")
                cnt.commit()
                seen_model_ids.add(model_info.id)

    print(f"\nTotal number of unique Persian NLP models found: {len(seen_model_ids)}")

    for modelID in seen_model_ids:
        if modelID not in new_seen_ids:

            c.execute(f'''UPDATE PersianNlp
                          SET deleted = 'True'
                          WHERE model_id = '{modelID}';''')

            cnt.commit()


# Pass the task list to search with first, then the last ID already present in
# the models table of the database.
# persian_model_finder(nlp_task_list,6600)
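
# A sketch (an assumption, not part of the original flow) for reading the
# highest existing ID from the table instead of passing it in by hand:
# row = c.execute("SELECT MAX(ID) FROM PersianNlp").fetchone()
# persian_model_finder(nlp_task_list, row[0] if row and row[0] is not None else 0)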


def search(name, search_by):

    X = "------------------------------------------------------------------------------------\n+-+-+-+- FOUND MODEL +-+-+-+-\n------------------------------------------------------------------------------------\n\n"
    n = 0

    if search_by == "name":

        model = c.execute(f'''SELECT *
                              FROM PersianNlp
                              WHERE model_id="{name}"''')
        for x in model:
            for y in x:
                X += f"{list1[n]} : {y}\n-----------------------------------------------------------------\n"
                n += 1
            X += "\n\n"
            n = 0
        print(X)

    if search_by == "task":

        model = c.execute(f'''SELECT *
                              FROM PersianNlp
                              WHERE pipeline_tag="{name}"''')

        for x in model:
            for y in x:
                X += f"{list1[n]} : {y}\n-----------------------------------------------------------------\n"
                n += 1
            X += "\n\n"
            n = 0
        print(X)


# search("text-ranking","task")


def pdf_saver(name):

    X = "------------------------------------------------------------------------------------\n+-+-+-+- FOUND MODEL +-+-+-+-\n------------------------------------------------------------------------------------\n\n"
    model = c.execute(f'''SELECT *
                          FROM PersianNlp
                          WHERE model_id="{name}"''')
    n = 0
    for x in model:
        for y in x:
            X += f"{list1[n]} : {y}\n-----------------------------------------------------------------\n"
            n += 1
    print(X)
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, X)
    pdf.output("found_model.pdf")
    print("PDF generated successfully!")


# pdf_saver("Alibaba-NLP/gte-multilingual-reranker-base")
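
# Note: FPDF's built-in Arial font is limited to Latin-1, so pdf_saver works
# for ASCII model metadata; Persian text needs the Unicode font used further
# below in singleModelInfo.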


# Find models by date; for example, models added since three months ago.
# More guidance at the end of this block.
def find_by_date(month_later, year_later):

    today = date.today()
    date_year = today.strftime("%Y")
    date_month = today.strftime("%m")
    month = int(date_month)
    year = int(date_year)
    allModel = c.execute('''SELECT *
                            FROM PersianNlp''')
    n = 0
    for model in allModel:
        # NOTE: this simple comparison does not wrap across year boundaries
        # (month - month_later can go negative).
        if int(model[19].split("/")[1]) >= month - month_later and int(model[19].split("/")[2]) >= year - year_later:
            X = ""
            for y in model:
                X += f"{list1[n]} : {y}\n-----------------------------------------------------------------\n"
                n += 1
            n = 0
            print(X)


# For example, to list the models from one year and six months ago until now,
# pass the months first and then the years, like this:
# find_by_date(6,1)

# Or, for the models of the last three months:
# find_by_date(3,0)


def add_download_count():

    count = 1
    api = HfApi()
    allModel = c.execute('''SELECT *
                            FROM PersianNlp''')
    all_model_id = []
    for model in allModel:
        all_model_id.append([model[0], model[1]])

    for id_ in all_model_id:
        # try:
        print(count)
        count += 1
        # NOTE: a random 12-digit number is used as the primary key of each snapshot row.
        id_12_digits = generate_random_id(length=12, chars=string.digits)
        model_details = api.model_info(repo_id=id_[1])
        c.execute(f"""INSERT INTO downloadCountHistory(ID,key_id,downloads,date)
                      VALUES ({id_12_digits},"{int(id_[0])}","{int(model_details.downloads)}","{str(d1)}");""")
        # c.execute(f"""INSERT INTO downloadCountHistory(ID,key_id,downloads,date)
        #               VALUES ({id_12_digits},"{int(id_[0])}","{int(model_details.downloads)+1}","22-08-2025");""")
        cnt.commit()
        # except:
        #     print("Error!!")


# add_download_count()
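
# The random 12-digit primary key can in principle collide with an existing
# row. A sketch (an assumption) that could run inside the loop above, before
# the INSERT, retrying until the ID is unused:
# while c.execute("SELECT 1 FROM downloadCountHistory WHERE ID = ?",
#                 (int(id_12_digits),)).fetchone():
#     id_12_digits = generate_random_id(length=12, chars=string.digits)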


def singleModelInfo(model_id_, month_later=6, year_later=0):

    today = date.today()
    date_year = today.strftime("%Y")
    date_month = today.strftime("%m")
    month = int(date_month)
    year = int(date_year)
    model_info = c.execute(f'''SELECT *
                               FROM PersianNlp
                               WHERE model_id = "{model_id_}"''')

    for model in model_info:
        m = model
        model_id = m[0]
        last_modified = m[20]
        Likes = m[12]
        task = m[11]

    downloadCountHistory = c.execute(f'''SELECT *
                                         FROM downloadCountHistory
                                         WHERE key_id = {model_id}''')

    n = 0
    downloads_list = []
    download_count_list = []
    download_date_list = []
    for model in downloadCountHistory:
        if int(model[3].split("-")[1]) >= month - month_later and int(model[3].split("-")[2]) >= year - year_later:

            download_count_list.append(model[2])
            download_date_list.append(model[3])
            downloads_list.append([model[3], model[2]])

    growth_slope, lenM = find_growth_slope(download_count_list)

    plt.figure()  # start from a fresh figure in case pyplot state is left over
    plt.plot(download_date_list, download_count_list, marker='o', linestyle='-')
    plt.savefig('Download_rate_chart.png', dpi=300)
    plt.close()  # release the figure so later plots start clean

    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('B Nazanin', '', '.\\fonts\\B Nazanin.ttf', uni=True)
    pdf.set_font("Arial", size=12)

    # add the heading text
    pdf.multi_cell(0, 10, f"Model -<< https://huggingface.co/{model_id_} >>- Information :")

    pdf.ln()
    pdf.multi_cell(0, 5, "Download rate chart : ")
    pdf.ln()
    # add the chart image
    pdf.image("Download_rate_chart.png", x=10, y=pdf.get_y() + 5, w=70)
    pdf.ln(20)  # one blank line after the image
    # pdf.cell(0, 10, " Download history chart ", ln=True, align='C')

    # add the table
    pdf.ln(50)

    pdf.ln()
    pdf.multi_cell(0, 5, "Download rate table : ")
    pdf.ln()
    # table header
    for header in ['Date', 'Download-rate']:
        pdf.cell(40, 10, header, 1, 0, 'C')
    pdf.ln()
    pdf.set_font("Arial", size=10)
    # data rows
    for row in downloads_list:
        for item in row:
            pdf.cell(40, 10, str(item), 1, 0, 'C')
        pdf.ln()

    pdf.ln()

    tokenizer = AutoTokenizer.from_pretrained(model_id_)
    tokens = tokenizer.tokenize(text)
    print(tokens)
    print(f'len(tokens): {len(tokens)}')
    results = {'model': model_id_, 'len': len(tokens), 'tokens': tokens}
    # results = str(results).encode('latin-1', 'replace').decode('latin-1')
    # results = str(results).replace("▁","")
    pdf.multi_cell(0, 3, f"Likes : {str(Likes)}")
    pdf.ln()
    pdf.multi_cell(0, 3, f"last_modified : {str(last_modified)}")
    pdf.ln()
    pdf.multi_cell(0, 3, f"Growth slope : {round(growth_slope, 2)} over {lenM} months.")
    pdf.ln()
    pdf.multi_cell(0, 7, f"task : {task}")
    pdf.ln()
    pdf.set_font("Arial", size=16)
    pdf.multi_cell(0, 5, "Tokenize info : ")
    pdf.ln()
    pdf.set_font("Arial", size=10)
    pdf.multi_cell(0, 3, f"len : {str(results['len'])}")
    pdf.ln()
    pdf.multi_cell(0, 3, "tokenized list : ")
    pdf.ln()
    txt = ''
    for token in results["tokens"]:
        txt += f' [ {token} ] '
    pdf.set_font("B Nazanin", size=10)
    pdf.multi_cell(0, 5, f"{process_text_for_fpdf(txt)}", align='R')
    pdf.output("singleModelInfo.pdf")
    print("PDF file created with FPDF.")
    os.remove("Download_rate_chart.png")


# singleModelInfo("amberoad/bert-multilingual-passage-reranking-msmarco")


def MultiModelInfo(limit_number=10):

    today = date.today()
    date_year = today.strftime("%Y")
    date_month = today.strftime("%m")
    month = int(date_month)
    year = int(date_year)
    Models_added_this_month = []
    Models_deleted = []
    all_id_download = []
    all_download = []
    growth_slope_list_info = []
    growth_slope_list = []
    model_info = c.execute('''SELECT *
                              FROM PersianNlp''')

    for model in model_info:

        if int(model[19].split("/")[1]) == month and int(model[19].split("/")[2]) == year:
            Models_added_this_month.append(model[1])
        if str(model[18]) == "True":
            Models_deleted.append(model[1])

        all_id_download.append([model[0], model[1], model[3], model[11], model[20]])

    # Keep only the models that have a download snapshot recorded this month,
    # and use that snapshot as their current download count.
    listX = []
    for model in all_id_download:

        downloadCountHistory = c.execute(f'''SELECT *
                                             FROM downloadCountHistory
                                             WHERE key_id = {model[0]}''')

        for models in downloadCountHistory:
            if int(models[3].split("-")[1]) == month and int(models[3].split("-")[2]) == year:
                model[2] = models[2]
                listX.append(model)
    all_id_download = listX

    for model in all_id_download:
        all_download.append(model[2])

    all_download.sort(reverse=True)
    maximum_download_list = all_download[0:limit_number]
    maximum_download_info_list = []
    n = 0
    for DCount in maximum_download_list:

        for model in all_id_download:
            if DCount == model[2]:
                if n < limit_number:
                    maximum_download_info_list.append(model)
                    n += 1

    # Find the steepest download growth over the recorded months:

    for model in all_id_download:
        growth_slope = []
        DHList = c.execute(f'''SELECT *
                               FROM "downloadCountHistory"
                               WHERE key_id = {model[0]}''')

        for data in DHList:
            growth_slope.append(data[2])

        growth_slopee, lenM = find_growth_slope(growth_slope)  # growth percentage for each model
        growth_slope_list.append(growth_slopee)
        growth_slope_list_info.append([model[1], growth_slopee, lenM])

    growth_slope_list.sort(reverse=True)
    maximum_growth_slope_list = growth_slope_list[0:limit_number]
    maximum_growth_slope_info_list = []
    n = 0
    for DCount in maximum_growth_slope_list:

        for model in growth_slope_list_info:
            if DCount == model[1]:
                if n < limit_number:
                    maximum_growth_slope_info_list.append(model)
                    n += 1

    # End of the slope search.

    model_id_list = []
    model_download_count_list = []
    for info in maximum_download_info_list:

        model_download_count_list.append(info[2])
        model_id_list.append(str(info[1]))

    listA = []
    for x in model_download_count_list:
        listA.append(int(x) / 1000000)
    model_download_count_list = listA

    plt.plot(model_id_list, model_download_count_list, marker='o', linestyle='-')
    plt.xticks(rotation=30, ha='right', fontsize=10)
    plt.xlabel("Model Name", color='blue')
    plt.ylabel("Download Count (million)", color='red')
    plt.tight_layout()
    plt.savefig(f'Top_{limit_number}_download_rate.png', dpi=300)
    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('B Nazanin', '', '.\\fonts\\B Nazanin.ttf', uni=True)
    pdf.set_font("Arial", size=16)
    # add the heading text
    pdf.multi_cell(0, 10, f"Top {limit_number} Model Information : \n ----------------------------------------------------------------")
    pdf.set_font("Arial", size=12)

    pdf.ln()
    pdf.multi_cell(0, 5, " Download Rate Chart :")
    pdf.ln()

    # add the chart image
    pdf.image(f'Top_{limit_number}_download_rate.png', x=10, y=pdf.get_y() + 5, w=70)
    pdf.ln(20)  # one blank line after the image
    # add the table
    pdf.ln(50)
    pdf.ln()
    pdf.multi_cell(0, 5, " Download Rate Table :")
    pdf.ln()
    # table header
    for header in ['Count', 'Model_name', 'Download-rate', 'Task', 'Last_modified']:
        if header == 'Model_name':
            pdf.cell(80, 10, header, 1, 0, 'C')
        else:
            pdf.cell(40, 10, header, 1, 0, 'C')
    pdf.ln()

    # data rows
    x = 1
    for row in maximum_download_info_list:
        n = 0
        row[0] = x
        x += 1
        for item in row:
            if n == 1:
                pdf.set_font("Arial", size=6)
                pdf.cell(80, 10, str(item), 1, 0, 'C')
                n += 1
            else:
                pdf.set_font("Arial", size=10)
                pdf.cell(40, 10, str(item), 1, 0, 'C')
                n += 1
        pdf.ln()

    model_name_list = []
    model_growth = []
    for info in maximum_growth_slope_info_list:

        model_name_list.append(info[0])
        model_growth.append(round(info[1], 2))

    pdf.ln()
    pdf.multi_cell(0, 5, " Download Growth Chart :")
    pdf.ln()
    plt.figure(figsize=(8, 6))  # overall figure size (width and height in inches)
    plt.bar(model_name_list, model_growth, color='lightgreen', width=0.4)
    plt.xticks(rotation=30, ha='right', fontsize=10)
    plt.xlabel("Model Name", color='blue')
    plt.ylabel("Model Growth (%)", color='red')
    plt.tight_layout()
    plt.savefig(f'Top_{limit_number}_growth_rate.png', dpi=300)
    pdf.image(f'Top_{limit_number}_growth_rate.png', x=10, y=pdf.get_y() + 5, w=70)
    pdf.ln(80)  # space after the image

    pdf.ln()
    pdf.multi_cell(0, 5, " Download Growth Table :")
    pdf.ln()
    # table header
    for header in ['Model_name', 'Growth-rate', 'Length-month']:
        if header == 'Model_name':
            pdf.cell(80, 10, header, 1, 0, 'C')
        else:
            pdf.cell(40, 10, header, 1, 0, 'C')
    pdf.ln()

    # data rows
    x = 1
    for row in maximum_growth_slope_info_list:
        n = 0
        x += 1
        for item in row:
            if n == 0:
                pdf.set_font("Arial", size=6)
                pdf.cell(80, 10, str(item), 1, 0, 'C')
                n += 1
            else:
                pdf.set_font("Arial", size=10)
                pdf.cell(40, 10, str(item), 1, 0, 'C')
                n += 1
        pdf.ln()

    pdf.ln()
    pdf.set_font("Arial", size=14)
    pdf.multi_cell(0, 5, "Models added this month :")
    pdf.set_font("Arial", size=6)
    pdf.ln()

    txt = ''
    n = 1
    for model_name in Models_added_this_month:
        txt += f"{n} --> {model_name}\n"
        n += 1
    pdf.multi_cell(0, 5, f"{txt}")

    pdf.ln()
    pdf.set_font("Arial", size=14)
    pdf.multi_cell(0, 5, "Models deleted :")
    pdf.set_font("Arial", size=6)
    pdf.ln()
    txt = ''
    n = 1
    for model_name in Models_deleted:
        txt += f"{n} --> {model_name}\n"
        n += 1
    pdf.multi_cell(0, 5, f"{txt}")

    pdf.output("MultiModelInfo.pdf")
    os.remove(f'Top_{limit_number}_download_rate.png')
    os.remove(f'Top_{limit_number}_growth_rate.png')


# MultiModelInfo(5)
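
# Note: MultiModelInfo only keeps download snapshots whose recorded date falls
# in the current month and year, so run add_download_count() first within the
# same month to have data to rank.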