Compare commits


No commits in common. "792e7b6ba1c5274e4eb22e1ac921f648f6ada5d9" and "a61e9d68cae1d616b20de10116bbf25606423ab6" have entirely different histories.

22 changed files with 3487 additions and 20142 deletions


@@ -1,32 +1,28 @@
from transformers import AutoTokenizer
import json
-file = open('./data/models_ner_info.txt', 'r')
-models = file.readlines()
-file.close()
+file = open('./data/models_info.json', 'r')
+models = json.load(file)
-# Strips the newline character
text = 'جمهوری موافقت‌نامه معاملات قانون بودجه اساسی قضائی بین‌المللی تأسیس منطقه‌ای لازم‌الاجراء دامپروری راه‌آهن کمیسیون‌های جدیدالاحداث مسئول فرآورده زائد اسقاط پنجساله'
results = []
for line in models:
-    model_checkpoint = line.split(' -- ')[1].rstrip('\n')
-    # model_checkpoint = line['model_name']
+    model_checkpoint = line['model_name']
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
        print(model_checkpoint)
        tokens = tokenizer.tokenize(text)
        print(tokens)
        print(f'len(tokens): {len(tokens)}')
-        results.append({ 'model': model_checkpoint, 'len': len(tokens), 'tokens': tokens})
+        results.append({ 'model': model_checkpoint, 'tokens': tokens})
        if len(tokens) == 2 :
            # 'file' was opened for reading above, so report short tokenizations on stdout
            print('{' + model_checkpoint + ' : [ ' + ','.join(tokens) + ' ] }')
        # result = tokenizer(text)
        # print(result)
        # print(tokenizer.decode(result['input_ids']))
    except Exception:
        error = "An exception occurred in tokenizer : " + model_checkpoint
        # file.write( error + '\n' )
        print(error)
with open('./data/models_tokenizer_info.json', 'w', encoding='utf-8') as file:
    jsondata = json.dumps(results, ensure_ascii=False, indent=2)
    file.write(jsondata)
print('finished!')
# tokenizer.save_pretrained(model_checkpoint+'-tokenizer')
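# A quick sanity check on the saved output (a minimal sketch; it only assumes
# the ./data/models_tokenizer_info.json file written above): reload it and rank
# models by token count, since fewer tokens usually means the vocabulary
# covers Persian words better.
with open('./data/models_tokenizer_info.json', 'r', encoding='utf-8') as f:
    saved = json.load(f)
for entry in sorted(saved, key=lambda r: len(r['tokens'])):
    print(f"{len(entry['tokens']):3d} tokens -- {entry['model']}")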


@@ -1,8 +0,0 @@
HooshvareLab/bert-base-parsbert-ner-uncased
HooshvareLab/bert-fa-base-uncased-ner-peyma
HooshvareLab/bert-base-parsbert-armanner-uncased
HooshvareLab/bert-fa-base-uncased-ner-arman
HooshvareLab/bert-base-parsbert-peymaner-uncased
nicolauduran45/affilgood-ner-multilingual-v2
Amirmerfan/bert-base-uncased-persian-ner-50k-base
AliFartout/Roberta-fa-en-ner

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,258 +0,0 @@
[
{
"model_name": "HooshvareLab/bert-base-parsbert-ner-uncased",
"task": "token-classification",
"last_modified": "2021-5-18",
"downloads": 4060,
"likes": 5,
"language": "fa"
},
{
"model_name": "jplu/tf-xlm-r-ner-40-lang",
"task": "token-classification",
"last_modified": "2022-10-6",
"downloads": 874,
"likes": 27,
"language": "fa"
},
{
"model_name": "nicolauduran45/affilgood-ner-multilingual-v2",
"task": "token-classification",
"last_modified": "2025-4-16",
"downloads": 668,
"likes": 0,
"language": "fa"
},
{
"model_name": "HooshvareLab/bert-fa-zwnj-base-ner",
"task": "token-classification",
"last_modified": "2021-5-18",
"downloads": 407,
"likes": 4,
"language": "fa"
},
{
"model_name": "igorsterner/xlmr-multilingual-sentence-segmentation",
"task": "token-classification",
"last_modified": "2024-10-5",
"downloads": 256,
"likes": 4,
"language": "fa"
},
{
"model_name": "hamedkhaledi/persian-flair-ner",
"task": "token-classification",
"last_modified": "2022-4-3",
"downloads": 230,
"likes": 1,
"language": "fa"
},
{
"model_name": "HooshvareLab/bert-fa-base-uncased-ner-peyma",
"task": "token-classification",
"last_modified": "2021-5-18",
"downloads": 138,
"likes": 8,
"language": "fa"
},
{
"model_name": "mradermacher/persian-question-generator-GGUF",
"task": "Unknown",
"last_modified": "2024-12-8",
"downloads": 110,
"likes": 0,
"language": "fa"
},
{
"model_name": "HooshvareLab/bert-base-parsbert-armanner-uncased",
"task": "token-classification",
"last_modified": "2021-5-18",
"downloads": 103,
"likes": 3,
"language": "fa"
},
{
"model_name": "HooshvareLab/distilbert-fa-zwnj-base-ner",
"task": "token-classification",
"last_modified": "2021-3-21",
"downloads": 101,
"likes": 4,
"language": "fa"
},
{
"model_name": "HooshvareLab/roberta-fa-zwnj-base-ner",
"task": "token-classification",
"last_modified": "2021-5-20",
"downloads": 92,
"likes": 1,
"language": "fa"
},
{
"model_name": "HooshvareLab/bert-fa-base-uncased-ner-arman",
"task": "token-classification",
"last_modified": "2021-5-18",
"downloads": 71,
"likes": 0,
"language": "fa"
},
{
"model_name": "HooshvareLab/albert-fa-zwnj-base-v2-ner",
"task": "token-classification",
"last_modified": "2021-3-21",
"downloads": 58,
"likes": 0,
"language": "fa"
},
{
"model_name": "artificial-nerds/pmnet",
"task": "sentence-similarity",
"last_modified": "2024-10-29",
"downloads": 33,
"likes": 0,
"language": "fa"
},
{
"model_name": "HooshvareLab/bert-base-parsbert-peymaner-uncased",
"task": "token-classification",
"last_modified": "2021-5-18",
"downloads": 28,
"likes": 0,
"language": "fa"
},
{
"model_name": "Erfan/mT5-base_Farsi_Title_Generator",
"task": "text-generation",
"last_modified": "2022-1-30",
"downloads": 24,
"likes": 2,
"language": "fa"
},
{
"model_name": "myrkur/persian-question-generator",
"task": "summarization",
"last_modified": "2024-12-10",
"downloads": 24,
"likes": 2,
"language": "fa"
},
{
"model_name": "m3hrdadfi/albert-fa-base-v2-ner-arman",
"task": "token-classification",
"last_modified": "2020-12-26",
"downloads": 21,
"likes": 3,
"language": "fa"
},
{
"model_name": "hezarai/bert-fa-ner-arman",
"task": "token-classification",
"last_modified": "2024-11-14",
"downloads": 17,
"likes": 0,
"language": "fa"
},
{
"model_name": "m3hrdadfi/albert-fa-base-v2-ner-peyma",
"task": "token-classification",
"last_modified": "2020-12-26",
"downloads": 16,
"likes": 1,
"language": "fa"
},
{
"model_name": "Amirmerfan/bert-base-uncased-persian-ner-50k-base",
"task": "token-classification",
"last_modified": "2025-3-18",
"downloads": 16,
"likes": 0,
"language": "fa"
},
{
"model_name": "tum-nlp/neural-news-generator-bloomz-7b1-fa",
"task": "text-generation",
"last_modified": "2024-8-15",
"downloads": 14,
"likes": 0,
"language": "fa"
},
{
"model_name": "dadashzadeh/test-trainer-persian",
"task": "text-classification",
"last_modified": "2023-11-13",
"downloads": 11,
"likes": 0,
"language": "fa"
},
{
"model_name": "tum-nlp/neural-news-generator-llama-7b-fa",
"task": "text-generation",
"last_modified": "2024-8-15",
"downloads": 11,
"likes": 0,
"language": "fa"
},
{
"model_name": "AliFartout/Roberta-fa-en-ner",
"task": "token-classification",
"last_modified": "2023-9-1",
"downloads": 9,
"likes": 0,
"language": "fa"
},
{
"model_name": "myrkur/persian-title-generator",
"task": "summarization",
"last_modified": "2024-6-13",
"downloads": 9,
"likes": 2,
"language": "fa"
},
{
"model_name": "NLPclass/mt5-title-generation",
"task": "text-generation",
"last_modified": "2024-7-24",
"downloads": 9,
"likes": 0,
"language": "fa"
},
{
"model_name": "SinaRp/Question_generator_persian",
"task": "text-generation",
"last_modified": "2024-11-8",
"downloads": 9,
"likes": 1,
"language": "fa"
},
{
"model_name": "mansoorhamidzadeh/mt5-title-generation",
"task": "text-generation",
"last_modified": "2024-7-24",
"downloads": 8,
"likes": 0,
"language": "fa"
},
{
"model_name": "dz5035/bicleaner-ai-full-large-de-xx",
"task": "Unknown",
"last_modified": "2024-6-3",
"downloads": 4,
"likes": 0,
"language": "fa"
},
{
"model_name": "m0javad/conversational_Persian_paraphrase_generation",
"task": "Unknown",
"last_modified": "2024-2-17",
"downloads": 0,
"likes": 0,
"language": "fa"
},
{
"model_name": "PranavKeshav/event-planner-gemma-4bit",
"task": "text-generation",
"last_modified": "2025-5-17",
"downloads": 0,
"likes": 0,
"language": "fa"
}
]


@@ -1,17 +0,0 @@
4060 -- HooshvareLab/bert-base-parsbert-ner-uncased
874 -- jplu/tf-xlm-r-ner-40-lang
668 -- nicolauduran45/affilgood-ner-multilingual-v2
407 -- HooshvareLab/bert-fa-zwnj-base-ner
230 -- hamedkhaledi/persian-flair-ner
138 -- HooshvareLab/bert-fa-base-uncased-ner-peyma
103 -- HooshvareLab/bert-base-parsbert-armanner-uncased
101 -- HooshvareLab/distilbert-fa-zwnj-base-ner
92 -- HooshvareLab/roberta-fa-zwnj-base-ner
71 -- HooshvareLab/bert-fa-base-uncased-ner-arman
58 -- HooshvareLab/albert-fa-zwnj-base-v2-ner
28 -- HooshvareLab/bert-base-parsbert-peymaner-uncased
21 -- m3hrdadfi/albert-fa-base-v2-ner-arman
17 -- hezarai/bert-fa-ner-arman
16 -- m3hrdadfi/albert-fa-base-v2-ner-peyma
16 -- Amirmerfan/bert-base-uncased-persian-ner-50k-base
9 -- AliFartout/Roberta-fa-en-ner


@@ -1,465 +0,0 @@
[
{
"model": "HooshvareLab/bert-base-parsbert-ner-uncased",
"len": 20,
"tokens": [
"جمهوری",
"موافقتنامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"قضايی",
"بینالمللی",
"تاسیس",
"منطقهای",
"لازمالاجراء",
"دامپروری",
"راهاهن",
"کمیسیونهای",
"جدیدالاحداث",
"مسيول",
"فراورده",
"زايد",
"اسقاط",
"پنجساله"
]
},
{
"model": "nicolauduran45/affilgood-ner-multilingual-v2",
"len": 39,
"tokens": [
"▁جمهوری",
"▁موافقت",
"▁نامه",
"▁معاملات",
"▁قانون",
"▁بودجه",
"▁اساسی",
"▁قضا",
"ئی",
"▁بین",
"▁المللی",
"▁تأسیس",
"▁منطقه",
"▁ای",
"▁لازم",
"▁الاج",
"راء",
"▁دام",
"پر",
"وری",
"▁راه",
"▁آهن",
"▁کمیسیون",
"▁های",
"▁جدید",
"الا",
"حد",
"اث",
"▁مسئول",
"▁فر",
"آور",
"ده",
"▁زائد",
"▁اس",
"ق",
"اط",
"▁پنج",
"سال",
"ه"
]
},
{
"model": "HooshvareLab/bert-fa-zwnj-base-ner",
"len": 37,
"tokens": [
"جمهوری",
"موافقت",
"[ZWNJ]",
"نامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"[UNK]",
"بین",
"[ZWNJ]",
"المللی",
"[UNK]",
"منطقه",
"[ZWNJ]",
"ای",
"لازم",
"[ZWNJ]",
"الاجرا",
"##ء",
"دامپروری",
"راه",
"[ZWNJ]",
"آ",
"##هن",
"کمیسیون",
"[ZWNJ]",
"های",
"جدیدا",
"##لاح",
"##داث",
"[UNK]",
"[UNK]",
"[UNK]",
"اسقاط",
"پنج",
"##ساله"
]
},
{
"model": "HooshvareLab/bert-fa-base-uncased-ner-peyma",
"len": 20,
"tokens": [
"جمهوری",
"موافقتنامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"قضايی",
"بینالمللی",
"تاسیس",
"منطقهای",
"لازمالاجراء",
"دامپروری",
"راهاهن",
"کمیسیونهای",
"جدیدالاحداث",
"مسيول",
"فراورده",
"زايد",
"اسقاط",
"پنجساله"
]
},
{
"model": "HooshvareLab/bert-base-parsbert-armanner-uncased",
"len": 20,
"tokens": [
"جمهوری",
"موافقتنامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"قضايی",
"بینالمللی",
"تاسیس",
"منطقهای",
"لازمالاجراء",
"دامپروری",
"راهاهن",
"کمیسیونهای",
"جدیدالاحداث",
"مسيول",
"فراورده",
"زايد",
"اسقاط",
"پنجساله"
]
},
{
"model": "HooshvareLab/distilbert-fa-zwnj-base-ner",
"len": 37,
"tokens": [
"جمهوری",
"موافقت",
"[ZWNJ]",
"نامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"[UNK]",
"بین",
"[ZWNJ]",
"المللی",
"[UNK]",
"منطقه",
"[ZWNJ]",
"ای",
"لازم",
"[ZWNJ]",
"الاجرا",
"##ء",
"دامپروری",
"راه",
"[ZWNJ]",
"آ",
"##هن",
"کمیسیون",
"[ZWNJ]",
"های",
"جدیدا",
"##لاح",
"##داث",
"[UNK]",
"[UNK]",
"[UNK]",
"اسقاط",
"پنج",
"##ساله"
]
},
{
"model": "HooshvareLab/roberta-fa-zwnj-base-ner",
"len": 37,
"tokens": [
"ĠجÙħÙĩÙĪØ±ÛĮ",
"ĠÙħÙĪØ§ÙģÙĤت",
"âĢĮ",
"ÙĨاÙħÙĩ",
"ĠÙħعاÙħÙĦات",
"ĠÙĤاÙĨÙĪÙĨ",
"ĠبÙĪØ¯Ø¬Ùĩ",
"ĠاساسÛĮ",
"ĠÙĤضائÛĮ",
"ĠبÛĮÙĨ",
"âĢĮ",
"اÙĦÙħÙĦÙĦÛĮ",
"ĠتأسÛĮس",
"ĠÙħÙĨØ·ÙĤÙĩ",
"âĢĮ",
"اÛĮ",
"ĠÙĦازÙħ",
"âĢĮ",
"اÙĦاج",
"راء",
"ĠداÙħپرÙĪØ±ÛĮ",
"ĠراÙĩ",
"âĢĮ",
"Ø¢ÙĩÙĨ",
"ĠÚ©ÙħÛĮسÛĮÙĪÙĨ",
"âĢĮ",
"ÙĩاÛĮ",
"ĠجدÛĮد",
"اÙĦ",
"اØŃ",
"داث",
"ĠÙħسئÙĪÙĦ",
"ĠÙģØ±Ø¢ÙĪØ±Ø¯Ùĩ",
"Ġزائد",
"ĠاسÙĤاط",
"ĠÙ¾ÙĨج",
"ساÙĦÙĩ"
]
},
{
"model": "HooshvareLab/bert-fa-base-uncased-ner-arman",
"len": 20,
"tokens": [
"جمهوری",
"موافقتنامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"قضايی",
"بینالمللی",
"تاسیس",
"منطقهای",
"لازمالاجراء",
"دامپروری",
"راهاهن",
"کمیسیونهای",
"جدیدالاحداث",
"مسيول",
"فراورده",
"زايد",
"اسقاط",
"پنجساله"
]
},
{
"model": "HooshvareLab/albert-fa-zwnj-base-v2-ner",
"len": 46,
"tokens": [
"▁جمهوری",
"▁موافقت",
"[ZWNJ]",
"نامه",
"▁معاملات",
"▁قانون",
"▁بودجه",
"▁اساسی",
"▁قضا",
"ي",
"ی",
"▁بین",
"[ZWNJ]",
"الم",
"لل",
"ی",
"▁تاسیس",
"▁منطقه",
"[ZWNJ]",
"ای",
"▁لازم",
"[ZWNJ]",
"ال",
"اجرا",
"ء",
"▁دامپروری",
"▁راه",
"[ZWNJ]",
"اهن",
"▁کمیسیون",
"[ZWNJ]",
"های",
"▁جدید",
"الا",
"حد",
"اث",
"▁مس",
"ي",
"ول",
"▁فراورده",
"▁زا",
"ي",
"د",
"▁اسقاط",
"▁پنج",
"ساله"
]
},
{
"model": "HooshvareLab/bert-base-parsbert-peymaner-uncased",
"len": 20,
"tokens": [
"جمهوری",
"موافقتنامه",
"معاملات",
"قانون",
"بودجه",
"اساسی",
"قضايی",
"بینالمللی",
"تاسیس",
"منطقهای",
"لازمالاجراء",
"دامپروری",
"راهاهن",
"کمیسیونهای",
"جدیدالاحداث",
"مسيول",
"فراورده",
"زايد",
"اسقاط",
"پنجساله"
]
},
{
"model": "Amirmerfan/bert-base-uncased-persian-ner-50k-base",
"len": 56,
"tokens": [
"جمهوری",
"م",
"##وا",
"##فق",
"##تن",
"##ام",
"##ه",
"مع",
"##امل",
"##ات",
"قانون",
"بود",
"##جه",
"اساسی",
"ق",
"##ضا",
"##يی",
"بینالمللی",
"تاسیس",
"منطقه",
"##ای",
"لازم",
"##ال",
"##اج",
"##راء",
"دا",
"##م",
"##پر",
"##وری",
"راه",
"##اه",
"##ن",
"کمی",
"##سی",
"##ون",
"##های",
"جدید",
"##ال",
"##اح",
"##دا",
"##ث",
"م",
"##سي",
"##ول",
"ف",
"##را",
"##ورد",
"##ه",
"ز",
"##ايد",
"اس",
"##قا",
"##ط",
"پنج",
"##سال",
"##ه"
]
},
{
"model": "AliFartout/Roberta-fa-en-ner",
"len": 39,
"tokens": [
"▁جمهوری",
"▁موافقت",
"▁نامه",
"▁معاملات",
"▁قانون",
"▁بودجه",
"▁اساسی",
"▁قضا",
"ئی",
"▁بین",
"▁المللی",
"▁تأسیس",
"▁منطقه",
"▁ای",
"▁لازم",
"▁الاج",
"راء",
"▁دام",
"پر",
"وری",
"▁راه",
"▁آهن",
"▁کمیسیون",
"▁های",
"▁جدید",
"الا",
"حد",
"اث",
"▁مسئول",
"▁فر",
"آور",
"ده",
"▁زائد",
"▁اس",
"ق",
"اط",
"▁پنج",
"سال",
"ه"
]
}
]

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.


@@ -1,11 +1,7 @@
-from huggingface_hub import HfApi
-from huggingface_hub import ModelCard
-from huggingface_hub import ModelFilter
+from huggingface_hub import HfApi, ModelFilter
import json
api = HfApi()
# card = ModelCard.load('model-name')
# Persian language tags
languages = ['pes', 'fas', 'fa'] # ISO 639 codes: all three denote Persian (Farsi)


@@ -0,0 +1,173 @@
from huggingface_hub import HfApi
import json
import datetime
# common NLP tasks
nlp_task_list = [
    "text-classification",
    "token-classification",
    "question-answering",
    "summarization",
    "translation",
    "text-generation",
    "fill-mask",
    "zero-shot-classification",
    "feature-extraction",
    "sentence-similarity",
    "text2text-generation",
    "conversational"
]
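# For reference, the Hub query this script builds on can be tried on its own
# (a minimal sketch using the public HfApi.list_models parameters; the limit
# of 5 is an arbitrary choice for the demo):
#   api = HfApi()
#   for m in api.list_models(language="fa", task="token-classification",
#                            sort="downloads", direction=-1, limit=5):
#       print(m.id, m.downloads, m.pipeline_tag)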
def persian_model_finder(nlp_tasks, json_file_name):
    api = HfApi()
    all_persian_nlp_models_data = []
    seen_model_ids = set()  # to avoid adding duplicate models
    print("Searching for Persian NLP models and extracting their metadata...")
    # Filter and iterate over the models: for each NLP task, search for the
    # Persian models. 'limit' can cap how many models are fetched per task to
    # avoid downloading too much; with limit=None everything is fetched.
    for task in nlp_tasks:
        print(f"  Searching task: {task} (language: Persian)...")
        try:
            # materialized as a list so the results can be counted below
            models_for_task = list(api.list_models(
                language="fa",
                task=task,
                sort="downloads",
                direction=-1,  # descending (most to fewest downloads)
                limit=None  # set a number here to cap results per task
            ))
            for model_info in models_for_task:
                if model_info.id not in seen_model_ids:
                    try:  # if extra metadata can be read from the model card:
                        model_ = api.model_info(model_info.id)  # fetch the full model info
                        card_data_dict = model_.card_data.to_dict()  # dict built from the model card, which holds its metadata
                        model_data = {
                            "model_id": model_info.id,
                            "url": f"https://huggingface.co/{model_info.id}",
                            "downloads": model_info.downloads,
                            "private": model_info.private,
                            "author": model_info.author,
                            "tags": model_info.tags,  # includes languages, tasks, libraries, etc.
                            "tag_dataset": "-",
                            "tag_base_model": "-",
                            "tag_license": "-",
                            "tag_region": "-",
                            "pipeline_tag": model_info.pipeline_tag,  # the model's main task as assigned by the Hub
                            "Likes": model_info.likes,
                            # the four fields below are read from the model-card dict
                            "languages": card_data_dict.get('language', 'N/A'),  # supported languages
                            "library": card_data_dict.get('library', 'N/A'),  # libraries used
                            "datasets": card_data_dict.get('datasets', 'N/A'),  # datasets used
                            "license": card_data_dict.get('license', 'N/A'),
                            "just_persian": False
                        }
                        if model_data["library"] == 'N/A':  # some cards store the library under 'library_name' instead
                            model_data["library"] = card_data_dict.get('library_name', 'N/A')
                        # the check below flags the models that are Persian-only
                        langs = model_data["languages"]
                        if (langs in ("fa", "persian", "farsi", "fas", "pes")
                                or len(langs) == 1
                                or (len(langs) == 2 and ("multilingual" in langs or "persian" in langs
                                                         or "farsi" in langs or "fas" in langs))):
                            model_data["just_persian"] = True
                        for value in model_data["tags"]:
                            if "dataset:" in value:
                                if isinstance(model_data["tag_dataset"], str):
                                    model_data["tag_dataset"] = []  # replace the '-' placeholder with a list
                                model_data["tag_dataset"].append(value.replace("dataset:", ""))
                            if "base_model:" in value:
                                if isinstance(model_data["tag_base_model"], str):
                                    model_data["tag_base_model"] = []  # replace the '-' placeholder with a list
                                model_data["tag_base_model"].append(value.replace("base_model:", ""))
                            if "region:" in value:
                                model_data["tag_region"] = value.replace("region:", "")
                            if "license:" in value:
                                model_data["tag_license"] = value.replace("license:", "")
                        all_persian_nlp_models_data.append(model_data)
                        seen_model_ids.add(model_info.id)
                    except Exception:  # if reading the model card failed:
                        model_data = {
                            "model_id": model_info.id,
                            "url": f"https://huggingface.co/{model_info.id}",
                            "downloads": model_info.downloads,
                            "private": model_info.private,
                            "author": model_info.author,
                            "tags": model_info.tags,  # includes languages, tasks, libraries, etc.
                            "tag_dataset": "-",  # defaults so the tag loop below cannot fail
                            "tag_base_model": "-",
                            "tag_license": "-",
                            "tag_region": "-",
                            "pipeline_tag": model_info.pipeline_tag,  # the model's main task as assigned by the Hub
                            "Likes": model_info.likes,
                            "library": model_info.library_name,
                            # add the license if it is available
                            "license": model_info.card_data.license if model_info.card_data and model_info.card_data.license else "N/A"
                        }
                        for value in model_data["tags"]:
                            if "dataset:" in value:
                                if isinstance(model_data["tag_dataset"], str):
                                    model_data["tag_dataset"] = []  # replace the '-' placeholder with a list
                                model_data["tag_dataset"].append(value.replace("dataset:", ""))
                            if "base_model:" in value:
                                if isinstance(model_data["tag_base_model"], str):
                                    model_data["tag_base_model"] = []  # replace the '-' placeholder with a list
                                model_data["tag_base_model"].append(value.replace("base_model:", ""))
                            if "region:" in value:
                                model_data["tag_region"] = value.replace("region:", "")
                            if "license:" in value:
                                model_data["tag_license"] = value.replace("license:", "")
                        all_persian_nlp_models_data.append(model_data)
                        seen_model_ids.add(model_info.id)
            print(f"  Models found for task '{task}': {len(models_for_task)}")
        except Exception as e:
            print(f"  Error while searching task {task}: {e}")
    # Final sort of the whole list by download count, so that models gathered
    # from different tasks still end up ordered by downloads.
    all_persian_nlp_models_data_sorted = sorted(all_persian_nlp_models_data, key=lambda x: x['downloads'], reverse=True)
    print(f"\nTotal unique Persian NLP models found: {len(all_persian_nlp_models_data_sorted)}")
    # save the data to a JSON file
    output_json_file = json_file_name
    with open(output_json_file, "w", encoding="utf-8") as f:
        json.dump(all_persian_nlp_models_data_sorted, f, ensure_ascii=False, indent=4)
    print(f"Saved information for {len(all_persian_nlp_models_data_sorted)} models to '{output_json_file}'.")
persian_model_finder(nlp_task_list, "persian_nlp_models_info.json")
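# Downstream scripts can then consume the JSON written above; for example, to
# show the ten most-downloaded models and their Hub task (a minimal sketch):
with open("persian_nlp_models_info.json", encoding="utf-8") as f:
    found_models = json.load(f)
for m in found_models[:10]:
    print(f"{m['downloads']:>8}  {m['pipeline_tag']}  {m['model_id']}")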


@@ -1,3 +0,0 @@
facebook/xlm-v-base
sharif-dal/dal-bert
lifeweb-ai/shiraz


@@ -1,30 +0,0 @@
import json
with open('./data/models_info.json', 'r', encoding='utf-8') as file:
    models = json.load(file)
ner_models = []
for mdl in models:
    # match 'ner' case-insensitively (covers 'ner', 'Ner', 'NER', ...)
    if 'ner' in mdl['model_name'].lower():
        ner_models.append(mdl)
        # [mdl['model_name']] = {
        #     "task": mdl['task'],
        #     "last_modified": mdl['last_modified'],
        #     "downloads": int(mdl['downloads']),
        #     "likes": mdl['likes'],
        #     "language": mdl['language']
        # }
sorted_ner_models = sorted(ner_models, key=lambda x: x["downloads"], reverse=True)
with open('./data/models_ner_info.json', 'w', encoding='utf-8') as file:
    jsondata = json.dumps(sorted_ner_models, ensure_ascii=False, indent=2)
    file.write(jsondata)
ner_models_text = ''
for mdl in sorted_ner_models:
    ner_models_text += f"{mdl['downloads']} -- {mdl['model_name']}\n"
with open('./data/models_ner_info.txt', 'w', encoding='utf-8') as file:
    file.write(ner_models_text)
print(len(ner_models))
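# The text file written above has one line per model, of the form
#   4060 -- HooshvareLab/bert-base-parsbert-ner-uncased
# which is the format the tokenizer script's line.split(' -- ') parsing expects.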


@@ -1,83 +1,58 @@
# In the name of God
from apscheduler.schedulers.background import BackgroundScheduler
from transformers import AutoTokenizer
from bidi.algorithm import get_display
from huggingface_hub import HfApi
import matplotlib.pyplot as plt
from fastapi import FastAPI
from datetime import date
import arabic_reshaper
from fpdf import FPDF
import threading
import random
import logging
import sqlite3
import string
import os
# configure logging to see the scheduler output
logging.basicConfig(level=logging.INFO)
logging.getLogger('apscheduler').setLevel(logging.INFO)
first_id = 6600
text = 'جمهوری موافقت‌نامه معاملات قانون بودجه اساسی قضائی بین‌المللی تأسیس منطقه‌ای لازم‌الاجراء دامپروری راه‌آهن کمیسیون‌های جدیدالاحداث مسئول فرآورده زائد اسقاط پنجساله'
list1 = ["ID","model_id","url","downloads","private","author","tags","tag_dataset",\
"tag_base_model","tag_license","tag_region","pipeline_tag","Likes","languages",\
"library","datasets","license","just_persian","deleted","date_added","last_modified"]
cnt = sqlite3.connect(".\\db\\persian_nlp_model.db", check_same_thread=False)
"library","datasets","license","just_persian","deleted","date_added"]
cnt = sqlite3.connect("persian_nlp_model.db")
c = cnt.cursor()
today = date.today()
d1 = today.strftime("%d-%m-%Y")
-create_tables=True
-if create_tables == True :
-try:
-# Use these lines only the very first time, when the tables are created
-c.execute("""CREATE TABLE PersianNlp(
-ID INT PRIMARY KEY ,
-model_id TEXT ,
-url TEXT ,
-downloads INT,
-private TEXT,
-author TEXT,
-tags TEXT,
-tag_dataset TEXT,
-tag_base_model TEXT,
-tag_license TEXT,
-tag_region TEXT,
-pipeline_tag TEXT,
-Likes INT,
-languages TEXT,
-library TEXT,
-datasets TEXT,
-license TEXT,
-just_persian TEXT,
-deleted TEXT,
-date_added TEXT,
-last_modified TEXT
-);""")
+# c.execute("""CREATE TABLE PersianNlp(
+# ID INT PRIMARY KEY ,
+# model_id TEXT ,
+# url TEXT ,
+# downloads INT,
+# private TEXT,
+# author TEXT,
+# tags TEXT,
+# tag_dataset TEXT,
+# tag_base_model TEXT,
+# tag_license TEXT,
+# tag_region TEXT,
+# pipeline_tag TEXT,
+# Likes INT,
+# languages TEXT,
+# library TEXT,
+# datasets TEXT,
+# license TEXT,
+# just_persian TEXT,
+# deleted TEXT,
+# date_added TEXT
+# );""")
-# Use these lines to create the download-count table
-c.execute("""CREATE TABLE downloadCountHistory(
-ID INT PRIMARY KEY ,
-key_id INT ,
-downloads INT,
-date TEXT
-);""")
-create_tables = False
-except Exception as e:
-print("--- An unexpected error occurred while creating the tables ---")
-print(f"Error text: {e}")
+# c.execute("""CREATE TABLE downloadCountHistory(
+# ID INT PRIMARY KEY ,
+# key_id INT ,
+# downloads INT,
+# date TEXT
+# );""")
@@ -114,358 +89,14 @@ def generate_random_id(length=10, chars=string.ascii_letters + string.digits):
# 3. Helper function for preparing Persian text
def process_text_for_fpdf(text):
# Step 1: reshape the letters (joined, correct presentation forms)
reshaped_text = arabic_reshaper.reshape(text)
# Step 2: reorder for right-to-left display
bidi_text = get_display(reshaped_text)
return bidi_text
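# Quick check of the helper (assumes the arabic_reshaper and python-bidi
# packages imported at the top):
#   print(process_text_for_fpdf('سلام دنیا'))  # joined glyphs in RTL visual order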
def add_download_count():
count = 1
api = HfApi()
allModel = c.execute(f'''SELECT * FROM "PersianNlp"
WHERE deleted != 'True' ;''')
all_model_id = []
for model in allModel:
all_model_id.append([model[0],model[1]])
for id_ in all_model_id:
# try:
print(count)
count+=1
id_12_digits = generate_random_id(length=12, chars=string.digits)
model_details = api.model_info(repo_id=id_[1])
c.execute(f"""INSERT INTO downloadCountHistory(ID,key_id,downloads,date)
VALUES ({id_12_digits},"{int(id_[0])}","{int(model_details.downloads)}","{str(d1)}");""")
# c.execute(f"""INSERT INTO downloadCountHistory(ID,key_id,downloads,date)
# VALUES ({id_12_digits},"{int(id_[0])}","{int(model_details.downloads)+1}","22-08-2025");""")
cnt.commit()
# except:
# print("Error!!")
# add_download_count()
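# The f-string INSERT above breaks as soon as a value contains a double quote;
# sqlite3's placeholder form is the safer equivalent (a sketch against the
# same downloadCountHistory table and variables):
#   c.execute(
#       "INSERT INTO downloadCountHistory (ID, key_id, downloads, date) VALUES (?, ?, ?, ?)",
#       (int(id_12_digits), int(id_[0]), int(model_details.downloads), str(d1)),
#   )
#   cnt.commit()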
def MultiModelInfo(limit_number=10):
today = date.today()
date_year = today.strftime("%Y")
date_month = today.strftime("%m")
month = int(date_month)
year = int(date_year)
Models_added_this_month=[]
Models_deleted=[]
all_id_download = []
all_download = []
growth_slope_list_info = []
growth_slope_list = []
model_info = c.execute(f'''SELECT *
FROM PersianNlp''')
for model in model_info:
if int(model[19].split("/")[1]) == month and int(model[19].split("/")[2]) == year :
Models_added_this_month.append(model[1])
if str(model[18]) == "True":
Models_deleted.append(model[1])
all_id_download.append([model[0],model[1],model[3],model[11],model[20]])
listX=[]
for model in all_id_download:
downloadCountHistory = c.execute(f'''SELECT *
FROM downloadCountHistory
WHERE key_id = {model[0]}''')
for models in downloadCountHistory :
if int(models[3].split("-")[1]) == month and int(models[3].split("-")[2]) == year :
model[2]=models[2]
listX.append(model)
all_id_download = listX
for model in all_id_download:
all_download.append(model[2])
all_download.sort(reverse=True)
maximum_download_list = all_download[0:limit_number]
maximum_download_info_list = []
n=0
for DCount in maximum_download_list:
for model in all_id_download:
if DCount == model[2]:
if n < limit_number :
maximum_download_info_list.append(model)
n+=1
# find the steepest download growth over the past months:
for model in all_id_download:
growth_slope = []
DHList =c.execute(f'''SELECT *
FROM "downloadCountHistory"
WHERE key_id = {model[0]}''')
for data in DHList:
growth_slope.append(data[2])
growth_slopee , lenM = find_growth_slope(growth_slope) # compute each model's growth percentage
growth_slope_list.append(growth_slopee)
growth_slope_list_info.append([model[1],growth_slopee,lenM,model[2],model[3]])
growth_slope_list.sort(reverse=True)
maximum_growth_slope_list = growth_slope_list[0:limit_number]
maximum_growth_slope_info_list = []
n=0
for DCount in maximum_growth_slope_list:
for model in growth_slope_list_info:
if DCount == model[1]:
if n < limit_number :
maximum_growth_slope_info_list.append(model)
n+=1
# end of the growth-slope search
model_id_list = []
model_download_count_list = []
for info in maximum_download_info_list:
model_download_count_list.append(info[2])
model_id_list.append(str(info[1]))
listA = []
for x in model_download_count_list:
listA.append(int(x)/1000000)
model_download_count_list = listA
plt.plot(model_id_list,model_download_count_list,marker='o', linestyle='-')
plt.xticks(rotation=30, ha='right', fontsize=10)
plt.xlabel("Model Name", color='blue')
plt.ylabel("Download Count (milion)" ,color='red')
plt.tight_layout()
plt.savefig(f'Top_{limit_number}_download_rate.png', dpi=300)
pdf = FPDF()
pdf.add_page()
pdf.add_font('B Nazanin', '', '.\\fonts\\B Nazanin.ttf', uni=True)
pdf.set_font("Arial", size=16)
# add text
pdf.multi_cell(0, 10, f"Top {limit_number} Model Information : \n ----------------------------------------------------------------")
pdf.set_font("Arial", size=12)
pdf.ln()
pdf.multi_cell(0, 5, f" Download Rate Chart :")
pdf.ln()
# add the chart image
pdf.image(f'Top_{limit_number}_download_rate.png', x=10, y=pdf.get_y() + 5, w=70)
pdf.ln(20) # blank line after the image
# add the table
pdf.ln(50)
pdf.ln()
pdf.multi_cell(0, 5, f" Download Rate Table :")
pdf.ln()
# table header
for header in ['Count', 'Model_name','Download-rate','Task','Last_modified']:
if header == 'Model_name':
pdf.cell(80, 10, header, 1, 0, 'C')
else:
pdf.cell(40, 10, header, 1, 0, 'C')
pdf.ln()
# data rows
x=1
for row in maximum_download_info_list:
n=0
row[0] = x
x+=1
for item in row:
if n == 1 :
pdf.set_font("Arial", size=6)
pdf.cell(80, 10, f"https://huggingface.co/{str(item)}", 1, 0, 'C')
n+=1
else:
pdf.set_font("Arial", size=10)
pdf.cell(40, 10, str(item), 1, 0, 'C')
n+=1
pdf.ln()
pdf.add_page()
model_name_list = []
model_growth = []
for info in maximum_growth_slope_info_list:
model_name_list.append(info[0])
model_growth.append(round(info[1], 2) )
pdf.ln()
pdf.multi_cell(0, 5, f" Download Growth Chart :")
pdf.ln()
plt.figure(figsize=(8, 6)) # overall figure size (width and height in inches)
plt.bar(model_name_list,model_growth,color='lightgreen', width=0.4)
plt.xticks(rotation=30, ha='right', fontsize=10)
plt.xlabel("Model Name", color='blue')
plt.ylabel("Model Growth (%)" ,color='red')
plt.tight_layout()
plt.savefig(f'Top_{limit_number}_growth_rate.png', dpi=300)
pdf.image(f'Top_{limit_number}_growth_rate.png', x=10, y=pdf.get_y() + 5, w=70)
pdf.ln(80) # blank line after the image
pdf.ln()
pdf.multi_cell(0, 5, f" Download Growth Table :")
pdf.ln()
# table header
for header in [ 'Model_name','Growth-rate','Length-month','Download-rate','Task']:
if header == 'Model_name':
pdf.cell(80, 10, header, 1, 0, 'C')
else:
pdf.cell(30, 10, header, 1, 0, 'C')
pdf.ln()
# data rows
x=1
for row in maximum_growth_slope_info_list:
n=0
x+=1
for item in row:
if n == 0 :
pdf.set_font("Arial", size=6)
pdf.cell(80, 10, f"https://huggingface.co/{str(item)}", 1, 0, 'C')
n+=1
else:
pdf.set_font("Arial", size=6)
pdf.cell(30, 10, str(item), 1, 0, 'C')
n+=1
pdf.ln()
pdf.add_page()
pdf.ln()
pdf.set_font("Arial", size=14)
pdf.multi_cell(0, 5, f"Models added this month :")
pdf.set_font("Arial", size=6)
pdf.ln()
txt=''
n=1
for model_name in Models_added_this_month:
txt +=f"{n} --> {model_name}\n"
n+=1
pdf.multi_cell(0, 5, f"{txt}")
pdf.ln()
pdf.set_font("Arial", size=14)
pdf.multi_cell(0, 5, f"Models deleted :")
pdf.set_font("Arial", size=6)
pdf.ln()
txt=''
n=1
for model_name in Models_deleted:
txt +=f"{n} --> {model_name}\n"
n+=1
pdf.multi_cell(0, 5, f"{txt}")
pdf.output(f"MultiModelInfo_{today}.pdf")
os.remove(f'Top_{limit_number}_download_rate.png')
os.remove(f'Top_{limit_number}_growth_rate.png')
# MultiModelInfo(5)
def find_growth_slope(number_list):
# Returns the percentage change over the latest monotonic run of the series,
# plus the length (in samples) of that run.
try:
n = -1
index = 0
last_num = number_list[-1] # value at the start of the latest monotonic run
first_num = number_list[-1] # most recent value
if number_list[-1] <= number_list[-2]:
# walk backwards while the series keeps decreasing
for num in range(len(number_list)-1):
if number_list[n-num] <= number_list[n-num-1]:
last_num = number_list[n-num-1]
index = n-num-1
else:
break
if number_list[-1] >= number_list[-2] and last_num >= first_num:
# walk backwards while the series keeps increasing
for num in range(len(number_list)-1):
if number_list[n-num] >= number_list[n-num-1]:
last_num = number_list[n-num-1]
index = n-num-1
else:
break
percentage_growth = ((first_num - last_num) / last_num) * 100
except Exception: # too few samples, or a zero value at the start of the run
percentage_growth = 0
return percentage_growth ,abs(index)
# find_growth_slope([15,16,17,16,10,8])
# x = find_growth_slope([1,2,3,4,5,6,7,8,10])
# c = find_growth_slope([8,8,9,10,8,8,7])
# v = find_growth_slope([8,8,7,5,4,6,7,7])
# print("finish!")
def persian_model_finder(nlp_tasks,idx):
today = date.today()
download_date = today.strftime("%d/%m/%Y")
idX = idx # pass in the last ID already present in the database, so new rows do not collide with existing IDs
api = HfApi()
# all_persian_nlp_models_data = []
all_persian_nlp_models_data = []
seen_model_ids = set() # to avoid adding duplicate models
new_seen_ids = set()
for task in nlp_tasks:
models_for_task = api.list_models(
language="fa",
task=task,
sort="downloads",
direction=-1, # descending (most to fewest downloads)
limit=None # set a number here to cap results per task
)
for model_info in models_for_task:
new_seen_ids.add(model_info.id)
print("Searching for Persian NLP models and extracting their metadata...")
# filter and iterate over the models
@@ -476,12 +107,8 @@ def persian_model_finder(nlp_tasks,idx):
try:
allModel = c.execute(f'''SELECT *
FROM PersianNlp''')
idX+=1
for model in allModel:
seen_model_ids.add(model[1])
idX+=1
except Exception:
print("database not found!")
@@ -502,7 +129,6 @@ FROM PersianNlp''')
idX+=1
# try : # if extra metadata can be read from the model card:
model_ = api.model_info(model_info.id) # fetch the full model info
-lastModified = api.model_info(repo_id=model_info.id).last_modified
card_data_dict = model_.card_data.to_dict() # dict built from the model card, which holds its metadata
model_data = {
"model_id": model_info.id,
@@ -524,8 +150,7 @@ FROM PersianNlp''')
"license":card_data_dict.get('license', 'N/A'),
"just_persian" : "False",
"deleted" : "False",
"date_added" : f"{download_date}",
"last_modified" : f"{str(lastModified.strftime("%d-%m-%Y"))}"
"date_added" : f"{download_date}"
}
@@ -566,32 +191,19 @@ FROM PersianNlp''')
# all_persian_nlp_models_data.append(model_data)
c.execute(f"""INSERT INTO PersianNlp (ID,model_id,url,downloads,private,author,tags,tag_dataset,tag_base_model,tag_license,tag_region,pipeline_tag,Likes,languages,library,datasets,license,just_persian,deleted,date_added,last_modified)
VALUES ({idX},"{model_data["model_id"]}","{model_data["url"]}",{model_data["downloads"]},"{model_data["private"]}","{model_data["author"]}","{model_data["tags"]}","{model_data["tag_dataset"]}","{model_data["tag_base_model"]}","{model_data["tag_license"]}","{model_data["tag_region"]}","{model_data["pipeline_tag"]}",{model_data["Likes"]},"{model_data["languages"]}","{model_data["library"]}","{model_data["datasets"]}","{model_data["license"]}","{model_data["just_persian"]}","{model_data["deleted"]}","{model_data["date_added"]}","{model_data['last_modified']}");""")
c.execute(f"""INSERT INTO PersianNlp (ID,model_id,url,downloads,private,author,tags,tag_dataset,tag_base_model,tag_license,tag_region,pipeline_tag,Likes,languages,library,datasets,license,just_persian,deleted,date_added)
VALUES ({idX},"{model_data["model_id"]}","{model_data["url"]}",{model_data["downloads"]},"{model_data["private"]}","{model_data["author"]}","{model_data["tags"]}","{model_data["tag_dataset"]}","{model_data["tag_base_model"]}","{model_data["tag_license"]}","{model_data["tag_region"]}","{model_data["pipeline_tag"]}",{model_data["Likes"]},"{model_data["languages"]}","{model_data["library"]}","{model_data["datasets"]}","{model_data["license"]}","{model_data["just_persian"]}","{model_data["deleted"]}","{model_data["date_added"]}");""")
cnt.commit()
seen_model_ids.add(model_info.id)
print(f"\nتعداد کل مدل‌های NLP فارسی منحصربه‌فرد یافت شده: {len(seen_model_ids)}")
for modelID in seen_model_ids:
if modelID not in new_seen_ids:
c.execute(f'''UPDATE PersianNlp
SET deleted = 'True'
WHERE model_id = '{modelID}';''')
# first pass in the list of tasks to search, then the last ID present in the models table of the database
# persian_model_finder(nlp_task_list,8288)
cnt.commit()
add_download_count()
MultiModelInfo()
# first pass in the list of tasks to search, then the first ID present in the models table of the database
# persian_model_finder(nlp_task_list,6600)
def search(name,search_by):
X = "------------------------------------------------------------------------------------\n+-+-+-+- FOUND MODEL +-+-+-+-\n------------------------------------------------------------------------------------\n\n"
@@ -687,165 +299,28 @@ FROM PersianNlp''')
def singleModelInfo( model_id_ ,month_later = 6 , year_later = 0 ):
def add_download_count():
count = 1
api = HfApi()
allModel = c.execute(f'''SELECT *
FROM PersianNlp''')
all_model_id = []
for model in allModel:
all_model_id.append([model[0],model[1]])
today = date.today()
date_year = today.strftime("%Y")
date_month = today.strftime("%m")
month = int(date_month)
year = int(date_year)
model_info = c.execute(f'''SELECT *
FROM PersianNlp
WHERE model_id = "{model_id_}"''')
for model in model_info:
m = model
model_id = m[0]
last_modified = m[20]
Likes = m[12]
task = m[11]
downloadCountHistory = c.execute(f'''SELECT *
FROM downloadCountHistory
WHERE key_id = {model_id}''')
n=0
downloads_list = []
download_count_list = []
download_date_list = []
for model in downloadCountHistory :
if int(model[3].split("-")[1]) >= month-month_later and int(model[3].split("-")[2]) >= year-year_later :
download_count_list.append(model[2])
download_date_list.append(model[3])
downloads_list.append([model[3],model[2]])
growth_slope , lenM = find_growth_slope(download_count_list)
plt.plot(download_date_list,download_count_list, marker='o', linestyle='-')
plt.savefig('Download_rate_chart.png', dpi=300)
pdf = FPDF()
pdf.add_page()
pdf.add_font('B Nazanin', '', '.\\fonts\\B Nazanin.ttf', uni=True)
pdf.set_font("Arial", size=12)
# add text
pdf.multi_cell(0, 10, f"Model -<< https://huggingface.co/{model_id_} >>- Information :")
pdf.ln()
pdf.multi_cell(0, 5, f"Download rate chart : ")
pdf.ln()
# add the chart image
pdf.image("Download_rate_chart.png", x=10, y=pdf.get_y() + 5, w=70)
pdf.ln(20) # blank line after the image
# pdf.cell(0, 10, " Download history chart ", ln=True, align='C')
# add the table
pdf.ln(50)
pdf.ln()
pdf.multi_cell(0, 5, f"Download rate table : ")
pdf.ln()
# table header
for header in ['Date', 'Download-rate']:
pdf.cell(40, 10, header, 1, 0, 'C')
pdf.ln()
pdf.set_font("Arial", size=10)
# data rows
for row in downloads_list:
for item in row:
pdf.cell(40, 10, str(item), 1, 0, 'C')
pdf.ln()
pdf.ln()
tokenizer = AutoTokenizer.from_pretrained(model_id_)
tokens = tokenizer.tokenize(text)
print(tokens)
print(f'len(tokens): {len(tokens)}')
results = {'model': model_id_, 'len': len(tokens), 'tokens': tokens }
# results = str(results).encode('latin-1', 'replace').decode('latin-1')
# results = str(results).replace("▁","")
pdf.multi_cell(0, 3, f"Likes : {str(Likes)}")
pdf.ln()
pdf.multi_cell(0, 3, f"last_modyfied : {str(last_modyfied)}")
pdf.ln()
pdf.multi_cell(0, 3, f"Growth slope : {round(growth_slope, 2)} in {lenM} month .")
pdf.ln()
pdf.multi_cell(0, 7, f"task : {task}")
pdf.ln()
pdf.set_font("Arial", size=16)
pdf.multi_cell(0, 5, f"Tokenize info : ")
pdf.ln()
pdf.set_font("Arial", size=10)
pdf.multi_cell(0, 3, f"len : {str(results['len'])}")
pdf.ln()
pdf.multi_cell(0, 3, "tokenized list : ")
pdf.ln()
txt = ''
for token in results["tokens"]:
txt += f' [ {token} ] '
pdf.set_font("B Nazanin", size=10)
pdf.multi_cell(0, 5, f"{process_text_for_fpdf(txt)}",align='R')
pdf.output("singleModelInfo.pdf")
print("فایل PDF با FPDF ایجاد شد.")
os.remove("Download_rate_chart.png")
# singleModelInfo("amberoad/bert-multilingual-passage-reranking-msmarco")
# --- FastAPI / APScheduler integration ---
app = FastAPI()
scheduler = BackgroundScheduler()
# helper to fetch the latest ID before a job runs
# def get_last_id():
# try:
# last_id = c.execute("SELECT MAX(ID) FROM PersianNlp").fetchone()[0]
# return last_id if last_id is not None else 0
# except sqlite3.OperationalError:
# return 0
# scheduled job: run persian_model_finder on the last day of every month
def scheduled_job():
idx = first_id
print(f"Starting the scheduled job. Last ID: {idx}")
persian_model_finder(nlp_task_list, idx)
print("Scheduled job finished successfully.")
# scheduler.add_job(scheduled_job, 'cron', day='1', hour='0', minute='0')
# scheduler.add_job(scheduled_job, 'interval', minutes=5) # runs the job every 5 minutes; for testing only, otherwise keep it commented out
scheduler.add_job(scheduled_job, 'cron', day='last', hour='0', minute='0') # schedule for the last day of every month
# startup event: start the scheduler when the server boots
@app.on_event("startup")
def startup_event():
print("startup event: the server is starting and the scheduler is launching.")
scheduler.start()
print("Scheduler is active.")
# shutdown event: stop the scheduler when the server shuts down
@app.on_event("shutdown")
def shutdown_event():
print("shutdown event: the server is stopping and the scheduler is shutting down.")
scheduler.shutdown()
@app.get("/")
def read_root():
return {"message": "FastAPI service is running and the scheduler is active."}
@app.get("/run_monthly_job_now")
def run_job_manually():
print("Received a request to run the monthly job manually...")
# run the job in a separate thread so the server isn't blocked
threading.Thread(target=scheduled_job).start()
return {"message": "Monthly job has been triggered manually."}
for id_ in all_model_id:
# try:
print(count)
count+=1
id_12_digits = generate_random_id(length=12, chars=string.digits)
model_details = api.model_info(repo_id=id_[1])
c.execute(f"""INSERT INTO downloadCountHistory(ID,key_id,downloads,date)
VALUES ({id_12_digits},"{int(id_[0])}","{int(model_details.downloads)}","{str(d1)}");""")
cnt.commit()
# except:
# print("Error!!")
# add_download_count()