# Nlp_models/get_persian_nlp_models.py
# 2025-07-13 18:34:40 +03:30 — 37 lines, 1.5 KiB, Python
from huggingface_hub import HfApi, ModelFilter
import json
# Initialize the Hugging Face API client.
api = HfApi()

# ISO 639 language codes used for Persian (Farsi) on the Hub.
# (The original comment claimed Pashto/Turkish; all three codes are Persian.)
languages = ['pes', 'fas', 'fa']

# Pipeline tags we treat as NLP tasks worth collecting.
nlp_tasks = [
    'text-classification',
    'token-classification',
    'table-question-answering',
    'question-answering',
    'zero-shot-classification',
    'translation',
    'summarization',
    'feature-extraction',
    'text-generation',
    'text2text-generation',
    'fill-mask',
    'sentence-similarity',
]

# Collected metadata records, one dict per kept model.
models_info = []

for lang in languages:
    # Filter models on the Hub by language tag.
    # NOTE(review): ModelFilter is deprecated and removed in recent
    # huggingface_hub releases; api.list_models(language=lang) is the
    # modern equivalent — confirm against the pinned hub version.
    models = api.list_models(filter=ModelFilter(language=lang))
    for model in models:
        # Zero-padded ISO date. The original f-string concatenation
        # produced unpadded, non-sortable values like "2025-7-3".
        date = model.lastModified.strftime('%Y-%m-%d')
        task = model.pipeline_tag if model.pipeline_tag else 'Unknown'
        # Keep only models whose pipeline tag is a known NLP task,
        # or whose tag is missing entirely.
        if task not in nlp_tasks and task != 'Unknown':
            continue
        models_info.append({
            'model_name': model.modelId,
            'task': task,  # reuse computed value instead of re-deriving it
            'last_modified': date,
            'downloads': model.downloads,
            'likes': model.likes,
            'language': lang,
        })

# Persist the collected metadata as UTF-8 JSON (non-ASCII kept readable).
with open('models_info.json', 'w', encoding='utf-8') as f:
    json.dump(models_info, f, ensure_ascii=False, indent=4)
print("Data collection complete. Saved to models_info.json")