send editions to git

2025-07-15 17:39:08 +03:30 · 2025-07-15 17:39:08 +03:30 · ae1a9b11c0
commit ae1a9b11c0
parent de17c18a1a
5 changed files with 7 additions and 8 deletions
--- a/check_tokenizers.py
+++ b/check_tokenizers.py
@ -1,7 +1,7 @@
 from transformers import AutoTokenizer
 import json

-file = open('models_info.json', 'r')
+file = open('./data/models_info.json', 'r')
 models = json.load(file)

 # Strips the newline character
--- a/data/huggingface-fa-models.txt
+++ b/data/huggingface-fa-models.txt
--- a/data/models_info.json
+++ b/data/models_info.json
--- a/data/tokenizer-fa-models-logs.txt
+++ b/data/tokenizer-fa-models-logs.txt
--- a/get_persian_nlp_models.py
+++ b/get_persian_nlp_models.py
@ -1,16 +1,15 @@
 from huggingface_hub import HfApi, ModelFilter
 import json

-# Initialize the Hugging Face API
 api = HfApi()

-# Define the languages you're interested in
+# Persian language tags
 languages = ['pes', 'fas', 'fa']  # ISO 639 language codes, all for Persian
 nlp_tasks = ['text-classification', 'token-classification','table-question-answering','question-answering','zero-shot-classification','translation','summarization','feature-extraction','text-generation','text2text-generation','fill-mask','sentence-similarity']
-# Initialize a list to store the model information
+# list of model information
 models_info = []

-# Iterate over each language
+# Iterate over each language tag
 for lang in languages:
    # Filter models by language
    models = api.list_models(filter=ModelFilter(language=lang))
@ -30,8 +29,8 @@ for lang in languages:
        }
        models_info.append(model_info)

-# Save the collected data to a JSON file
-with open('models_info.json', 'w', encoding='utf-8') as f:
+# Save models_info
+with open('./data/models_info.json', 'w', encoding='utf-8') as f:
    json.dump(models_info, f, ensure_ascii=False, indent=4)

-print("Data collection complete. Saved to models_info.json")
+print("Finished!")