# data_processes/test/test__p3_words_embedder.py
# 2025-08-11 19:56:29 +03:30
#
# 61 lines
# 2.6 KiB
# Python

import os
import sys
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(parent_dir)
from p3_words_embedder import do_word_embedder, get_sentence_embeddings, cosine_similarity
import unittest
import numpy as np
import json
from sentence_transformers import SentenceTransformer
class TestSentenceEmbedder(unittest.TestCase):
    """Tests for the helpers imported from ``p3_words_embedder``.

    Exercises ``get_sentence_embeddings``, ``cosine_similarity`` and
    ``do_word_embedder``.
    """

    @classmethod
    def setUpClass(cls):
        """Load the reference model once and record its embedding width."""
        # Loading the model is expensive, so it is done a single time for the
        # whole test class rather than per test.
        cls.model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
        # Take the expected vector width from the model instead of repeating a
        # magic number in the assertions (the original assertion expected 384).
        # NOTE(review): assumes p3_words_embedder embeds with this same model — confirm.
        cls.embedding_dim = cls.model.get_sentence_embedding_dimension()

    def test_get_sentence_embeddings(self):
        """The embedding of a single sentence is a non-empty 1-D vector."""
        sentence = "This is a test sentence."
        embeddings = get_sentence_embeddings(sentence)
        self.assertEqual(len(embeddings.shape), 1, "Embedding should be a 1D vector.")
        self.assertGreater(embeddings.shape[0], 0, "Embedding vector should not be empty.")

    def test_cosine_similarity(self):
        """Orthogonal vectors score 0; vectors 45 degrees apart score 1/sqrt(2)."""
        vec1 = np.array([1, 0, 0])
        vec2 = np.array([0, 1, 0])
        vec3 = np.array([1, 1, 0])
        sim1 = cosine_similarity(vec1, vec2)
        sim2 = cosine_similarity(vec1, vec3)
        self.assertAlmostEqual(sim1, 0.0, places=5, msg="Cosine similarity between orthogonal vectors should be 0.")
        # cos(45 deg) = 1/sqrt(2) ~= 0.7071; compare against the exact value
        # rather than a truncated literal, keeping the original tolerance.
        self.assertAlmostEqual(sim2, 1 / np.sqrt(2), places=4, msg="Cosine similarity between vec1 and vec3 should be approximately 0.7071.")

    def test_do_word_embedder(self):
        """Every input section appears in the output file with a full-width embedding."""
        sections = {
            "sec1": {"content": "This is the first section."},
            "sec2": {"content": "This is the second section."}
        }
        do_word_embedder(sections)
        # Inspect the output file.
        # NOTE(review): presumably do_word_embedder writes this file relative to
        # the current working directory — confirm against the implementation.
        with open('sect_embeddings.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
        for key in sections:
            # subTest reports each section separately, so one bad section does
            # not hide problems in the others.
            with self.subTest(section=key):
                # Fail with a readable message instead of a KeyError when a
                # section is missing from the output.
                self.assertIn(key, data, f"Section {key} is missing from the output file.")
                self.assertIn('embeddings', data[key], f"Section {key} should contain 'embeddings'.")
                self.assertEqual(
                    len(data[key]['embeddings']),
                    self.embedding_dim,
                    f"Embedding size for section {key} should be {self.embedding_dim}.",
                )

    def test_cosine_similarity_edge_cases(self):
        """A zero vector is expected to make cosine_similarity raise ZeroDivisionError."""
        vec1 = np.array([0, 0, 0])
        vec2 = np.array([1, 1, 1])
        # NOTE(review): NumPy scalar division by zero normally yields nan with a
        # RuntimeWarning instead of raising, so this expectation holds only if
        # cosine_similarity raises ZeroDivisionError itself — confirm.
        with self.assertRaises(ZeroDivisionError, msg="Cosine similarity should handle zero vectors."):
            cosine_similarity(vec1, vec2)
# Run the suite when this file is executed directly rather than imported.
if "__main__" == __name__:
    unittest.main()