# Source listing metadata: 61 lines, 2.6 KiB, Python
import os
|
|
import sys
|
|
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
|
sys.path.append(parent_dir)
|
|
from p3_words_embedder import do_word_embedder, get_sentence_embeddings, cosine_similarity
|
|
import unittest
|
|
import numpy as np
|
|
import json
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
class TestSentenceEmbedder(unittest.TestCase):
    """Unit tests for the helpers imported from ``p3_words_embedder``.

    Exercises ``get_sentence_embeddings``, ``cosine_similarity`` and
    ``do_word_embedder``.
    """

    # Model loaded once for the whole class in ``setUpClass``.
    MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    # File read back by ``test_do_word_embedder`` after calling
    # ``do_word_embedder`` (path is relative to the working directory).
    OUTPUT_PATH = 'sect_embeddings.json'
    # Embedding length asserted for every section in the output file.
    # NOTE(review): presumably the output size of MODEL_NAME — this assumes
    # p3_words_embedder embeds with the same model; confirm.
    EMBEDDING_DIM = 384

    @classmethod
    def setUpClass(cls):
        """Load the sentence-transformer model once for all tests."""
        # NOTE(review): no test in this class reads ``cls.model``; presumably
        # it is kept so the model is loaded before the tests run — confirm.
        cls.model = SentenceTransformer(cls.MODEL_NAME)

    def test_get_sentence_embeddings(self):
        """``get_sentence_embeddings`` returns a non-empty 1-D vector."""
        sentence = "This is a test sentence."
        embeddings = get_sentence_embeddings(sentence)

        self.assertEqual(len(embeddings.shape), 1, "Embedding should be a 1D vector.")
        self.assertGreater(embeddings.shape[0], 0, "Embedding vector should not be empty.")

    def test_cosine_similarity(self):
        """``cosine_similarity`` gives 0 for orthogonal vectors and ~0.7071 at 45 degrees."""
        vec1 = np.array([1, 0, 0])
        vec2 = np.array([0, 1, 0])
        vec3 = np.array([1, 1, 0])

        sim1 = cosine_similarity(vec1, vec2)
        sim2 = cosine_similarity(vec1, vec3)

        self.assertAlmostEqual(
            sim1, 0.0, places=5,
            msg="Cosine similarity between orthogonal vectors should be 0.",
        )
        self.assertAlmostEqual(
            sim2, 0.7071, places=4,
            msg="Cosine similarity between vec1 and vec3 should be approximately 0.7071.",
        )

    def test_do_word_embedder(self):
        """``do_word_embedder`` writes an embedding of the expected size per section."""
        sections = {
            "sec1": {"content": "This is the first section."},
            "sec2": {"content": "This is the second section."},
        }
        do_word_embedder(sections)

        # Inspect the output file the test expects the call above to produce.
        with open(self.OUTPUT_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for key in sections:
            # subTest reports every failing section instead of stopping at
            # the first one.
            with self.subTest(section=key):
                # Fail with a clear message (not a bare KeyError) when a
                # section is missing from the output file.
                self.assertIn(key, data, f"Section {key} should be present in the output file.")
                self.assertIn('embeddings', data[key], f"Section {key} should contain 'embeddings'.")
                self.assertEqual(
                    len(data[key]['embeddings']),
                    self.EMBEDDING_DIM,
                    f"Embedding size for section {key} should be 384.",
                )

    def test_cosine_similarity_edge_cases(self):
        """``cosine_similarity`` is expected to raise ``ZeroDivisionError`` for a zero vector."""
        vec1 = np.array([0, 0, 0])
        vec2 = np.array([1, 1, 1])

        with self.assertRaises(ZeroDivisionError, msg="Cosine similarity should handle zero vectors."):
            cosine_similarity(vec1, vec2)
|
|
|
|
# Run the test suite only when this file is executed as a script, not on import.
if __name__ == '__main__':
    unittest.main()