"""Bag-of-words (BoW) demo: vectorize three texts and compare them pairwise.

The script builds term-count vectors with ``CountVectorizer`` and prints the
cosine similarity, Manhattan distance and Euclidean distance for every pair
of input texts.
"""

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import (
    cosine_similarity,
    manhattan_distances,
    paired_distances,
    pairwise_distances,
)

# Input texts (Persian).
text0 = "قانون خانواده و جوانی جمعیت"
text1 = "قانون اصلاح موادی از قانون خانواده و جوانی جمعیت ایران"
text2 = "خانواده و جمعیت جوانی مهم است"

# CountVectorizer turns each text into a vector of term counts.
# NOTE: with the default token_pattern only tokens of two or more word
# characters are kept, so the single-letter conjunction "و" is not counted.
vectorizer = CountVectorizer()

# Fit the vocabulary on all three texts at once so that the resulting rows
# share the same feature space and can be compared with each other.
X = vectorizer.fit_transform([text0, text1, text2])

# Convert the sparse count matrix to a dense numpy array.
X_array = X.toarray()

# Each call below returns a 3x3 matrix whose [i, j] entry compares text i
# with text j.
similarity = cosine_similarity(X_array)
manhattan_distance = manhattan_distances(X_array)
# "euclidean" is the default metric of pairwise_distances; spelled out here
# so the meaning of the printed "pairwise_distance" values is explicit.
pairwise_distance = pairwise_distances(X_array, metric="euclidean")
# NOTE: paired_distances is intentionally not used: it takes two arrays
# (X, Y) of the same shape and compares them row by row, so it does not
# apply to a single matrix of documents.

# (first text name, second text name, row index, column index) for every
# pair that is reported. The last pair is read at [2, 1], as in the original
# script.
_pairs = (
    ("text0", "text1", 0, 1),
    ("text0", "text2", 0, 2),
    ("text1", "text2", 2, 1),
)

# Print all pairs for one metric before moving on to the next metric.
for _label, _matrix in (
    ("Cosine Similarity", similarity),
    ("manhattan_distance", manhattan_distance),
    ("pairwise_distance", pairwise_distance),
):
    for _first, _second, _row, _col in _pairs:
        print(f"{_label} between {_first} and {_second}: {_matrix[_row, _col]}")