Prevent space_correction_plus2() from repeating one word in two combinations

This commit is contained in:
m_ghazizadeh 2025-07-01 14:58:17 +03:30
parent 7b9537dd55
commit d6f3f95c37

View File

@ -250,18 +250,54 @@ class Normalizer():
L = wrds.__len__()
if L < 2:
return doc_string
cnt = 1
for i in range(0, L - 1):
w = wrds[i] + wrds[i + 1]
# NOTE: Problem with the code below:
# NOTE: A word can form a successful combination with both the previous word and the next word, which causes a single word to be repeated in two combinations.
# cnt = 1
# for i in range(0, L - 1):
# w = wrds[i] + wrds[i + 1]
# try:
# out_sentences = out_sentences + ' ' + self.dic2[w]
# cnt = 0
# except KeyError:
# if cnt == 1:
# out_sentences = out_sentences + ' ' + wrds[i]
# cnt = 1
# if cnt == 1:
# out_sentences = out_sentences + ' ' + wrds[i + 1]
# NOTE: Replacement code
# cnt = 1
# for i in range(L-1):
# if cnt == 0:
# cnt = 1
# continue
# w = wrds[i] + wrds[i+1]
# try:
# out_sentences = out_sentences + ' ' + self.dic2[w]
# cnt = 0
# except KeyError:
# if cnt == 1:
# out_sentences = out_sentences + ' ' + wrds[i]
# cnt = 1
# if cnt == 1:
# out_sentences = out_sentences + ' ' + wrds[L-1]
# NOTE: Another replacement code
i = 0
while i > L -1:
w = wrds[i] + wrds[i+1]
try:
out_sentences = out_sentences + ' ' + self.dic2[w]
cnt = 0
i = i + 2
except KeyError:
if cnt == 1:
out_sentences = out_sentences + ' ' + wrds[i]
cnt = 1
if cnt == 1:
out_sentences = out_sentences + ' ' + wrds[i + 1]
out_sentences = out_sentences + ' ' + wrds[i]
i = i + 1
if i == L - 1:
out_sentences = out_sentences + ' ' + wrds[L-1]
return out_sentences
def space_correction_plus3(self, doc_string):