Prevent space_correction_plus2() from repeating one word in two combinations
This commit is contained in:
parent
7b9537dd55
commit
d6f3f95c37
|
@ -250,18 +250,54 @@ class Normalizer():
|
||||||
L = wrds.__len__()
|
L = wrds.__len__()
|
||||||
if L < 2:
|
if L < 2:
|
||||||
return doc_string
|
return doc_string
|
||||||
cnt = 1
|
|
||||||
for i in range(0, L - 1):
|
# NOTE: Problem with the code below:
|
||||||
w = wrds[i] + wrds[i + 1]
|
# NOTE: A word can form a successful combination with both the previous word and the next word, which causes a single word to be repeated in two combinations.
|
||||||
|
|
||||||
|
# cnt = 1
|
||||||
|
# for i in range(0, L - 1):
|
||||||
|
# w = wrds[i] + wrds[i + 1]
|
||||||
|
# try:
|
||||||
|
# out_sentences = out_sentences + ' ' + self.dic2[w]
|
||||||
|
# cnt = 0
|
||||||
|
# except KeyError:
|
||||||
|
# if cnt == 1:
|
||||||
|
# out_sentences = out_sentences + ' ' + wrds[i]
|
||||||
|
# cnt = 1
|
||||||
|
# if cnt == 1:
|
||||||
|
# out_sentences = out_sentences + ' ' + wrds[i + 1]
|
||||||
|
|
||||||
|
# NOTE: Replacement code
|
||||||
|
|
||||||
|
# cnt = 1
|
||||||
|
# for i in range(L-1):
|
||||||
|
# if cnt == 0:
|
||||||
|
# cnt = 1
|
||||||
|
# continue
|
||||||
|
# w = wrds[i] + wrds[i+1]
|
||||||
|
# try:
|
||||||
|
# out_sentences = out_sentences + ' ' + self.dic2[w]
|
||||||
|
# cnt = 0
|
||||||
|
# except KeyError:
|
||||||
|
# if cnt == 1:
|
||||||
|
# out_sentences = out_sentences + ' ' + wrds[i]
|
||||||
|
# cnt = 1
|
||||||
|
# if cnt == 1:
|
||||||
|
# out_sentences = out_sentences + ' ' + wrds[L-1]
|
||||||
|
|
||||||
|
# NOTE: Another replacement code
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
while i < L - 1:
|
||||||
|
w = wrds[i] + wrds[i+1]
|
||||||
try:
|
try:
|
||||||
out_sentences = out_sentences + ' ' + self.dic2[w]
|
out_sentences = out_sentences + ' ' + self.dic2[w]
|
||||||
cnt = 0
|
i = i + 2
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if cnt == 1:
|
out_sentences = out_sentences + ' ' + wrds[i]
|
||||||
out_sentences = out_sentences + ' ' + wrds[i]
|
i = i + 1
|
||||||
cnt = 1
|
if i == L - 1:
|
||||||
if cnt == 1:
|
out_sentences = out_sentences + ' ' + wrds[L-1]
|
||||||
out_sentences = out_sentences + ' ' + wrds[i + 1]
|
|
||||||
return out_sentences
|
return out_sentences
|
||||||
|
|
||||||
def space_correction_plus3(self, doc_string):
|
def space_correction_plus3(self, doc_string):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user