Prevent space_correction_plus2() from repeating one word in two combinations
This commit is contained in:
parent
7b9537dd55
commit
d6f3f95c37
|
@ -250,18 +250,54 @@ class Normalizer():
|
|||
L = wrds.__len__()
|
||||
if L < 2:
|
||||
return doc_string
|
||||
cnt = 1
|
||||
for i in range(0, L - 1):
|
||||
w = wrds[i] + wrds[i + 1]
|
||||
|
||||
# NOTE: Problem with the code below:
|
||||
# NOTE: A word can form a successful combination with both the previous word and the next word, which causes that single word to be repeated in two combinations.
|
||||
|
||||
# cnt = 1
|
||||
# for i in range(0, L - 1):
|
||||
# w = wrds[i] + wrds[i + 1]
|
||||
# try:
|
||||
# out_sentences = out_sentences + ' ' + self.dic2[w]
|
||||
# cnt = 0
|
||||
# except KeyError:
|
||||
# if cnt == 1:
|
||||
# out_sentences = out_sentences + ' ' + wrds[i]
|
||||
# cnt = 1
|
||||
# if cnt == 1:
|
||||
# out_sentences = out_sentences + ' ' + wrds[i + 1]
|
||||
|
||||
# NOTE: Replacement code
|
||||
|
||||
# cnt = 1
|
||||
# for i in range(L-1):
|
||||
# if cnt == 0:
|
||||
# cnt = 1
|
||||
# continue
|
||||
# w = wrds[i] + wrds[i+1]
|
||||
# try:
|
||||
# out_sentences = out_sentences + ' ' + self.dic2[w]
|
||||
# cnt = 0
|
||||
# except KeyError:
|
||||
# if cnt == 1:
|
||||
# out_sentences = out_sentences + ' ' + wrds[i]
|
||||
# cnt = 1
|
||||
# if cnt == 1:
|
||||
# out_sentences = out_sentences + ' ' + wrds[L-1]
|
||||
|
||||
# NOTE: Another replacement code
|
||||
|
||||
i = 0
|
||||
while i > L -1:
|
||||
w = wrds[i] + wrds[i+1]
|
||||
try:
|
||||
out_sentences = out_sentences + ' ' + self.dic2[w]
|
||||
cnt = 0
|
||||
i = i + 2
|
||||
except KeyError:
|
||||
if cnt == 1:
|
||||
out_sentences = out_sentences + ' ' + wrds[i]
|
||||
cnt = 1
|
||||
if cnt == 1:
|
||||
out_sentences = out_sentences + ' ' + wrds[i + 1]
|
||||
out_sentences = out_sentences + ' ' + wrds[i]
|
||||
i = i + 1
|
||||
if i == L - 1:
|
||||
out_sentences = out_sentences + ' ' + wrds[L-1]
|
||||
return out_sentences
|
||||
|
||||
def space_correction_plus3(self, doc_string):
|
||||
|
|
Loading…
Reference in New Issue
Block a user