diff --git a/tokenizer.py b/tokenizer.py index 59694cd..946a7ad 100644 --- a/tokenizer.py +++ b/tokenizer.py @@ -16,7 +16,7 @@ class Tokenizer(): nums_list = re.findall(pattern, doc_string) doc_string = re.sub(pattern, 'floatingpointnumber', doc_string) - pattern = r'([!\.\?؟]+)[\n]*' + pattern = r'([!\.\?؟]+)\n*' tmp = re.findall(pattern, doc_string) doc_string = re.sub(pattern, self.add_tab, doc_string) @@ -32,7 +32,7 @@ class Tokenizer(): tmp = re.findall(pattern, doc_string) doc_string = re.sub(pattern, self.add_tab, doc_string) - pattern = r'[\n]+' + pattern = r'\n+' doc_string = re.sub(pattern, self.add_tab, doc_string) for number in nums_list: