diff --git a/ner_dataset/REF_finder.py b/ner_dataset/REF_finder.py
new file mode 100644
index 0000000..20e2056
--- /dev/null
+++ b/ner_dataset/REF_finder.py
@@ -0,0 +1,67 @@
+# بسم الله
+
+
+import json
+from elastic_helper import ElasticHelper 
+
+
+
+# Load the NER dataset. Backslashes are escaped because "\d" and "\m" are
+# invalid escape sequences; the resulting path strings are unchanged.
+with open(".\\data\\DATASET140402_no_arefـoutput.json", "r", encoding="utf8") as Read:
+    RefList = json.load(Read)
+path = ".\\data\\mj_qa_section-v02.zip"
+eh_obj = ElasticHelper()
+sections = eh_obj.iterateJsonFile(path, True)
+
+all_ref_list = []
+find_refs_list = []
+not_find_refs_list = []
+
+# Collect [section id, stripped content] for every section in the archive.
+for item in sections:
+    all_ref_list.append([item['id'], item['source']['content'].strip()])
+
+
+# Remember the first section id seen for each content string, so every
+# dataset item costs one dictionary lookup instead of a scan of all sections.
+first_id_by_content = {}
+for refID1, Content in all_ref_list:
+    first_id_by_content.setdefault(Content, refID1)
+
+
+for n, item in enumerate(RefList, start=1):
+    refID2, Content2, ner_list = item['id'], item['content'].strip(), item['ner']
+
+    # Items without NER annotations are skipped entirely: they are recorded
+    # neither as found nor as not found.
+    if len(ner_list) != 0:
+        if Content2 in first_id_by_content:
+            find_refs_list.append([first_id_by_content[Content2], refID2, Content2])
+            print(f"REF ID {refID2} Found ! ... ")
+        else:
+            not_find_refs_list.append(refID2)
+
+    print(f"{n} OF {len(RefList)} searched ...")
+
+
+
+
+
+
+
+with open("foundfind_refs_list.json", "w", encoding="utf8") as f:
+    json.dump(find_refs_list, f, indent=4, ensure_ascii=False)
+
+
+# One unmatched dataset id per line.
+txt = "".join(f"{id_}\n" for id_ in not_find_refs_list)
+
+
+with open("not_found_ids.txt", "w", encoding="utf8") as file:
+    file.write(txt)
+
+
+
+print("finish!")
+    
\ No newline at end of file
diff --git a/ner_dataset/__pycache__/elastic_helper.cpython-313.pyc b/ner_dataset/__pycache__/elastic_helper.cpython-313.pyc
new file mode 100644
index 0000000..38eba02
Binary files /dev/null and b/ner_dataset/__pycache__/elastic_helper.cpython-313.pyc differ
diff --git a/ner_dataset/data/mj_qa_section-v02.zip b/ner_dataset/data/mj_qa_section-v02.zip
new file mode 100644
index 0000000..3c707e9
Binary files /dev/null and b/ner_dataset/data/mj_qa_section-v02.zip differ
diff --git a/ner_dataset/elastic_helper.py b/ner_dataset/elastic_helper.py
new file mode 100644
index 0000000..aa5abe1
--- /dev/null
+++ b/ner_dataset/elastic_helper.py
@@ -0,0 +1,677 @@
+import zipfile
+import sys
+import os
+import json
+from time import sleep
+from elasticsearch import Elasticsearch,helpers
+
+class ElasticHelper():
+    
+    counter = 0
+    total = 0
+    id = ""
+    path_mappings = os.getcwd() + '/repo/_other/'
+    
+    # def __init__(self, es_url="http://127.0.0.1:6900", es_pass="", es_user="elastic", path_mappings = ""):
+
+        # if path_mappings : 
+        #    self.path_mappings = path_mappings
+
+        # if es_pass == '' :
+        #     self.es = Elasticsearch(es_url)
+        # else:
+        #     self.es = Elasticsearch(
+        #         es_url,
+        #         http_auth=(es_user, es_pass),
+        #     )
+        
+        # print(es_url)
+        # print(self.es)
+
+        # self.success_connect = False
+        # for a in range(0,10):
+        #     try :
+        #         if not self.es.ping():
+        #             print('elastic not ping, sleep 30 s   : ', a)
+        #             sleep(5)
+        #             continue
+        #         else:
+        #             self.success_connect = True
+        #             break
+
+        #     except Exception as e:
+        #         break
+        # if not self.success_connect :
+        #     print('******','not access to elastic service')
+        #     return
+
+
+        # self.counter = 0
+        # self.total = 0
+        # self.id = ""
+
+
+    def get_doctument(self, index_name, id):
+        """Fetch document ``id`` from ``index_name`` and return the client's response."""
+        return self.es.get(index=index_name, id=id)
+
+    def exist_doctument(self, index_name, id):
+        """Return the client's ``exists`` result for document ``id`` in ``index_name``."""
+        return self.es.exists(index=index_name, id=id)
+
+    def update_index_doc(self, is_update_state, index_name_o, eid, data):
+        """Partially update (``is_update_state`` truthy) or index document ``eid``.
+
+        Returns the client's response for whichever call was made.
+        """
+        if not is_update_state:
+            return self.es.index(index=index_name_o, id=eid, document=data)
+        return self.es.update(index=index_name_o, id=eid, doc=data)
+    
+    def exportToJsonForAI(self, path_back, index_name, out_name='', body=None, fields=None):
+        """Export every hit of ``index_name`` to ``<path_back>/<out>.json`` as one JSON list.
+
+        Args:
+            path_back: directory that receives the output file.
+            index_name: index to export.
+            out_name: output file stem; ``index_name`` is used when empty.
+            body: search body for the initial request (defaults to ``{}``).
+            fields: when non-empty, each exported item is reduced to
+                ``{'id': <_id>, <field>: <value>, ...}`` for the listed
+                ``_source`` keys that exist; otherwise the raw hit is kept.
+
+        All pages are accumulated in memory and the file is written once, after
+        the last page.
+        """
+        print('*' * 50, ' start backup -->', index_name)
+        self.counter = 0
+        body = {} if body is None else body
+        out = out_name if out_name != '' else index_name
+
+        s_res = self.es.search(
+            index=index_name,
+            scroll='5m',
+            size=1000,
+            body=body
+        )
+        self.total = s_res["hits"]["total"]['value']
+
+        print('start index = %s' % index_name)
+        print('total = %d' % self.total)
+
+        out_json = []
+        hits = s_res['hits']['hits']
+        while len(hits) > 0:
+            self.counter += len(hits)
+            print("progress -> %.2f %%" % ((self.counter / self.total)*100))
+
+            for item in hits:
+                if fields:
+                    item2 = {'id': item['_id']}
+                    for kf in fields:
+                        if kf in item['_source']:
+                            item2[kf] = item['_source'][kf]
+                else:
+                    item2 = item
+                out_json.append(item2)
+
+            s_res = self.es.scroll(scroll_id=s_res['_scroll_id'], scroll='2m', request_timeout=100000)
+            hits = s_res['hits']['hits']
+
+        # The context manager closes (and flushes) the file, which was
+        # previously left open. Append mode is kept, so an existing file grows.
+        target = path_back + "/" + out + '.json'
+        with open(target, 'a+', encoding='utf-8') as fout:
+            fout.write(json.dumps(out_json, ensure_ascii=False))
+
+        ##############################
+
+    def backupIndexToZipfile(self, path_back, index_name, out_name='', body=None, byzip=True, fields=None, noFields=None):
+        """Back up ``index_name`` page by page into ``path_back``.
+
+        With ``byzip`` every scroll page is stored as ``<out><n>.json`` inside
+        ``<out>.zip``; otherwise the pages are appended to ``<out>.json``.
+
+        Args:
+            path_back: directory that receives the backup.
+            index_name: index to back up.
+            out_name: output file stem; ``index_name`` is used when empty.
+            body: optional search body; a plain search is issued when it is
+                ``None`` or ``{}``.
+            byzip: write a zip archive (default) instead of a plain file.
+            fields: when non-empty, keep only these ``_source`` keys; the item is
+                then written as ``{'id': ..., '_source': {...}}``.
+            noFields: ``_source`` keys removed from every exported item.
+
+        Returns:
+            ``False`` when the search matches nothing, otherwise ``None``.
+        """
+        print('*' * 50, ' start backup -->', index_name)
+        self.counter = 0
+        out = out_name if out_name != '' else index_name
+
+        if body is None or body == {}:
+            s_res = self.es.search(
+                index=index_name,
+                scroll='5m',
+                size=1000
+            )
+        else:
+            s_res = self.es.search(
+                index=index_name,
+                scroll='5m',
+                size=1000,
+                body=body
+            )
+
+        self.total = s_res["hits"]["total"]['value']
+        if self.total == 0:
+            print('total index_name by query = %d' % self.total)
+            return False
+
+        if byzip:
+            fout = zipfile.ZipFile(path_back + "/" + out + '.zip', 'w')
+        else:
+            fout = open(path_back + "/" + out + '.json', 'a+', encoding='utf-8')
+
+        print('start index = %s' % index_name)
+        print('total = %d' % self.total)
+        # try/finally: the archive/file is closed even when a scroll request fails.
+        try:
+            file_count = 1
+            hits = s_res['hits']['hits']
+            while len(hits) > 0:
+                self.counter += len(hits)
+                print("progress -> %.2f %%" % ((self.counter / self.total)*100))
+                out_json = []
+                for item in hits:
+                    if fields:
+                        item2 = {'id': item['_id'], '_source': {}}
+                        for kf in fields:
+                            if kf in item['_source']:
+                                item2['_source'][kf] = item['_source'][kf]
+                    else:
+                        item2 = item
+                    for kf in noFields or ():
+                        if kf in item2['_source']:
+                            del item2['_source'][kf]
+                    out_json.append(item2)
+
+                text = json.dumps(out_json, ensure_ascii=False)
+                if byzip:
+                    filename = out + str(file_count) + '.json'
+                    file_count += 1
+                    fout.writestr(filename, text.encode('utf-8'), zipfile.ZIP_DEFLATED)
+                else:
+                    fout.write(text)
+
+                s_res = self.es.scroll(scroll_id=s_res['_scroll_id'], scroll='2m', request_timeout=100000)
+                hits = s_res['hits']['hits']
+        finally:
+            fout.close()
+
+    def restorFileToElastic(self, path_back, index_name, app_key = '', queryDelete = True, map_name='') :
+        """Restore ``<path_back>/<index_name>.zip`` into index ``index_name``.
+
+        Returns ``False`` when the directory or archive is missing. With
+        ``queryDelete`` the import runs only if ``deleteIndex`` returns true.
+        """
+        if not os.path.exists(path_back):
+            print(' **** error *** path not exist: ', path_back)
+            return False
+
+        file_path = path_back + '/' + index_name + '.zip'
+        if not os.path.exists(file_path):
+            return False
+
+        if queryDelete and not self.deleteIndex(index_name):
+            return None
+        self.createIndex(index_name, app_key, map_name)
+        self.zipFileToElastic(file_path, index_name)
+    def restorFileToElastic2(self, path_file, index_name, app_key = '', queryDelete = True, map_name='') :
+        """Restore the archive at ``path_file`` into index ``index_name``.
+
+        Returns ``False`` when ``path_file`` does not exist. With
+        ``queryDelete`` the import runs only if ``deleteIndex`` returns true.
+        """
+        if not os.path.exists(path_file):
+            print(' **** error *** path not exist: ', path_file)
+            return False
+
+        file_path = path_file
+        if queryDelete:
+            proceed = self.deleteIndex(index_name)
+        else:
+            proceed = True
+        if proceed:
+            self.createIndex(index_name, app_key, map_name)
+            self.zipFileToElastic(file_path, index_name)
+
+    def renameElasticIndex(self, index_name_i, index_name_o, app_key = '', map_name='') :
+        """Reindex ``index_name_i`` into ``index_name_o`` and wait for the task.
+
+        Does nothing unless ``createIndex`` succeeds for the destination.
+        """
+        if not self.createIndex(index_name_o, app_key, map_name):
+            return
+
+        res = self.es.reindex(
+            body={
+                "source": {"index": index_name_i},
+                "dest": {"index": index_name_o}
+            },
+            wait_for_completion=False)
+        print(type(res))
+        print(res)
+        taskid = res["task"] if res["task"] else ""
+        tasks = self.es.tasks
+        while True:
+            res = tasks.get(task_id=taskid)
+            if res["completed"]:
+                break
+            status = res["task"]["status"]
+            # Progress reads "created so far / total"; the two counts were swapped.
+            print('----', index_name_o, '  imported : ', status["created"], ' / ', status["total"])
+            sleep(1)
+        print('----', index_name_o, '  complated')
+    def deleteIndex(self, index_name) :
+        """Delete ``index_name`` after interactive confirmation.
+
+        Returns ``True`` if the index is absent or deleted, ``False`` if declined.
+        """
+        if not self.es.indices.exists(index=index_name):
+            print(' ' * 10, " for delete NOT exist index :", index_name )
+            return True
+        if not self.query_yes_no('Is DELETE elastic index (' + index_name +') ? '):
+            return False
+        self.es.indices.delete(index = index_name)
+        print('%' * 10 , "  Finish DELETE  index :", index_name )
+        return True
+    def query_yes_no(self, question, default="no"):
+        """Ask a yes/no ``question`` on stdin until a valid answer is given.
+
+        An empty answer selects ``default`` ("yes", "no", or ``None`` for no
+        default). Raises ``ValueError`` for any other ``default``.
+        """
+        valid = { "yes": True, "y": True, "ye": True, "no": False, "n": False }
+        if default is None:
+            prompt = " [y/n] "
+        elif default in ("yes", "no"):
+            prompt = " [Y/n] " if default == "yes" else " [y/N] "
+        else:
+            raise ValueError("invalid default answer: '%s'" % default)
+        while True:
+            print('%'*10, '  quistion ', '%'*10 , '\n')
+            sys.stdout.write(question + prompt)
+            choice = input().lower()
+            if default is not None and choice == "":
+                return valid[default]
+            if choice in valid:
+                return valid[choice]
+            sys.stdout.write("لطفا یکی از موارد روبرو را وارد کنید : 'yes' or 'no' " "(or 'y' or 'n').\n")
+    def createIndexIfNotExist(self, index_name_o, mapping_o=""):
+        """Create ``index_name_o`` with ``mapping_o`` if absent; exceptions are printed, not raised."""
+        try:
+            if not self.es.indices.exists(index=index_name_o):
+                response = self.es.indices.create(index=index_name_o, body=mapping_o)
+                print("create index response:", response)
+        except Exception as e:
+            # Report the real cause; a bare except also hid KeyboardInterrupt.
+            print("....... index exist ! ... not created", e)
+
+    def createIndex(self, index_name, app_key='', map_name=''):
+        """Create ``index_name`` from a JSON mapping file unless it already exists.
+
+        ``<map_name>.json`` (``map_name`` defaults to ``index_name``) is looked
+        up in ``<path_mappings>general/`` and then in ``<path_mappings><app_key>/``
+        (``app_key`` defaults to ``'tavasi'``). ``mj_rg_section`` and
+        ``semantic_search`` use the ``mj_qa_section`` mapping; a trailing ``_ai``
+        is removed from the mapping name.
+
+        Returns ``True`` if the index exists or was created, ``False`` if no
+        mapping file was found.
+        """
+        path_base = self.path_mappings
+        general_dir = path_base + 'general/'
+        app_dir = path_base + (app_key if app_key != '' else 'tavasi') + '/'
+        if map_name == '':
+            map_name = index_name
+
+        if self.es.indices.exists(index=index_name):
+            print("============== exist index :", index_name )
+            return True
+
+        if map_name == 'mj_rg_section' or map_name == 'semantic_search':
+            map_name = 'mj_qa_section'
+        elif map_name.endswith('_ai'):
+            # Strip the suffix (the old test compared one character with '_ai').
+            map_name = map_name[:-3]
+            print(map_name)
+
+        mapping_file_path = general_dir + map_name + '.json'
+        print("mapping_file_path : " , mapping_file_path)
+        if not os.path.isfile(mapping_file_path):
+            mapping_file_path = app_dir + map_name + '.json'
+        print("mapping_file_path : " , mapping_file_path)
+        if not os.path.isfile(mapping_file_path):
+            print('*** error not find maping file elastic : *******',  mapping_file_path)
+            return False
+
+        with open(mapping_file_path, 'r', encoding='utf-8') as mapping_file:
+            mapping_data = json.load(mapping_file)
+        if self.es.indices.exists(index=index_name):
+            print("============== exist index :", index_name )
+        else:
+            self.es.indices.create(index = index_name , body = mapping_data)
+        return True
+
+    def updateBulkList(self, listData, index_name):
+        """Send a bulk partial update (``doc``) for every item of ``listData``.
+
+        Each item provides ``_id`` and ``_source``; errors do not raise.
+        """
+        actions = [
+            {
+                "_op_type": "update",
+                "_index": index_name,
+                "_id": item['_id'],
+                "doc": item['_source'],
+            }
+            for item in listData
+        ]
+        # Keyword arguments: passed positionally these values were bound to the
+        # wrong helpers.bulk parameters (e.g. chunk_size landed on stats_only).
+        helpers.bulk(self.es, actions, stats_only=True, chunk_size=1000,
+                     raise_on_error=False, raise_on_exception=False)
+    def importBulkList(self, listData, index_name):
+        """Index every item of ``listData`` (``_id`` + ``_source``) into ``index_name``.
+
+        All documents go through one ``helpers.bulk`` call; errors do not raise.
+        """
+        actions = [
+            {
+                "_op_type": "index",
+                "_index": index_name,
+                "_id": item['_id'],
+                "_source": item['_source'],
+            }
+            for item in listData
+        ]
+        # Keyword arguments keep every value on the helpers.bulk parameter it is meant for.
+        helpers.bulk(self.es, actions, stats_only=True, chunk_size=100000,
+                     raise_on_error=False, raise_on_exception=False)
+
+    def importJsonDataToElastic(self, jsonData, index_name, fields=None):
+        """Bulk-index the items of ``jsonData`` into ``index_name``.
+
+        Each item carries its ``_source`` plus an ``_id`` (or ``id`` as a
+        fallback). When ``fields`` is non-empty only those ``_source`` keys
+        are indexed. Errors do not raise.
+        """
+        actions = []
+        for item in jsonData:
+            # .get(): a missing '_id' key falls back to 'id' instead of raising KeyError.
+            doc_id = item.get('_id') or item['id']
+            source = item['_source']
+            if fields:
+                # Copy each field's own value, not the whole _source, under its key.
+                source = {col: source[col] for col in fields if col in source}
+
+            actions.append({
+                "_op_type": "index",
+                "_index": index_name,
+                "_id": doc_id,
+                "_source": source,
+            })
+
+        # Keyword arguments keep every value on the helpers.bulk parameter it is meant for.
+        helpers.bulk(self.es, actions, stats_only=True, chunk_size=1000,
+                     raise_on_error=False, raise_on_exception=False)
+                      
+
+    def fileToElastic(self, file_path, index_name, limit_pack = -1, fields=None):
+        """Import the JSON list in ``file_path`` into ``index_name`` (``limit_pack`` is unused)."""
+        if not os.path.exists(file_path):
+            print("file zip:" , file_path , " not exist")
+            return
+        print("index:" , index_name , '=>' , file_path )
+        self.counter = 0
+        # Explicit UTF-8: the platform default encoding may fail on non-ASCII JSON.
+        with open(file_path, encoding='utf-8') as file:
+            self.importJsonDataToElastic(json.load(file), index_name, fields)
+        self.es.indices.refresh(index=index_name)
+        print(self.es.cat.count(index=index_name, format="json"))
+                                    
+    def zipFileToElastic(self, file_path, index_name, limit_pack = -1, fields=[]):
+        """Import every JSON member of the zip at ``file_path`` into ``index_name``.
+
+        At most ``limit_pack`` members are imported (``-1`` means no limit).
+        The index is refreshed and its document count printed afterwards.
+        """
+        if not os.path.exists(file_path):
+            print("file zip:" , file_path , " not exist for imort to elastic : ", index_name )
+            return
+
+        with zipfile.ZipFile(file_path, 'r') as archive:
+            members = archive.namelist()
+            print("="*10, " zip fileNo: " , 1 ,"  - ( ", index_name," ) | File Numbers:" ,len(members) , "=" * 10)
+
+            self.counter = 0
+            for pack_no, member in enumerate(members, start=1):
+                if limit_pack != -1 and pack_no > limit_pack:
+                    print('limit_data  ', index_name, '  ', limit_pack)
+                    break
+
+                print("index:" , index_name , '=>' , member )
+                with archive.open(member) as file:
+                    data = json.loads(file.read())
+                    self.importJsonDataToElastic(data, index_name, fields)
+
+            self.es.indices.refresh(index=index_name)
+            print(self.es.cat.count(index=index_name, format="json"))
+            print(" END Of Import to elastic ", index_name ,"\n")
+
+    def iterateJsonFile(self, file_path, isZip=True, limit_pack = -1):
+        """Yield ``{"source": hit["_source"], "id": hit["_id"]}`` for each stored hit.
+
+        Args:
+            file_path: a zip of JSON list files (``isZip``) or one JSON list file.
+            isZip: whether ``file_path`` is a zip archive.
+            limit_pack: maximum number of zip members to read; ``-1`` reads all.
+
+        Yields nothing (after printing a message) when ``file_path`` is missing.
+        """
+        if not os.path.exists(file_path):
+            print("file zip:" , file_path , " not exist  iterateJsonFile " )
+            return
+
+        if isZip:
+            with zipfile.ZipFile(file_path, 'r') as zObject:
+                members = zObject.namelist()
+                print("="*10, " zip fileNo: " , 1 ,"  iterateJsonFile - | File Numbers:" ,len(members) , "=" * 10)
+
+                self.counter = 0
+                for packNo, filename in enumerate(members, start=1):
+                    if limit_pack != -1 and packNo > limit_pack:
+                        print('limit_data  iterateJsonFile   ', limit_pack)
+                        break
+
+                    print("index iterateJsonFile :", '=>' , filename )
+                    with zObject.open(filename) as file:
+                        data = json.loads(file.read())
+                    yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in data)
+        else:
+            # Open the path that was passed in (an undefined name was used here before).
+            with open(file_path, 'r', encoding='utf-8') as file:
+                data = json.load(file)
+            yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in data)
+
+    def es_iterate_all_documents(self, index, body="", pagesize=250, scroll_timeout="25m", **kwargs):
+        """Iterate over ALL documents of a single index using scroll requests.
+
+        Args:
+            index: index to read.
+            body: optional search body; omitted from the request when empty.
+            pagesize: number of hits requested per page.
+            scroll_timeout: ``scroll`` value sent with every search/scroll request.
+            **kwargs: forwarded to the initial ``search`` call.
+
+        Yields:
+            ``{"source": hit["_source"], "id": hit["_id"]}`` for every hit.
+        """
+        # Start progress from zero, as the other methods of this class do;
+        # otherwise a second iteration reports more than 100 %.
+        self.counter = 0
+
+        if body:
+            result = self.es.search(
+                index=index,
+                scroll=scroll_timeout,
+                **kwargs,
+                size=pagesize,
+                body=body
+            )
+        else:
+            result = self.es.search(
+                index=index,
+                scroll=scroll_timeout,
+                **kwargs,
+                size=pagesize
+            )
+
+        self.total = result["hits"]["total"]["value"]
+        if self.total > 0:
+            print("total = %d" % self.total)
+
+        while True:
+            hits = result["hits"]["hits"]
+            self.counter += len(hits)
+            if self.total > 0:
+                print("progress -> %.2f %%" % ((self.counter / self.total) * 100))
+            # Stop after no more docs
+            if not hits:
+                break
+            # Yield each entry
+            yield from ({"source": hit["_source"], "id": hit["_id"]} for hit in hits)
+            # Scroll next
+            result = self.es.scroll(scroll_id=result["_scroll_id"], scroll=scroll_timeout)
+
+
+    def moveCustomFileds(self, index_name_i, index_name_o, fields=[], renameFileds={}):
+        """Copy selected fields of every ``index_name_i`` document into ``index_name_o``.
+
+        For each source document the columns in ``fields`` are collected,
+        optionally renamed through ``renameFileds`` and sent as a partial
+        update to the document with the same id in ``index_name_o``.
+        Documents that yield no value are skipped. Exceptions are printed
+        instead of raised.
+
+        Args:
+            index_name_i: index the documents are read from.
+            index_name_o: index that receives the partial updates.
+            fields: column names to copy; ``a.b`` addresses a nested value.
+            renameFileds: optional ``{source column: target key}`` mapping.
+        """
+        # The outer handler turns any failure while iterating into a message.
+        try:
+            documents = []
+            try:
+                documents = self.es_iterate_all_documents(index_name_i)
+            except Exception as e:
+                print(e)
+
+            count = 0
+            for mentry in documents:
+                count += 1
+                entry = mentry["source"]
+                eid = mentry["id"]
+
+                if (count % 100) == 0:
+                    print("%s -> %.2f " % (eid , (count / self.total) if self.total > 0 else 0))
+
+                data = {}
+                for col in fields:
+                    if '.' in col:
+                        # Walk the dotted path; stop at the first missing/falsy level.
+                        subsource = entry
+                        for sub in col.split('.'):
+                            dCol = subsource.get(sub, None)
+                            if not dCol:
+                                break
+                            subsource = dCol
+                    else:
+                        dCol = entry.get(col, None)
+
+                    if dCol is None:
+                        continue
+                    data[renameFileds.get(col, col)] = dCol
+
+                # Nothing to copy for this document.
+                if not data:
+                    continue
+
+                # A failed update is reported and the loop continues.
+                try:
+                    self.update_index_doc(True, index_name_o, eid, data)
+                except Exception as e:
+                    print(e)
+
+        except Exception as e:
+            print(e)
+
+    def mappingIndex(self, index_name_i):
+        # Placeholder: does nothing.
+        # Author's note: an index mapping can only be changed through Kibana,
+        # not from Python; the alternative is to create a new index with the
+        # desired mapping and reindex into it.
+        pass
+
+    def updateByQueryIndex(self, index_name_i, body):
+        """Run ``update_by_query`` with ``body`` on ``index_name_i``; exceptions are printed."""
+        ## sample
+        # body = {
+        #         "script": {
+        #             "inline": "ctx._source.Device='Test'",
+        #             "lang": "painless"
+        #         },
+        #         "query": {
+        #             "match": {
+        #                 "Device": "Boiler"
+        #             }
+        #         }
+        #     }
+        try:
+            self.es.update_by_query(body=body, index=index_name_i)
+
+        except Exception as e:
+            print(e)
+
+
+    def deleteByQueryIndex(self, index_name_i, body):
+        """Run ``delete_by_query`` with ``body`` on ``index_name_i``; exceptions are printed."""
+        ## sample
+        # body = {
+        #         "query": {
+        #             "match": {
+        #                 "Device": "Boiler"
+        #             }
+        #         }
+        #     }
+        try:
+            self.es.delete_by_query(index=index_name_i, body=body )
+
+        except Exception as e:
+            print(e)
+
+    def delete_by_ids(self, index_name_i, ids):
+        """Delete the documents of ``index_name_i`` whose ``_id`` is in ``ids``; exceptions are printed."""
+        try:
+            # example: ids = ['test1', 'test2', 'test3']
+
+            query = {"query": {"terms": {"_id": ids}}}
+            res = self.es.delete_by_query(index=index_name_i, body=query)
+            print(res)
+
+        except Exception as e:
+            print(e)
+
diff --git a/ner_dataset/find_law.py b/ner_dataset/find_law.py
new file mode 100644
index 0000000..2bad9e6
--- /dev/null
+++ b/ner_dataset/find_law.py
@@ -0,0 +1,334 @@
+# بسم الله 
+
+
+
+from elastic_helper import ElasticHelper
+from thefuzz import fuzz
+import json
+
+
+
+
+
+# Load the NER reference dataset; the handle is closed by the with-block.
+# (Backslashes are escaped: '\d' and '\D' are invalid escape sequences.)
+with open('.\\data\\DATASET140402_no_arefـoutput.json', "r", encoding='utf8') as Read:
+    RefList = json.load(Read)
+path = ".\\data\\mj_qa_section-v02.zip"
+eh_obj = ElasticHelper()
+sections = eh_obj.iterateJsonFile(path, True)
+
+# Ids read from no_find_txt.txt. Only every second line of the file
+# (2nd, 4th, ...) is parsed, exactly as before; the other lines are skipped.
+no_found_id = []
+with open(".\\no_find_txt.txt", "r", encoding="utf8") as txt_file:
+    for line_no, line in enumerate(txt_file):
+        if line_no % 2 == 1:
+            no_found_id.append(int(line.strip()))
+n = 0
+
+# One record per section: its id, stripped content as caption, and ts_date.
+all_law_dict = [
+    {"id": item['id'],
+     "caption": item['source']['content'].strip(),
+     "approve_date": item['source']['ts_date']}
+    for item in sections
+]
+
+
+
+
+
+def law_dict_saver(law_id,start_token_index,end_token_index,found_law_list,law_captions,matched_string,original_string,multi_flag):
+    """Bundle the details of one recognised law mention; keys equal the parameter names."""
+    record = {
+        "law_id": law_id,
+        "start_token_index": start_token_index,
+        "end_token_index": end_token_index,
+        "found_law_list": found_law_list,
+        "law_captions": law_captions,
+        "matched_string": matched_string,
+        "original_string": original_string,
+        "multi_flag": multi_flag,
+    }
+    return record
+    
+def remove_latest_added_token(text):
+    """Return ``text`` without its last space-separated token.
+
+    The text is stripped, split on single spaces, the final piece is
+    dropped and the remainder is re-joined and stripped again.
+    """
+    pieces = text.strip().split(' ')
+    return ' '.join(pieces[:-1]).strip()
+
+def law_recognizer(text, law_dict):
+        # Find law titles mentioned in `text`: the tokens after each token containing "قانون" are matched against the captions in `law_dict`.
+        i = 0
+        # Returns the tuple (matched_law_list, law_token_list).
+        normalized_content = text
+        text_token_list = normalized_content.strip().split()
+        matched_token_index_list = []
+        
+        # Collect the positions of possible law titles in the text, based on the keyword "قانون" (law)
+        for index,token in enumerate(text_token_list):
+            if 'قانون' in token:
+                matched_token_index_list.append(index)
+            
+        content_token_list = []
+        law_token_list     = []
+        for index, item in enumerate(matched_token_index_list):
+            # If the item is not the last element of the array ...
+            
+            end = 12  # how many words are examined, i.e. how many times the checking loop repeats
+
+            if item < len(text_token_list):
+                # Store the following tokens as the possible continuation of the law title (the original note says nine; `end` above is 12)
+                if item + end < len(text_token_list):
+                    for i in range(end):
+                        if item + (i+1) >= len(text_token_list):
+                            break
+                        content_token_list.append(text_token_list[item + (i+1)])
+                    i = 0
+                # Otherwise store the remaining tokens (fewer than `end`) up to the end of the array
+                else:
+                    j = 0
+                    while j < len(text_token_list)-index:  # NOTE(review): `index` is the keyword's position in matched_token_index_list, not a token offset -- confirm this bound
+                        if item + (j+1) >= len(text_token_list)-index:
+                            break
+                        content_token_list.append(text_token_list[item + (j+1)])
+                        j += 1
+                    j = 0
+            law_token_list.append({
+                'start_token_index': item,
+                'law_token'        : content_token_list
+                })
+
+            if len(content_token_list) < end :   # if fewer words were collected than the number selected for checking
+                end = len(content_token_list)  # shrink the number of words to check to what was collected
+            # NOTE(review): `end` is reset for every keyword, so the matching loop below uses the value left by the LAST keyword for all candidates.
+            content_token_list = []
+        matched_law_list = [] 
+        c = 0 
+
+
+        for key, law_value in enumerate(law_token_list):
+            c += 1
+            law_token         = law_value['law_token']
+            start_token_index = law_value['start_token_index']
+            end_token_index   = 0
+            found_law_list_1     = []
+            found_law_list_2     = []
+            found_law_list_3     = []
+            # If the candidate law-title text has zero tokens, leave the loop and move on to the next law title.
+            # NOTE(review): `break` ends the whole loop; `continue` would move on to the next candidate -- confirm which is intended.
+            if len(law_token) < 1:
+                break
+
+            # First, the first token of the candidate phrase is looked up in the law captions of the bank.
+            # In later steps tokens are added one by one to the first token and compared with the law captions again.
+            law_section = law_token[0]
+            for index, value in enumerate(law_dict):
+                # The law caption the candidate text is currently compared with
+                id = value['id']
+                current_caption = value['caption']
+                current_approve_date = value['approve_date']
+                # Check whether the phrase occurs in the law caption
+                if current_caption.__contains__(law_section):
+                    # Take the first token of the law caption
+                    current_law_first_token = current_caption.strip().split(' ')[0]
+                    # If the first token of the caption is the word "قانون", skip it,
+                    # because the candidate token lists do not include the word "قانون" itself
+                    if current_law_first_token == 'قانون':
+                        current_law_first_token = current_caption.strip().split(' ')[1]  # NOTE(review): raises IndexError if the caption is only this one word
+                    if law_section == current_law_first_token:
+                        # The caption starts with our token: keep the law in a list.
+                        # The next steps compare the candidate text against this shorter list only.
+                        found_law_list_1.append({"id": id ,"caption": current_caption, "approve_date":current_approve_date})
+                    else:
+                        continue
+        
+            # Narrow the candidates step by step; X counts the steps.
+            X = 0
+            FoundLawList=[]
+            OldFoundLawList=[]
+            NewFoundLawList=[]
+            while X < end-1 :
+            # for x in range(end):
+
+
+                X+=1
+                if X == 1:  # entered when examining the first token
+                    if len(found_law_list_1) == 0:
+                        # X= X+1
+                        continue
+                    else:
+                        # X=X+1
+                        if len(found_law_list_1) == 1:
+                            found_law = []
+                            found_law.append(found_law_list_1.pop())
+                            k = 0
+                            matched_string = ''
+                            found_law_caption = found_law[0]['caption'].strip()
+                            if found_law_caption.startswith('قانون'):
+                                found_law_caption = found_law_caption[5:]
+                            found_law_caption_tokens = found_law_caption.strip().split()
+                            for k in range(len(law_token)):
+                                if k >= len(found_law_caption_tokens):
+                                    break
+                                if law_token[k] == found_law_caption_tokens[k]:
+                                    matched_string += law_token[k] + ' '
+                                else:
+                                    end_token_index = start_token_index + len(matched_string.strip().split())
+                                    found_law_dict = law_dict_saver(found_law[0]['id'],start_token_index,end_token_index,found_law,found_law[0]['caption'],matched_string.strip(),law_token,False)
+                                    matched_law_list.append(found_law_dict)
+
+                                    break
+                                    
+                                end_token_index = start_token_index + len(matched_string.strip().split())
+                                found_law_dict = law_dict_saver(found_law[0]['id'],start_token_index,end_token_index,found_law,found_law[0]['caption'],matched_string.strip(),law_token,False)
+                                matched_law_list.append(found_law_dict)
+                            # NOTE(review): the three statements above run once per matching token, so one entry is appended per matched prefix -- confirm intended.
+                            continue
+
+                        if len(law_token) < 2:
+                            continue
+                            
+                        law_section = law_token[0]+' '+law_token[1]
+                        for value in found_law_list_1:
+                            id = value['id']
+                            current_caption = value['caption']
+                            current_approve_date = value['approve_date']
+                            rate = fuzz.token_set_ratio(current_caption,law_section)
+                            if rate == 100:
+                                found_law_list_2.append({"id": id ,"caption": current_caption, "approve_date":current_approve_date})
+                    
+                    FoundLawList = found_law_list_1
+                    NewFoundLawList = found_law_list_2
+                    continue
+
+                OldFoundLawList = FoundLawList
+                FoundLawList = NewFoundLawList
+                NewFoundLawList = []
+                
+                if X == int(end-1): # entered when examining the last word
+                    if len(FoundLawList) == 0:
+                        # More than one match was found in the previous step, but this step dropped to zero matches
+                        if len(OldFoundLawList) > 1 and len(OldFoundLawList) < 6:
+                            # To be checked carefully
+                            # Sort ascending by approve_date (original note: oldest to newest)
+                            sorted_found_law_list   = sorted(OldFoundLawList, key=lambda x: x['approve_date'])
+                            found_law               = sorted_found_law_list.pop()
+                            end_token_index = start_token_index + len(law_section.strip().split())
+                            # The token most recently added to the law phrase has to be taken back,
+                            # because no law caption was found that corresponds to it
+                            law_section = remove_latest_added_token(law_section)
+                            found_law_dict = law_dict_saver(found_law['id'],start_token_index,end_token_index,sorted_found_law_list,found_law['caption'],law_section,law_token,True)
+                            matched_law_list.append(found_law_dict)
+                        continue
+                    else:
+                        if len(FoundLawList) == 1:
+                            sorted_found_law_list   = sorted(FoundLawList, key=lambda x: x['approve_date'])  # NOTE(review): result is not used in this branch
+                            found_law = []
+                            found_law.append(FoundLawList.pop())
+                            end_token_index = start_token_index + len(law_section.strip().split())
+                            found_law_dict = law_dict_saver(found_law[0]['id'],start_token_index,end_token_index,found_law,found_law[0]['caption'],law_section,law_token,False)
+                            matched_law_list.append(found_law_dict)
+
+                        elif len(FoundLawList) > 1 and len(FoundLawList) < 6:
+                            sorted_found_law_list   = sorted(OldFoundLawList, key=lambda x: x['approve_date'] )  # NOTE(review): sorts OldFoundLawList although the branch tests FoundLawList -- confirm
+                            found_law               = sorted_found_law_list.pop()
+                            end_token_index = start_token_index + len(law_section.strip().split())
+                            found_law_dict = law_dict_saver(found_law['id'],start_token_index,end_token_index,FoundLawList,found_law['caption'],law_section,law_token,True)
+                            matched_law_list.append(found_law_dict)
+                    break
+                
+                if len(FoundLawList) == 0:
+                    # More than one match was found in the previous step, but this step dropped to zero matches
+                    if len(OldFoundLawList) > 1 and len(OldFoundLawList) < 6:
+                        # To be checked carefully
+                        # Sort ascending by approve_date (original note: oldest to newest)
+                        sorted_found_law_list   = sorted(OldFoundLawList, key=lambda x: x['approve_date'])
+                        found_law               = sorted_found_law_list.pop()
+                        end_token_index = start_token_index + len(law_section.strip().split())
+                        # The token most recently added to the law phrase has to be taken back,
+                        # because no law caption was found that corresponds to it
+                        law_section = remove_latest_added_token(law_section)
+                        found_law_dict = law_dict_saver(found_law['id'],start_token_index,end_token_index,sorted_found_law_list,found_law['caption'],law_section,law_token,True)
+                        matched_law_list.append(found_law_dict)
+                    continue
+                else:
+                    if len(FoundLawList) == 1:
+                        found_law = []
+                        found_law.append(FoundLawList.pop()) #             = found_law_list_2.pop()
+                        end_token_index = start_token_index + len(law_section.strip().split())
+                        found_law_dict = law_dict_saver(found_law[0]['id'],start_token_index,end_token_index,found_law,found_law[0]['caption'],law_section,law_token,False)
+                        matched_law_list.append(found_law_dict)
+                        # When the search has narrowed down to a single candidate, store only that one title
+                        continue
+
+                    if len(law_token) < X+1 :
+                        continue
+                    law_section += ' ' + law_token[X] # X = new token
+                    # law_section     =  List_Law_tokens
+                    for value in FoundLawList:
+                        id = value['id']
+                        current_caption = value['caption']
+                        current_approve_date = value['approve_date']
+                        rate = fuzz.token_set_ratio(current_caption,law_section)
+
+                        if rate == 100:
+
+                            NewFoundLawList.append({"id": id ,"caption": current_caption, "approve_date":current_approve_date})
+
+                # OldFoundLawList=FoundLawList
+                # FoundLawList=NewFoundLawList
+                # NewFoundLawList=[]
+        # Replace each entry's found_law_list by 'caption#id#approve_date' strings.
+        if matched_law_list:
+            for law_item in matched_law_list:
+                temp_list = []
+                found_list = law_item['found_law_list']
+                for item in found_list:
+                    temp_list.append(item['caption'] + '#' + str(item['id']) + '#' + item['approve_date'])
+                        
+                law_item['found_law_list'] = temp_list
+        return matched_law_list, law_token_list
+    
+# Run the law recogniser over every dataset reference whose id is listed in
+# no_found_id, then write the matches and the still-unmatched ids to disk.
+n=0
+not_found_ids = []
+all_laws_founded = []
+wanted_ids = set(no_found_id)  # O(1) membership instead of scanning the list per record
+for section in RefList :
+    refID , Content , ner_list = section['id'],section['content'].strip(),section['ner']
+    print(f"ID {refID} is searching... ")
+    if refID not in wanted_ids:
+        continue
+    # Only the matches are needed here; the candidate token list is discarded.
+    matched_law_list, _ = law_recognizer(Content, all_law_dict)
+    if not matched_law_list:
+        not_found_ids.append(refID)
+        continue
+    n+=1
+    all_laws_founded.append({
+        "dataset-REF": {"id": refID, "content": Content},
+        "All-REF": {
+            "id": [law['law_id'] for law in matched_law_list],
+            "content": [law['law_captions'] for law in matched_law_list],
+        },
+    })
+
+
+# One unmatched id per line.
+with open("not_found_idsX.txt", "w",encoding="utf8") as file:
+    file.write("".join(f"{id_}\n" for id_ in not_found_ids))
+
+# All matches, as indented JSON with non-ASCII text kept readable.
+with open("founded_lawsX.json", "w" , encoding="utf8") as f:
+    json.dump(all_laws_founded, f, indent=4, ensure_ascii=False )
+
+# Summary counters.
+print(f"{n} Law Founded ! ")
+print(f"{len(not_found_ids)} Law Not Founded ! ")
\ No newline at end of file
diff --git a/ner_dataset/normalizer.py b/ner_dataset/normalizer.py
new file mode 100644
index 0000000..bb541a5
--- /dev/null
+++ b/ner_dataset/normalizer.py
@@ -0,0 +1,1413 @@
+from re import sub
+import copy
+import os
+from tokenizer import Tokenizer
+from data_helper import DataHelper
+import traceback,sys
+class Normalizer():
+
+    def __init__(self,
+                 half_space_char='\u200c',
+                 date_normalizing_needed=False,
+                 pinglish_conversion_needed=False,
+                 train_file_path="resource/tokenizer/Bijan_khan_chunk.txt",
+                 token_merger_path="resource/tokenizer/TokenMerger.pckl"):
+        """Load the three replacement dictionaries; helper objects are built only on demand."""
+        # half_space_char, train_file_path and token_merger_path are accepted but unused here.
+        self.dir_path = f"{os.path.dirname(os.path.realpath(__file__))}/"
+        normalizer_dir = f"{self.dir_path}resource/normalizer/"
+        self.dic1_path = f"{normalizer_dir}Dic1_new.txt"
+        self.dic2_path = f"{normalizer_dir}Dic2_new.txt"
+        self.dic3_path = f"{normalizer_dir}Dic3_new.txt"
+        self.dic1 = self.load_dictionary(self.dic1_path)
+        self.dic2 = self.load_dictionary(self.dic2_path)
+        self.dic3 = self.load_dictionary(self.dic3_path)
+        self.date_normalizing_needed = date_normalizing_needed
+        self.pinglish_conversion_needed = pinglish_conversion_needed
+        self.data_helper = DataHelper()
+        # Either flag triggers the creation of all three helpers.
+        if date_normalizing_needed or pinglish_conversion_needed:
+            self.tokenizer = Tokenizer()
+            self.date_normalizer = DateNormalizer()
+            self.pinglish_conversion = PinglishNormalizer()
+
+        
+
+    def load_dictionary(self, file_path):
+        """Read a 'key value' per-line UTF-8 table into a dict; lines without a value are skipped."""
+        mapping = {}
+        with open(file_path, 'r', encoding='utf-8') as f:
+            for fields in (line.split(' ') for line in f):
+                if len(fields) > 1:  # blank / one-column lines used to raise IndexError
+                    mapping[fields[0].strip()] = fields[1].strip()
+        return mapping
+
+    def sub_alphabets(self, doc_string):
+        # Rewrite doc_string through a fixed chain of regex substitutions (each step feeds the next):
+        # letter variants -> one canonical letter, punctuation variants -> '.', '،', '؟',
+        # Persian/Arabic digits -> ASCII digits, the listed diacritics removed, 'ـ' -> '_',
+        # then runs of spaces / newlines collapsed to one. Returns the rewritten string.
+        a0 = "ء"
+        b0 = "ئ"
+        c0 = sub(a0, b0, doc_string)
+        a1 = r"ٲ|ٱ|إ|ﺍ|أ"
+        a11 = r"ﺁ|آ"
+        b1 = r"ا"
+        b11 = r"آ"
+        c11 = sub(a11, b11, c0)
+        c1 = sub(a1, b1, c11)
+        a2 = r"ﺐ|ﺏ|ﺑ"
+        b2 = r"ب"
+        c2 = sub(a2, b2, c1)
+        a3 = r"ﭖ|ﭗ|ﭙ|ﺒ|ﭘ"  # NOTE(review): ﺒ looks like a form of ب, yet it is mapped to پ here -- confirm
+        b3 = r"پ"
+        c3 = sub(a3, b3, c2)
+        a4 = r"ﭡ|ٺ|ٹ|ﭞ|ٿ|ټ|ﺕ|ﺗ|ﺖ|ﺘ"
+        b4 = r"ت"
+        c4 = sub(a4, b4, c3)
+        a5 = r"ﺙ|ﺛ"
+        b5 = r"ث"
+        c5 = sub(a5, b5, c4)
+        a6 = r"ﺝ|ڃ|ﺠ|ﺟ"
+        b6 = r"ج"
+        c6 = sub(a6, b6, c5)
+        a7 = r"ڃ|ﭽ|ﭼ"  # NOTE(review): ڃ also appears in a6 above, so it has already been replaced by then
+        b7 = r"چ"
+        c7 = sub(a7, b7, c6)
+        a8 = r"ﺢ|ﺤ|څ|ځ|ﺣ"
+        b8 = r"ح"
+        c8 = sub(a8, b8, c7)
+        a9 = r"ﺥ|ﺦ|ﺨ|ﺧ"
+        b9 = r"خ"
+        c9 = sub(a9, b9, c8)
+        a10 = r"ڏ|ډ|ﺪ|ﺩ"
+        b10 = r"د"
+        c10 = sub(a10, b10, c9)
+        a11 = r"ﺫ|ﺬ|ﻧ"  # a11/b11/c11 are reused here. NOTE(review): ﻧ looks like a form of ن, yet it is mapped to ذ -- confirm
+        b11 = r"ذ"
+        c11 = sub(a11, b11, c10)
+        a12 = r"ڙ|ڗ|ڒ|ڑ|ڕ|ﺭ|ﺮ"
+        b12 = r"ر"
+        c12 = sub(a12, b12, c11)
+        a13 = r"ﺰ|ﺯ"
+        b13 = r"ز"
+        c13 = sub(a13, b13, c12)
+        a14 = r"ﮊ"
+        b14 = r"ژ"
+        c14 = sub(a14, b14, c13)
+        a15 = r"ݭ|ݜ|ﺱ|ﺲ|ښ|ﺴ|ﺳ"
+        b15 = r"س"
+        c15 = sub(a15, b15, c14)
+        a16 = r"ﺵ|ﺶ|ﺸ|ﺷ"
+        b16 = r"ش"
+        c16 = sub(a16, b16, c15)
+        a17 = r"ﺺ|ﺼ|ﺻ"
+        b17 = r"ص"
+        c17 = sub(a17, b17, c16)
+        a18 = r"ﺽ|ﺾ|ﺿ|ﻀ"
+        b18 = r"ض"
+        c18 = sub(a18, b18, c17)
+        a19 = r"ﻁ|ﻂ|ﻃ|ﻄ"
+        b19 = r"ط"
+        c19 = sub(a19, b19, c18)
+        a20 = r"ﻆ|ﻇ|ﻈ"
+        b20 = r"ظ"
+        c20 = sub(a20, b20, c19)
+        a21 = r"ڠ|ﻉ|ﻊ|ﻋ"
+        b21 = r"ع"
+        c21 = sub(a21, b21, c20)
+        a22 = r"ﻎ|ۼ|ﻍ|ﻐ|ﻏ"
+        b22 = r"غ"
+        c22 = sub(a22, b22, c21)
+        a23 = r"ﻒ|ﻑ|ﻔ|ﻓ"
+        b23 = r"ف"
+        c23 = sub(a23, b23, c22)
+        a24 = r"ﻕ|ڤ|ﻖ|ﻗ"
+        b24 = r"ق"
+        c24 = sub(a24, b24, c23)
+        a25 = r"ڭ|ﻚ|ﮎ|ﻜ|ﮏ|ګ|ﻛ|ﮑ|ﮐ|ڪ|ك"
+        b25 = r"ک"
+        c25 = sub(a25, b25, c24)
+        a26 = r"ﮚ|ﮒ|ﮓ|ﮕ|ﮔ"
+        b26 = r"گ"
+        c26 = sub(a26, b26, c25)
+        a27 = r"ﻝ|ﻞ|ﻠ|ڵ"
+        b27 = r"ل"
+        c27 = sub(a27, b27, c26)
+        a28 = r"ﻡ|ﻤ|ﻢ|ﻣ"
+        b28 = r"م"
+        c28 = sub(a28, b28, c27)
+        a29 = r"ڼ|ﻦ|ﻥ|ﻨ"
+        b29 = r"ن"
+        c29 = sub(a29, b29, c28)
+        a30 = r"ވ|ﯙ|ۈ|ۋ|ﺆ|ۊ|ۇ|ۏ|ۅ|ۉ|ﻭ|ﻮ|ؤ"
+        b30 = r"و"
+        c30 = sub(a30, b30, c29)
+        a31 = r"ﺔ|ﻬ|ھ|ﻩ|ﻫ|ﻪ|ۀ|ە|ة|ہ"
+        b31 = r"ه"
+        c31 = sub(a31, b31, c30)
+        a32 = r"ﭛ|ﻯ|ۍ|ﻰ|ﻱ|ﻲ|ں|ﻳ|ﻴ|ﯼ|ې|ﯽ|ﯾ|ﯿ|ێ|ے|ى|ي"
+        b32 = r"ی"
+        c32 = sub(a32, b32, c31)
+        a33 = r'¬'
+        b33 = r'‌'
+        c33 = sub(a33, b33, c32)
+        pa0 = r'•|·|●|·|・|∙|｡|ⴰ'
+        pb0 = r'.'
+        pc0 = sub(pa0, pb0, c33)
+        pa1 = r',|٬|٫|‚|，'
+        pb1 = r'،'
+        pc1 = sub(pa1, pb1, pc0)
+        pa2 = r'ʕ'
+        pb2 = r'؟'
+        pc2 = sub(pa2, pb2, pc1)
+        na0 = r'۰|٠'
+        nb0 = r'0'
+        nc0 = sub(na0, nb0, pc2)
+        na1 = r'۱|١'
+        nb1 = r'1'
+        nc1 = sub(na1, nb1, nc0)
+        na2 = r'۲|٢'
+        nb2 = r'2'
+        nc2 = sub(na2, nb2, nc1)
+        na3 = r'۳|٣'
+        nb3 = r'3'
+        nc3 = sub(na3, nb3, nc2)
+        na4 = r'۴|٤'
+        nb4 = r'4'
+        nc4 = sub(na4, nb4, nc3)
+        na5 = r'۵'  # NOTE(review): the other digit patterns list two alternatives; this one has a single form -- confirm
+        nb5 = r'5'
+        nc5 = sub(na5, nb5, nc4)
+        na6 = r'۶|٦'
+        nb6 = r'6'
+        nc6 = sub(na6, nb6, nc5)
+        na7 = r'۷|٧'
+        nb7 = r'7'
+        nc7 = sub(na7, nb7, nc6)
+        na8 = r'۸|٨'
+        nb8 = r'8'
+        nc8 = sub(na8, nb8, nc7)
+        na9 = r'۹|٩'
+        nb9 = r'9'
+        nc9 = sub(na9, nb9, nc8)
+        np2 = r'²'
+        nm2 = r'2'
+        ng2 = sub(np2, nm2, nc9)
+        ea1 = r'|ِ|ُ|َ|ٍ|ٌ|ً|'  # the pattern starts and ends with '|', so it also has empty alternatives
+        eb1 = r''
+        ec1 = sub(ea1, eb1, ng2)
+        ea1 = r'ـ'  # ea1/eb1 are reused for the 'ـ' -> '_' step
+        eb1 = r'_'
+        ec2 = sub(ea1, eb1, ec1)
+        Sa1 = r'( )+'
+        Sb1 = r' '
+        Sc1 = sub(Sa1, Sb1, ec2)
+        Sa2 = r'(\n)+'
+        Sb2 = r'\n'
+        Sc2 = sub(Sa2, Sb2, Sc1)
+        return Sc2
+
+    def space_correction(self, doc_string):
+        """Re-attach common Persian prefixes and suffixes that were split off by a space.
+
+        The regex rules below run in order, each on the output of the
+        previous one; the replacement templates are applied as written.
+        """
+        rules = (
+            (r'^(بی|می|نمی)( )', r'\1‌'),
+            (r'( )(می|نمی|بی)( )', r'\1\2‌'),
+            (r'( )(هایی|ها|های|ایی|هایم|هایت|هایش|هایمان|هایتان|هایشان|ات|ان|ین'
+             r'|انی|بان|ام|ای|یم|ید|اید|اند|بودم|بودی|بود|بودیم|بودید|بودند|ست)( )', r'‌\2\3'),
+            (r'( )(شده|نشده)( )', r'‌\2‌'),
+            (r'( )(طلبان|طلب|گرایی|گرایان|شناس|شناسی|گذاری|گذار|گذاران|شناسان|گیری|پذیری|بندی|آوری|سازی|'
+             r'بندی|کننده|کنندگان|گیری|پرداز|پردازی|پردازان|آمیز|سنجی|ریزی|داری|دهنده|آمیز|پذیری'
+             r'|پذیر|پذیران|گر|ریز|ریزی|رسانی|یاب|یابی|گانه|گانه‌ای|انگاری|گا|بند|رسانی|دهندگان|دار)( )', r'‌\2\3'),
+        )
+        result = doc_string
+        for pattern, replacement in rules:
+            result = sub(pattern, replacement, result)
+        return result
+
+    def space_correction_plus1(self, doc_string):
+        """Swap every space-separated word found in ``self.dic1`` for its entry.
+
+        Each word is emitted with one leading space, so the result always
+        starts with a space.
+        """
+        table = self.dic1
+        return ''.join(' ' + (table[word] if word in table else word) for word in doc_string.split(' '))
+
+    def space_correction_plus2(self, doc_string):
+        """Replace adjacent word pairs whose concatenation is a ``self.dic2`` key.
+
+        Inputs with fewer than two space-separated words are returned unchanged."""
+        words = doc_string.split(' ')
+        if len(words) < 2:
+            return doc_string
+        pieces = []
+        pending = True  # False right after a pair matched (the original ``cnt == 0``)
+        for first, second in zip(words, words[1:]):
+            pair = first + second
+            if pair in self.dic2:
+                pieces.append(self.dic2[pair])
+            elif pending:
+                pieces.append(first)
+            pending = pair not in self.dic2
+        if pending:
+            pieces.append(words[-1])
+        return ''.join(' ' + piece for piece in pieces)
+
+    def space_correction_plus3(self, doc_string):
+        """Replace adjacent word triples whose concatenation is a ``self.dic3`` key.
+
+        Inputs with fewer than three space-separated words are returned unchanged."""
+        words = doc_string.split(' ')
+        if len(words) < 3:
+            return doc_string
+        pieces = []
+        last_missed = True  # the original ``cnt == 1``
+        covered = 0  # countdown set to 2 by a match (the original ``cnt2``)
+        for start in range(len(words) - 2):
+            key = ''.join(words[start:start + 3])
+            if key in self.dic3:
+                pieces.append(self.dic3[key])
+                last_missed, covered = False, 2
+                continue
+            if last_missed and covered == 0:
+                pieces.append(words[start])
+            else:
+                covered -= 1
+            last_missed = True
+        if last_missed and covered == 0:
+            pieces.extend(words[-2:])
+        elif last_missed and covered == 1:
+            pieces.append(words[-1])
+        return ''.join(' ' + piece for piece in pieces)
+
+    def normalize(self, doc_string, new_line_elimination=False, return_dates = False):
+        """Normalize ``doc_string``; optionally also return the recognised dates.
+
+        The text goes through ``normalize_content``; when
+        ``date_normalizing_needed`` is set, ``self.date_normalizer`` additionally
+        normalizes spelled-out numbers and dates.
+
+        Fix: ``remove_additional_tokens`` used to run outside the date branch and
+        failed whenever date normalizing was off (the default), because the names
+        it needs only exist inside that branch. The date results now default to
+        empty lists. ``new_line_elimination`` is accepted but unused.
+
+        Returns the normalized text, or ``(normalized_string, dates,
+        recognized_dates, recognized_numbers)`` when ``return_dates`` is true.
+        """
+        normalized_string = normalize_content(doc_string)
+        dates, recognized_dates, recognized_numbers = [], [], []
+        if self.date_normalizing_needed:
+            date_normalizer = self.date_normalizer
+            token_list = self.tokenizer.tokenize_words(normalized_string)
+            # Normalize spelled-out numbers and the like.
+            normalized_string, additional_token_index_array = date_normalizer.normalize_numbers2(token_list)
+            # Normalize and recognise dates.
+            normalized_string, dates, recognized_dates = date_normalizer.general_normalize_date(normalized_string, additional_token_index_array)
+            # Handle numbers that follow one another separated by "و" (and).
+            normalized_string, recognized_numbers = date_normalizer.handle_continuous_numbers(normalized_string)
+            # Splitting glued forms such as "وسیصد" adds tokens; drop the extras again.
+            normalized_string = date_normalizer.remove_additional_tokens(normalized_string.split(), additional_token_index_array)
+        if not return_dates:
+            return normalized_string
+        # Order the recognised dates by the token index at which each one starts.
+        recognized_dates.sort(key=lambda x: int(x['start_date_token_index']), reverse=False)
+        return normalized_string, dates, recognized_dates, recognized_numbers
+
+
+class DateNormalizer():
+    def __init__(self):
+        # Builds two lookup tables: month names -> month number, and number words -> integer value.
+        self.month_dict = { "فروردین": 1,"فروردینماه": 1,'فروردین\u200cماه':1,
+                            "اردیبهشت": 2,"اردیبهشتماه": 2,'اردیبهشت\u200cماه':2,
+                            "خرداد": 3,"خردادماه": 3,
+                            "تیر": 4,"تیرماه": 4,
+                            "مرداد": 5,"مردادماه": 5,
+                            "شهریور": 6,"شهریورماه": 6,
+                            "مهر": 7,"مهرماه": 7,
+                            "آبان": 8,"آبانماه": 8,'آبان\u200cماه':8,
+                            "آذر": 9,"آذرماه": 9,
+                            "دی": 10,"دیماه": 10,'دی\u200cماه':10,
+                            "بهمن": 11,"بهمنماه": 11,'بهمن\u200cماه':11,
+                            "اسفند": 12,"اسفندماه": 12}
+        self.num_dict = {"صد": 100,"وصد": 100, "یکصد": 100, "ویکصد": 100, "هزار": 1000,"وهزار": 1000,  # most words are listed twice: plain and with a leading "و"
+                         "یکهزار": 1000,"ویکهزار": 1000, 
+                         "میلیون": 1000000,"ملیون": 1000000, "ومیلیون": 1000000,"وملیون": 1000000,
+                         "یکمیلیون": 1000000,"یکملیون": 1000000,
+                         "ویکمیلیون": 1000000,"ویکملیون": 1000000, "دویست": 200,"ودویست": 200,
+                         "ده": 10,"وده": 10, "نه": 9,"ونه": 9, "هشت": 8,"وهشت": 8, "هفت": 7,"وهفت": 7,
+                         "شش": 6,"وشش": 6, "پنج": 5,"وپنج": 5,"چهار": 4,"وچهار": 4, "سه": 3,"وسه": 3,
+                         "دو": 2,"ودو": 2, "یک": 1,"ویک": 1, "یازده": 11,"ویازده": 11, "سیزده": 13, "وسیزده": 13,
+                         "چهارده": 14, "دوازده": 12, "پانزده": 15, "شانزده": 16, "هفده": 17,"هیفده": 17,
+                         "وچهارده": 14, "ودوازده": 12, "وپانزده": 15, "وشانزده": 16, "وهفده": 17,"وهیفده": 17,
+                         "هجده": 18,"هیجده": 18, "نوزده": 19, "بیست": 20, "سی": 30, "چهل": 40, "پنجاه": 50,
+                         "وهجده": 18,"وهیجده": 18, "ونوزده": 19, "وبیست": 20, "وسی": 30, "وچهل": 40, "وپنجاه": 50,
+                         "شصت": 60, "هفتاد": 70, "نود": 90, "سیصد": 300, "چهارصد": 400,
+                         "وشصت": 60, "وهفتاد": 70, "ونود": 90, "وسیصد": 300, "وچهارصد": 400,
+                         "پانصد": 500, "ششصد": 600, "هفتصد": 700, "هشتصد": 800, "نهصد": 900,
+                         "وپانصد": 500, "وششصد": 600, "وهفتصد": 700, "وهشتصد": 800, "ونهصد": 900,
+                         "هشتاد": 80, " ": 0, "میلیارد": 1000000000,"ملیارد": 1000000000,
+                         "یکمیلیارد": 1000000000,"یکملیارد": 1000000000,
+                         "وهشتاد": 80, " ": 0, "ومیلیارد": 1000000000,"وملیارد": 1000000000,  # the " " key repeats the entry two lines up (same value)
+                         "ویکمیلیارد": 1000000000,"ویکملیارد": 1000000000,
+                         "صدم": 100, "هزارم": 1000, "دویستم": 200,
+                         "وصدم": 100, "وهزارم": 1000, "ودویستم": 200,
+                         "دهم": 10, "نهم": 9, "هشتم": 8, "هفتم": 7, "ششم": 6, "پنجم": 5,
+                         "ودهم": 10, "ونهم": 9, "وهشتم": 8, "وهفتم": 7, "وششم": 6, "وپنجم": 5,
+                         "چهارم": 4, "سوم": 3, "دوم": 2, "یکم": 1, "اول": 1, "یازدهم": 11, "سیزدهم": 13,
+                         "وچهارم": 4, "وسوم": 3, "ودوم": 2, "ویکم": 1, "واول": 1, "ویازدهم": 11, "وسیزدهم": 13,
+                         "چهاردهم": 14, "دوازدهم": 12, "پانزدهم": 15, "شانزدهم": 16, "هفدهم": 17,"هیفدهم": 17,
+                         "وچهاردهم": 14, "ودوازدهم": 12, "وپانزدهم": 15, "وشانزدهم": 16, "وهفدهم": 17,"وهیفدهم": 17,
+                         "هجدهم": 18,"هیجدهم": 18, "نوزدهم": 19, "بیستم": 20, "چهلم": 40, "پنجاهم": 50,
+                         "وهجدهم": 18,"وهیجدهم": 18, "ونوزدهم": 19, "وبیستم": 20, "وچهلم": 40, "وپنجاهم": 50,
+                         "شصتم": 60, "هفتادم": 70, "نودم": 90, "سیصدم": 300, "چهارصدم": 400,
+                         "وشصتم": 60, "وهفتادم": 70, "ونودم": 90, "وسیصدم": 300, "وچهارصدم": 400,
+                         "پانصدم": 500, "ششصدم": 600, "هفتصدم": 700, "هشتصدم": 800, "نهصدم": 900,
+                         "وپانصدم": 500, "وششصدم": 600, "وهفتصدم": 700, "وهشتصدم": 800, "ونهصدم": 900,
+                         "هشتادم": 80,"وهشتادم": 80}
+
+    def find_date_part(self, token_list):
+        import re
+        for index, element in enumerate(token_list):
+            # بررسی الگوی تاریخ 1398/12/18 
+            pattern_date = r'^(\d{4}\s*/\s*([1-9]|0[1-9]|1[0-2])\s*/\s*([1-9]|0[1-9]|[12][0-9]|3[01]))$'
+            date_pattern = re.compile(pattern_date)
+            match_date   = date_pattern.match(element)
+            if match_date:
+                date_parts = match_date.string.split('/')
+                year  = int(re.search(r'\d+', date_parts[0]).group())
+                month = int(re.search(r'\d+', date_parts[1]).group())
+                day   = int(re.search(r'\d+', date_parts[2]).group())
+                # اگر مقداری که در روز است، چهار رقمی باشد باید مقدار روز و سال را با هم عوض کرد
+                if day > 999:
+                    day  = int(re.search(r'\d+', date_parts[0]).group())
+                    year = int(re.search(r'\d+', date_parts[2]).group())
+                formal_date = "y" + str(year) + "m" + str(month) + "d" + str(day)
+                return formal_date, index, index, index
+            
+            # 24 /12 /1401 بررسی الگوی تاریخ 
+            pattern_date = r'\s*\d{2}\s*/\s*\d{2}\s*/\s*\d{4}\s*'
+            date_pattern = re.compile(pattern_date)
+            match_date   = date_pattern.match(element)
+            if match_date:
+                date_parts = match_date.string.split('/')
+                year       = int(re.search(r'\d+', date_parts[0]).group())
+                month      = int(re.search(r'\d+', date_parts[1]).group())
+                day        = int(re.search(r'\d+', date_parts[2]).group())
+                # اگر مقداری که در روز است، چهار رقمی باشد باید مقدار روز و سال را با هم عوض کرد
+                if day > 999:
+                    day  = int(re.search(r'\d+', date_parts[0]).group())
+                    year = int(re.search(r'\d+', date_parts[2]).group())
+                formal_date = "y" + str(year) + "m" + str(month) + "d" + str(day)
+                return formal_date, index, index, index
+            
+            # بررسی الگوی تاریخ 1402،10،06
+            patterndate2 = r'^(\d{4}\s*،\s*([1-9]|0[1-9]|1[0-2])\s*،\s*([1-9]|0[1-9]|[12][0-9]|3[01]))$'
+            date_pattern = re.compile(patterndate2)
+            match_date2  = date_pattern.match(element)
+            if match_date2:
+                date_parts = match_date2.string.split('،')
+                year  = int(re.search(r'\d+', date_parts[0]).group())
+                month = int(re.search(r'\d+', date_parts[1]).group())
+                day   = int(re.search(r'\d+', date_parts[2]).group())
+                # اگر مقداری که در روز است، چهار رقمی باشد باید مقدار روز و سال را با هم عوض کرد
+                if day > 999:
+                    day  = int(re.search(r'\d+', date_parts[0]).group())
+                    year = int(re.search(r'\d+', date_parts[2]).group())
+                formal_date = "y" + str(year) + "m" + str(month) + "d" + str(day)
+                return formal_date, index, index, index
+            
+            # بررسی الگوی تاریخ 13ر04ر1345
+            patterndate2 = r'^(\d{4}\s*ر\s*([1-9]|0[1-9]|1[0-2])\s*ر\s*([1-9]|0[1-9]|[12][0-9]|3[01]))$'
+            date_pattern = re.compile(patterndate2)
+            match_date2  = date_pattern.match(element)
+            if match_date2:
+                date_parts = match_date2.string.split('ر')
+                year  = int(re.search(r'\d+', date_parts[0]).group())
+                month = int(re.search(r'\d+', date_parts[1]).group())
+                day   = int(re.search(r'\d+', date_parts[2]).group())
+                # اگر مقداری که در روز است، چهار رقمی باشد باید مقدار روز و سال را با هم عوض کرد
+                if day > 999:
+                    day  = int(re.search(r'\d+', date_parts[0]).group())
+                    year = int(re.search(r'\d+', date_parts[2]).group())
+                formal_date = "y" + str(year) + "m" + str(month) + "d" + str(day)
+                return formal_date, index, index, index
+
+            if element == "/":
+                if index-1 >= 0 and index+1 < len(token_list) \
+                        and token_list[index -1].isdigit() and token_list[index+1].isdigit():
+                    if index+3 < len(token_list) and token_list[index+2] == "/" \
+                            and token_list[index + 3].isdigit():
+                        if int(token_list[index-1]) < 1450 and int(token_list[index+1]) < 13 and \
+                            int(token_list[index+3]) < 32:
+                            formal_date = [int(token_list[index-1]), int(token_list[index+1]), int(token_list[index+3])]
+                            formal_date = "y" + str(formal_date[0]) + "m" + str(formal_date[1]) + "d" + str(formal_date[2])
+                            #return formal_date, index-1, index+3, index
+                            return formal_date, index, index, index
+                        elif int(token_list[index-1]) < 32 and int(token_list[index+1]) < 13 and \
+                            int(token_list[index+3]) < 1450:
+                            formal_date = [int(token_list[index-1]), int(token_list[index+1]), int(token_list[index+3])]
+                            formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0])
+                            #return formal_date, index-1, index+3, index
+                            return formal_date, index, index, index
+                        else:
+                            formal_date = [int(token_list[index-1]), int(token_list[index+1]), int(token_list[index+3])]
+                            formal_date = "num(" + str(formal_date[0]) + "/" + str(formal_date[1]) + "/" + str(formal_date[2]) + ")"
+                            #return formal_date, index-1, index+3, index
+                            return formal_date, index, index, index
+                    elif (int(token_list[index-1]) < 1450 and int(token_list[index-1]) > 1250)  and int(token_list[index+1]) < 13:
+                        formal_date = [int(token_list[index-1]), int(token_list[index+ 1]), 0]
+                        formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0])
+                        #return formal_date, index-1 , index+1, index
+                        return formal_date, index , index, index
+                    else:
+                        formal_date = [int(token_list[index-1]), int(token_list[index+ 1])]
+                        formal_date = "num(" + str(formal_date[0]) + "/" + str(formal_date[1]) + ")"
+                        #return formal_date, index-1 , index+1, index
+                        return formal_date, index , index, index
+
+            if element in self.month_dict or element == "سال":
+                if index + 1 < len(token_list) and index - 1 > -2:
+                    try:
+                        start_date_index = end_date_index = 0
+                        if(token_list[index + 1] == 'ماه'):
+                            if(token_list[index + 2] == 'سال' and str(token_list[index + 3]).isdigit()):
+
+                                if(int(token_list[index + 3])==1000):
+                                    # در این حالت، امکان دارد روز مربوط به این تاریخ هم به صورت حروف در متن قانون نوشته شده باشد
+                                    # به همین دلیل ، توکن های قبل از ماه را نیز کنترل می کنیم
+                                    start_date_index = index - 1
+                                    day = int(token_list[index - 1])
+                                    if(token_list[index - 2]=='و' and token_list[index - 3].isdigit()):
+                                        day += int(token_list[index - 3])# رقم دهگان روز
+                                        start_date_index = index - 3
+                                    year = 1000
+                                    if(len(token_list)>= index + 5):
+                                        if(token_list[index + 4]=='و' and token_list[index + 5].isdigit()):
+                                            year += int(token_list[index + 5])# رقم صدگان سال
+                                            end_date_index = index + 5
+                                    if(len(token_list)>= index + 7):
+                                        if(token_list[index + 6]=='و' and token_list[index + 7].isdigit()):
+                                            year += int(token_list[index + 7])# رقم دهگان سال
+                                            end_date_index = index + 7
+                                    if(len(token_list)>= index + 9):
+                                        if(token_list[index + 8]=='و' and token_list[index + 9].isdigit()):
+                                            year += int(token_list[index + 9])# رقم یکان سال
+                                            end_date_index = index + 9
+
+                                    formal_date = [day, int(self.month_dict[token_list[index]]), year]# مثلا 20 و 8 تیر ماه سال 1000 و 300 و 20 و 5
+                                    
+                                else:
+                                    start_date_index = index - 1
+                                    end_date_index = index + 3
+                                    day = int(token_list[index - 1])
+                                    if(token_list[index - 2]=='و' and int(token_list[index - 1])<10 and int(token_list[index - 3])<31 and token_list[index - 3].isdigit()):
+                                        start_date_index = index - 3
+                                        day += int(token_list[index - 3])# رقم دهگان روز
+
+                                        formal_date = [day, int(self.month_dict[token_list[index]]), int(token_list[index + 3])]# مثلا 20 و 7 تیر ماه سال 1368
+                                        
+                                    
+                                    formal_date = [day, int(self.month_dict[token_list[index]]), int(token_list[index + 3])]# مثلا 27 تیر ماه سال 1368
+
+                            elif(str(token_list[index + 2]).isdigit()):
+                                if(int(token_list[index + 2])==1000):
+                                    
+                                    if token_list[index-1].strip() == 'ام': # مثلا 30 ام تیر ماه 1000 و 300 و 80 و 2
+                                        start_date_index = index - 2
+                                        day = int(token_list[index - 2])
+                                    else:
+                                        # در این حالت، امکان دارد روز مربوط به این تاریخ هم به صورت حروف در متن قانون نوشته شده باشد
+                                        # به همین دلیل ، توکن های قبل از ماه را نیز کنترل می کنیم
+                                        start_date_index = index - 1
+                                        day = int(token_list[index - 1])
+                                        
+                                        if(token_list[index - 2]=='و' and token_list[index - 3].isdigit()):
+                                            day += int(token_list[index - 3])# رقم دهگان روز
+                                            start_date_index = index - 3
+                                    year = 1000
+                                    if(len(token_list)>= index + 4):
+                                        if(token_list[index + 3]=='و' and token_list[index + 4].isdigit()):
+                                            year += int(token_list[index + 4])# رقم صدگان سال
+                                            end_date_index = index + 4
+                                    if(len(token_list)>= index + 6):
+                                        if(token_list[index + 5]=='و' and token_list[index + 6].isdigit()):
+                                            year += int(token_list[index + 6])# رقم دهگان سال
+                                            end_date_index = index + 6
+                                    if(len(token_list)>= index + 8):
+                                        if(token_list[index + 7]=='و' and token_list[index + 8].isdigit()):
+                                            year += int(token_list[index + 8])# رقم یکان سال
+                                            end_date_index = index + 8
+                                    formal_date = [day, int(self.month_dict[token_list[index]]), year]# مثلا 20 و 7 تیر ماه 1000 و 300 و 20 و 5
+                                    
+                                else:
+                                    formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), int(token_list[index + 2])]# مثلا 27 تیر ماه سال 1368
+                                    start_date_index = index - 1
+                                    end_date_index = index + 2
+                                #formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), int(token_list[index + 2])] # مثلا 27 تیر ماه 1368
+                        
+                        elif(token_list[index + 1] == 'سال' and (not(token_list[index + 2]).isdigit())):
+                            
+                            formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), int(token_list[index + 2])] # مثلا 27 تیر سال 1368
+                            start_date_index = index - 1
+                            end_date_index = index + 2
+                        else:
+                            if(token_list[index + 1] == 'سال' and str(token_list[index + 2]).isdigit()):
+
+                                if(int(token_list[index + 2])==1000):
+                                    
+                                    # در این حالت، امکان دارد روز مربوط به این تاریخ هم به صورت حروف در متن قانون نوشته شده باشد
+                                    # به همین دلیل ، توکن های قبل از ماه را نیز کنترل می کنیم
+                                    start_date_index = index - 1
+                                    day = int(token_list[index - 1])
+                                    if(token_list[index - 2]=='و' and token_list[index - 3].isdigit()):
+                                        day += int(token_list[index - 3])# رقم دهگان روز
+                                        start_date_index = index - 3
+                                    year = 1000
+                                    if(len(token_list)>= index + 4):
+                                        if(token_list[index + 3]=='و' and token_list[index + 4].isdigit()):
+                                            year += int(token_list[index + 4])# رقم صدگان سال
+                                            end_date_index = index + 4
+                                    if(len(token_list)>= index + 6):
+                                        if(token_list[index + 5]=='و' and token_list[index + 6].isdigit()):
+                                            year += int(token_list[index + 6])#  # رقم دهگان سال !!! برای تاریخ بین 1399 تا 1410 هم همین روال درست بر می گرداند، هرچند منطق غلطی دارد چون مثلا سال 1402 رقم صدگان ندارد
+                                            end_date_index = index + 6
+                                    if(len(token_list)>= index + 8):
+                                        if((len(token_list)>= index + 7 and token_list[index + 7]=='و') and (len(token_list)>= index + 8 and token_list[index + 8].isdigit())):
+                                            year += int(token_list[index + 8])# رقم یکان سال
+                                            end_date_index = index + 8
+
+                                    formal_date = [day, int(self.month_dict[token_list[index]]), year]# مثلا 20 و 8 تیر ماه سال 1000 و 300 و 20 و 5
+                                    
+                                else:
+                                    formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), int(token_list[index + 2])]# مثلا 27 تیر سال 1368
+                                    start_date_index = index - 1
+                                    end_date_index = index + 2
+
+                            elif(str(token_list[index + 1]).isdigit()):
+                                if(int(token_list[index + 1])==1000):
+                                    # در این حالت، امکان دارد روز مربوط به این تاریخ هم به صورت حروف در متن قانون نوشته شده باشد
+                                    # به همین دلیل ، توکن های قبل از ماه را نیز کنترل می کنیم
+                                    start_date_index = index - 1
+                                    day = int(token_list[index - 1])
+                                    if(token_list[index - 2]=='و' and token_list[index - 3].isdigit()):
+                                        day += int(token_list[index - 3])# رقم دهگان روز
+                                        start_date_index = index - 3
+                                    year = 1000
+                                    if(len(token_list)>= index + 3):
+                                        if(token_list[index + 2]=='و' and token_list[index + 3].isdigit()):
+                                            year += int(token_list[index + 3])# رقم صدگان سال
+                                            end_date_index = index + 3
+                                    if(len(token_list)>= index + 5):
+                                        if(token_list[index + 4]=='و' and token_list[index + 5].isdigit()):
+                                            year += int(token_list[index + 5])# رقم دهگان سال !!! برای تاریخ بین 1399 تا 1410 هم همین روال درست بر می گرداند، هرچند منطق غلطی دارد چون مثلا سال 1402 رقم صدگان ندارد
+                                            end_date_index = index + 5
+                                    if(len(token_list)>= index + 7):
+                                        if(token_list[index + 6]=='و' and token_list[index + 7].isdigit()):
+                                            year += int(token_list[index + 7])# رقم یکان سال
+                                            end_date_index = index + 7
+                                    formal_date = [day, int(self.month_dict[token_list[index]]), year]# مثلا 20 و 7 تیر ماه 1000 و 300 و 20 و 5
+                                else:
+                                    formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), int(token_list[index + 1])]# مثلا 27 تیر 1320‌
+                                    start_date_index = index - 1
+                                    end_date_index = index + 1
+                                    
+                            else:
+                                formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), int(token_list[index + 1])]# مثلا 27 تیر 1365
+                                start_date_index = index - 1
+                                end_date_index = index + 1
+                        formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0])
+                        if token_list[index - 1] and token_list[index + 1]:
+                            return formal_date, start_date_index, end_date_index, start_date_index
+                        
+                    except Exception as e:
+                        error = e.args[0]
+                        try:
+                            formal_date = [int(token_list[index - 1]), int(self.month_dict[token_list[index]]), 0]
+                            formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0])#مثلا  y1358m12d0
+                            return formal_date, index-1, index, index-1
+                        except:
+                            try:
+                                # مثلا سال 1400
+                                if token_list[index] == "سال":
+                                    formal_date = [int(token_list[index + 1]),0, 0]
+                                    formal_date = "y" + str(formal_date[0]) + "m" + str(formal_date[1]) + "d" + str(formal_date[2])# y1358m0d0
+                                    return formal_date, index, index+1, index
+                                elif token_list[index+1] == "ماه" and index + 1 < len(token_list):
+                                    if index + 2 < len(token_list) and token_list[index + 2].isdigit() and \
+                                        int(token_list[index-2]) < 1450:
+                                        formal_date = [0,int(self.month_dict[token_list[index]]),int(token_list[index+2])]
+                                        formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0]) # y0m12d5
+                                        return formal_date, index, index+2, index
+                                    else:
+                                        formal_date = [0,int(self.month_dict[token_list[index]]),0]
+                                        formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0]) #y0m12d0
+                                        return formal_date, index, index+1, index
+                                elif index + 1 < len(token_list) and token_list[index + 1].isdigit() and int(token_list[index-2]) < 1450:
+                                    formal_date = [0,int(self.month_dict[token_list[index]]),int(token_list[index+1])]
+                                    formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0]) # y0m12d5
+                                    return formal_date, index, index+1, index
+                                elif index-2 >= 0 and index+2 < len(token_list) and \
+                                    token_list[index - 1] == "/" and token_list[index+1] == "/" and \
+                                    token_list[index - 2].isdigit() and int(token_list[index-2]) < 32 and \
+                                    token_list[index + 2].isdigit() and int(token_list[index+2]) < 1450:
+                                    formal_date = [int(token_list[index-2]),int(self.month_dict[token_list[index]]),int(token_list[index+2])]
+                                    formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0])
+                                    return formal_date, index-2, index+2, index-2
+                                elif index + 2 < len(token_list) and token_list[index + 1] == 'سال' and \
+                                    token_list[index + 2].isdigit() and int(token_list[index-2]) < 1450 :
+                                    formal_date = [0,int(self.month_dict[token_list[index]]),int(token_list[index+2])]
+                                    formal_date = "y" + str(formal_date[2]) + "m" + str(formal_date[1]) + "d" + str(formal_date[0])
+                                    return formal_date, index, index+1, index
+                                #else:
+                                #    print("Name : %s -> after1: %s -> after2: %s -> after3: %s" % (token_list[index], token_list[index+1], token_list[index+2], token_list[index+3]))
+                            except:
+                                pass
+
+    def general_normalize_date(self,normalized_string, additional_token_index_array):
+        # 24 /12 /1401 جهت بررسی الگوی تاریخ 
+        normalized_string, recognized_dates = DateNormalizer.separated_date_format_finder(normalized_string)
+        normalized_string_list = normalized_string.strip().split()
+        # جهت یافتن فرمت های مختلف تاریخ غیر از فرمت بررسی شده در بالا
+        normalized_string, dates, recognized_dates_2 = \
+            self.normalize_dates(normalized_string_list, additional_token_index_array,
+                token_list_len = len(normalized_string_list), previous_slice_index_array=[0,])
+        # تجمیع همه تاریخ ها
+        for item in recognized_dates_2:
+            recognized_dates.append(item)
+        return normalized_string, dates, recognized_dates
+
+    # 24 /12 /1401 متد بررسی الگوی تاریخ 
+    def separated_date_format_finder(normalized_string):
+        import re
+        date_pattern = r'\d{1,2} /\d{1,2} /\d{2,4}'
+        regex = re.compile(date_pattern)
+        match_dates = regex.finditer(normalized_string)
+        recognized_dates = []
+        for date_item in match_dates:
+            position = date_item.span()
+            founded_item = date_item.group()
+            start_index = date_item.start() + 1
+            end_index = date_item.end() - 1
+            current_date = founded_item.replace(' ', '')
+            date_parts = current_date.split('/')
+            formal_date = "y" + str(date_parts[2]) + "m" + str(date_parts[1]) + "d" + str(date_parts[0])
+            pattern_tokens_state = token_state_finder(normalized_string, start_index, end_index)
+            recognized_dates.append(
+                    {
+                        "original_date"         : founded_item,
+                        "date"                  : formal_date,
+                        "start_index"           : start_index,
+                        "end_index"             : end_index,
+                        "date_token_index"      : pattern_tokens_state["start_token_state"],
+                        "start_date_token_index": pattern_tokens_state["start_token_state"],
+                        "end_date_token_index"  : pattern_tokens_state["end_token_state"],
+                    })
+        normalized_string_list = normalized_string.strip().split()
+        for date_item in recognized_dates:
+            normalized_string_list[int(date_item['date_token_index'])]         = date_item['date']
+            normalized_string_list[int(date_item['start_date_token_index'])+1] = 'tttt'
+            normalized_string_list[int(date_item['end_date_token_index'])]     = 'tttt'
+        normalized_string_temp = ''
+        for token in normalized_string_list:
+            normalized_string_temp = normalized_string_temp + ' ' + token
+        return normalized_string_temp.strip(), recognized_dates   
+
+    def normalize_dates(self, token_list, additional_token_index_array, token_list_len, previous_slice_index_array= []):
+        try:
+            finded = self.find_date_part(token_list)
+        except Exception as e:
+            finded = None
+            save_error(0, e)
+
+        recognized_dates = []
+        if finded != None:
+            date_part              = finded[0]
+            start_date_token_index = finded[1]
+            end_date_token_index   = finded[2]
+            date_token_index       = finded[3]
+            befor_date_part        = " ".join(x for x in token_list[:start_date_token_index])
+            after_date_part        = [x for x in token_list[end_date_token_index + 1:]]
+            previous_slice_index   = token_list_len - len(after_date_part)
+            previous_slice_index_array.append(previous_slice_index)
+            after_normalized, after_dates, recognized_dates = self.normalize_dates(after_date_part, additional_token_index_array, token_list_len, previous_slice_index_array)
+            if after_dates == '':
+                after_dates = []
+            if recognized_dates == '':
+                recognized_dates = []
+            after_dates.insert(0, date_part)
+            previous_slice_index = previous_slice_index_array.pop(len(previous_slice_index_array)-2)
+            recognized_dates.append(
+                {
+                 "date"                  : date_part,
+                 "date_token_index"      : date_token_index + previous_slice_index,
+                 "start_date_token_index": start_date_token_index + previous_slice_index,
+                 "end_date_token_index"  : end_date_token_index + previous_slice_index
+                })
+            
+            i = 0
+            while((date_token_index - start_date_token_index) > i ):
+                befor_date_part = ''.join([befor_date_part," tttt"]) # به عنوان توکنی که از جنس تاریخ بوده t
+                i += 1
+            i = 0
+            while((end_date_token_index - date_token_index) > i ):
+                date_part = ''.join([date_part," tttt"]) # به عنوان توکنی که از جنس تاریخ بوده t
+                i += 1
+            
+            return befor_date_part + " " + date_part + " " + after_normalized, after_dates, recognized_dates
+        else:
+            return " ".join(x for x in token_list), '',''
+
+    def list2num(self, numerical_section_list):
+        value = 1
+        l = len(numerical_section_list)>3
+        if l :
+            value = 0
+        for index, el in enumerate(numerical_section_list):
+            if self.is_number(el):
+                value = self.num_dict[el]
+            elif el == '000':
+                #value *= 1000
+                pass
+            elif l == True:
+                value += float(el)
+            else:
+                value *= float(el)
+        return value
+
+    def convert2num(self, numerical_section_list):
+        value = 0
+        tmp_section_list = []
+        for index, el in enumerate(numerical_section_list):
+            if self.is_number(el) or (el.replace('.', '', 1).isdigit()):
+                tmp_section_list.append(el)
+            elif el == "و":
+                value += self.list2num(tmp_section_list)
+                tmp_section_list[:] = []
+        if len(tmp_section_list) > 0:
+            value += self.list2num(tmp_section_list)
+            tmp_section_list[:] = []
+        try:
+            if (value-int(value) == 0):
+                return int(value)
+            else:
+                return value
+        except:
+            return 0
+        
+
+    def is_number(self, word):
+        return word in self.num_dict
+
+    def find_number_location(self, token_list, addWithVaa = False):
+        start_index = 0
+        number_section =[]
+        for i , el in enumerate(token_list):
+            if self.is_number(el) or (el.replace('.', '', 1).isdigit()):
+                start_index = i
+                number_section.append(start_index)
+                break
+
+        i = start_index+1
+        while(i < len(token_list)):
+            if token_list[i] == "و" and (i+1)<len(token_list) and addWithVaa:
+                if self.is_number(token_list[i+1]) or (token_list[i+1].replace('.', '', 1).isdigit()):
+                    number_section.append(i)
+                    number_section.append(i+1)
+                    i += 2
+                else:
+                    break
+            elif self.is_number(token_list[i]) or (token_list[i].replace('.', '', 1).isdigit()):
+                number_section.append(i)
+                i += 1
+            elif token_list[i] == "/" and (i+1)<len(token_list) and token_list[i+1] == "000":
+                number_section.append(i)
+                number_section.append(i+1)
+                i += 2
+            else:
+                break
+        return number_section
+
+    def normalize_numbers(self, token_list, converted=""):
+        for i, el in enumerate(token_list):
+            if el.endswith("ین") and self.is_number(el[:-2]):
+                token_list[i] = el[:-2]
+        finded = self.find_number_location(token_list)
+        if len(finded) == 0:
+            rest_of_string = " ".join(t for t in token_list)
+            return converted + " " + rest_of_string
+        else:
+            numerical_subsection = [token_list[x] for x in finded]
+            numerical_subsection = self.convert2num(numerical_subsection)
+
+            converted = converted + " " + " ".join(x for x in token_list[:finded[0]]) + " " + str(numerical_subsection)
+
+            new_index = finded[-1] + 1
+            return self.normalize_numbers(token_list[new_index:], converted)
+
+    # This method converts numbers written in words to their digit equivalents.
+    def normalize_numbers2(self, token_list, converted=""):
+        additional_token_index_array = [] # آرایه ای از توکن هایی که اضافه بر اصل متن ممکن است در این متد اضافه شوند
+        for index, token in enumerate(token_list):
+
+            if token in self.num_dict:
+                if token.startswith('و'): # برای حالتی مانند وسیصد که بین واو و سیصد، فاصله ای وجود ندارد
+                    temp_token = token[1:]
+                    if temp_token in self.num_dict:
+                       # در متن، واو را از عدد جدا می کنیم
+                       # در این حالت یک توکن به متن اصلی اضافه می شود که باید کنترل شود
+                       converted = converted + " " + "و" + " " + str(int(self.num_dict[token]))
+                       additional_token_index_array.append(index) 
+                else:    
+                    converted = converted + " " + str(int(self.num_dict[token]))
+            else:
+                converted = converted + " " + token
+        return converted, additional_token_index_array
+
+    # This method multiplies consecutive numbers that are separated by spaces.
+    def multiply_continuous_numbers(self, normalized_string):
+        token_list       = normalized_string.strip().split()
+        converted_string = ''
+        for index,token in enumerate(token_list):
+            num_array = []
+            if token.isdigit():
+                #if (token_list[index+1]).isdigit():    
+                    # اگر عدد مورد نظر با فرمت بالا نبود، حالت اعداد بدون فاصله کنار هم که باید در هم ضرب شوند را بررسی می کنیم
+                    num_array.clear() 
+                    num_array.append(int(token))
+                    step = 1
+                    deletting_indexes = []
+                    num_array_sum = 1
+                    while True:
+                        if (token_list[index+step]).isdigit() :
+                            num_array.append(int(token_list[index+step]))
+                            deletting_indexes.append(index+step)
+                            step += 1
+                            continue
+                        else:
+                            break
+                        
+                    for index in deletting_indexes:
+                        # بازای توکن هایی که عدد هستند و به دلیل ضرب در هم شدن باید حذف شوند، رشته بی معنی زیر را جایگزین کن
+                        token_list[index] =   'nnnn' # به معنای توکنی که از جنس عدد بوده number
+                    for number in num_array: # ،عدادی که پشت سرهم بوده اند، بدون فاصله و در مراحل قبل پردازش نشده اند و در آرایه ذخیره شده را ضرب می کند و در یک عدد، یک کاسه می کند، مثلا 2 1000 1000000000 تومان را
+                        num_array_sum *= number 
+                    converted_string = converted_string + " " + str(num_array_sum) 
+               
+            else:
+                converted_string = converted_string + " " + token
+        return converted_string
+    
+    # This method sums consecutive numbers that are separated by the conjunction "و" (and).
+    def sum_continuous_numbers(self, normalized_string):
+        token_list = normalized_string.strip().split()
+        converted_string = ''
+        operand = '+'
+        for index,token in enumerate(token_list):
+            num_array = []
+            if token.isdigit():
+                # این بخش، متن را بررسی می کند و اعداد دو یا سه رقمی و بیشتر که با واو از هم جدا شده را با هم جمع و یک کاسه می کند
+                if token_list[index+1]=='و': # 2 , 50 , 600 :مانند
+                    
+                    if(int(token) % 10 != 0): # اگر عدد دو یا سه یا چهار یا ... رقمی باشد، رقم دهگاه، صدگان، هزارگان و ... باید باقیمانده شان بر 2 برابر با صفر باشد، اگر نبود، ادامه نده
+                        converted_string = converted_string + " " + token
+                    else: 
+                        num_array.append(int(token))
+                        step = 1
+                        deletting_indexes = []
+                        num_array_sum = 0
+                        while True:
+                            if token_list[index+step] == 'و' :
+                                # اگر یکی از شرط های زیر را داشت، اعدادی که با واو جدا شده اند، مربوط به تاریخ هستند و نباید جمع عددی انجام شود
+                                if (token_list[index+step+1] == 'tttt' or token_list[index+step+1].startswith('y')):
+                                    deletting_indexes.clear()
+                                    num_array.clear()
+                                    num_array_sum = int(token)
+                                    break
+                                elif token_list[index+step+1].isdigit() and token_list[index+step+2]=='nnnn' :
+                                    operand = '*'
+                                    num_array_sum = 1
+                                    num_array.append(int(token_list[index+step+1]))
+                                    # ذخیره کردن ایندکس هایی که جمع شده و باید به جای آن یک توکن خنثی بگذاریم
+                                    deletting_indexes.append(index+step)
+                                    deletting_indexes.append(index+step+1)
+                                    step += 3
+                                    continue
+                                elif token_list[index+step+1].isdigit() :
+                                    num_array.append(int(token_list[index+step+1]))
+                                    # ذخیره کردن ایندکس هایی که جمع شده و باید به جای آن یک توکن خنثی بگذاریم
+                                    deletting_indexes.append(index+step)
+                                    deletting_indexes.append(index+step+1)
+                                    step += 2
+                                    continue
+                            else:
+                                break
+                            
+                        for index in deletting_indexes:
+                            # بازای توکن هایی که عدد یا واو هستند و  باید حذف شوند، رشته بی معنی زیر را جایگزین کن
+                            token_list[index] =   'nnnn' # به معنای توکنی که از جنس عدد بوده number
+
+                        for number in num_array: # اعدادی که با واو پشت سرهم بوده اند و در آرایه ذخیره شده را جمع می کند و در یک عدد، یک کاسه می کند
+                            if operand == '+':
+                                num_array_sum += number 
+                            elif operand == '*':
+                                num_array_sum *= number
+                        converted_string = converted_string + " " + str(num_array_sum)
+ 
+            else:
+                converted_string = converted_string + " " + token
+        return converted_string
+    
+    # This method handles consecutive numbers separated by a space or "و" that must be merged into one single number.
+    def handle_continuous_numbers(self, normalized_string):
+        token_list = normalized_string.strip().split()
+        converted_string = ''
+        number_parts_array = []
+        start_token_index = 0
+        end_token_index = 0
+        recognized_numbers = []
+        flag = True
+        is_tabsare_numbers = False
+        is_mavad_numbers = False
+        is_madde_numbers = False
+        for token_index,token in enumerate(token_list):
+            if token.isdigit() :
+                # if token == '61' :
+                #      print(len(token_list))
+                if token_index - 2 >= 0:
+                    # تشخیص اینکه یک سلسله از اعداد که با واو از هم جدا شده اند تبصره های مربوط به یک ماده نباشند
+                    is_tabsare_numbers = (token_list[token_index-2] + ' ' + token_list[token_index-1]=='تبصره های')
+                if token_index - 1 >= 0:
+                    # تشخیص اینکه یک سلسله از اعداد که با واو از هم جدا شده اند مواد یک قانون نباشند
+                    is_mavad_numbers = (token_list[token_index-1]=='مواد')
+                if token_index - 1 >= 0:
+                    # تشخیص اینکه یک سلسله از اعداد که با واو از هم جدا شده اند ماده های یک قانون نباشند
+                    is_madde_numbers = (token_list[token_index-1]=='ماده')
+                if is_tabsare_numbers or is_mavad_numbers or is_madde_numbers:
+                    flag = False
+                start_token_index = end_token_index = token_index
+                number_parts_array.clear()
+                current_token_index = token_index
+                number_parts_array.append(token)
+                if current_token_index + 1 < len(token_list):
+                    while ((flag) and (token_list[current_token_index+1] == 'و' or token_list[current_token_index+1].isdigit())):
+                        number_parts_array.append(token_list[current_token_index+1])
+                        current_token_index += 1
+                        end_token_index = current_token_index
+                        if not (current_token_index + 1 < len(token_list)):
+                            break
+                
+                final_number = number_completter(number_parts_array)
+                token_list[token_index] = str(final_number)
+                recognized_numbers.append({
+                    'number_value'     : final_number,
+                    'number_token_list': number_parts_array,
+                    'start_token_index': start_token_index,
+                    'end_token_index'  : end_token_index
+                    })
+                # جایگذاری مقدار رشته بی ارزش به جای توکن های مربوط به عدد جاری
+                # به منظور اینکه متن اصلی از نظر تعداد توکن تغییر نکند
+                for i in range(token_index + 1 , len(number_parts_array) + token_index):
+                    token_list[i] = 'nnnn'
+                converted_string = converted_string + ' ' + token_list[token_index]
+                if token_index + 1 < len(token_list):
+                    if flag == False and (token_list[token_index+1]=='و'):
+                        flag = False
+                    else:
+                        flag = True
+            else:
+                converted_string = converted_string + ' ' + token
+               
+        return converted_string, recognized_numbers
+
+    
+
+
+    def remove_additional_tokens(self,token_list,additional_token_index_array):
+        converted_string = ''
+        for index in additional_token_index_array:
+            del token_list[index]
+
+        for token in token_list:
+            converted_string = converted_string + ' ' + token
+        return converted_string
+    
+class PinglishNormalizer():
+    def __init__(self):
+        self.data_helper = DataHelper()
+        self.file_dir = os.path.dirname(os.path.realpath(__file__)) + "/"
+
+        self.en_dict_filename = self.file_dir + "resource/tokenizer/enDict"
+        self.en_dict = self.data_helper.load_var(self.en_dict_filename)
+
+        self.fa_dict_filename = self.file_dir + "resource/tokenizer/faDict"
+        self.fa_dict = self.data_helper.load_var(self.fa_dict_filename)
+
+
+    def pingilish2persian(self, pinglish_words_list):
+
+        for i, word in enumerate(pinglish_words_list):
+            if word in self.en_dict:
+                pinglish_words_list[i] = self.en_dict[word]#.decode("utf-8")
+                #inp = inp.replace(word, enDict[word], 1)
+            else:
+                ch = self.characterize(word)
+                pr = self.map_char(ch)
+                amir = self.make_word(pr)
+                for wd in amir:
+                    am = self.escalation(wd)
+                    asd = ''.join(am)
+                    if asd in self.fa_dict:
+                        pinglish_words_list[i] = asd#.decode("utf-8")
+                        #inp = inp.replace(word, asd, 1)
+        inp = " ".join(x for x in pinglish_words_list)
+        return inp
+
+    def characterize(self, word):
+        list_of_char = []
+        i = 0
+        while i < len(word):
+            char = word[i]
+            sw_out = self.switcher(char)
+            if (sw_out == None):
+                esp_out = None
+                if(i < len(word) - 1):
+                    esp_out = self.esp_check(word[i], word[i + 1])
+                if(esp_out == None):
+                    list_of_char.append(word[i])
+                else:
+                    list_of_char.append(esp_out)
+                    i += 1
+            else:
+                list_of_char.append(sw_out)
+            i += 1
+        return list_of_char
+
+    def switcher(self, ch):
+        switcher = {
+            "c": None,
+            "k": None,
+            "z": None,
+            "s": None,
+            "g": None,
+            "a": None,
+            "u": None,
+            "e": None,
+            "o": None
+        }
+        return switcher.get(ch, ch)
+
+    def esp_check(self, char1, char2):
+        st = char1 + char2
+        if (st == "ch"):
+            return "ch"
+        elif (st == "kh"):
+            return "kh"
+        elif (st == "zh"):
+            return "zh"
+        elif (st == "sh"):
+            return "sh"
+        elif (st == "gh"):
+            return "gh"
+        elif (st == "aa"):
+            return "aa"
+        elif (st == "ee"):
+            return "ee"
+        elif (st == "oo"):
+            return "oo"
+        elif (st == "ou"):
+            return "ou"
+        else:
+            return None
+
+    def map_char(self, word):
+        listm = []
+        sw_out = self.map_switcher(word[0])
+        i = 0
+        if (sw_out == None):
+            listm.append(["ا"])
+            i += 1
+        if (word[0] == "oo"):
+            listm.append(["او"])
+            i += 1
+        while i < len(word):
+            listm.append(self.char_switcher(word[i]))
+            i += 1
+        if word[len(word) - 1] == "e":
+            listm.append(["ه"])
+        elif word[len(word) - 1] == "a":
+            listm.append(["ا"])
+        elif word[len(word) - 1] == "o":
+            listm.append(["و"])
+        elif word[len(word) - 1] == "u":
+            listm.append(["و"])
+
+        return listm
+
+    def map_switcher(self, ch):
+        switcher = {
+            "a": None,
+            "e": None,
+            "o": None,
+            "u": None,
+            "ee": None,
+
+            "ou": None
+        }
+        return switcher.get(ch, ch)
+
+    def make_word(self, chp):
+        word_list = [[]]
+        for char in chp:
+            word_list_temp = []
+            for tmp_word_list in word_list:
+                for chch in char:
+                    tmp = copy.deepcopy(tmp_word_list)
+                    tmp.append(chch)
+                    word_list_temp.append(tmp)
+            word_list = word_list_temp
+        return word_list
+
+    def escalation(self, word):
+        tmp = []
+        i = 0
+        t = len(word)
+        while i < t - 1:
+            tmp.append(word[i])
+            if word[i] == word[i + 1]:
+                i += 1
+            i += 1
+        if i != t:
+            tmp.append(word[i])
+        return tmp
+
+    def char_switcher(self, ch):
+        switcher = {
+            'a': ["", "ا"],
+            'c': ["ث", "ص", "ص"],
+            'h': ["ه", "ح"],
+            'b': ["ب"],
+            'p': ["پ"],
+            't': ["ت", "ط"],
+            's': ["س", "ص", "ث"],
+            'j': ["ج"],
+            'ch': ["چ"],
+            'kh': ["خ"],
+            'q': ["ق", "غ"],
+            'd': ["د"],
+            'z': ["ز", "ذ", "ض", "ظ"],
+            'r': ["ر"],
+            'zh': ["ژ"],
+            'sh': ["ش"],
+            'gh': [",ق", "غ"],
+            'f': ["ف"],
+            'k': ["ک"],
+            'g': ["گ"],
+            'l': ["ل"],
+            'm': ["م"],
+            'n': ["ن"],
+            'v': ["و"],
+            'aa': ["ا"],
+            'ee': ["ی"],
+            'oo': ["و"],
+            'ou': ["و"],
+            'i': ["ی"],
+            'y': ["ی"],
+            ' ': [""],
+            'w': ["و"],
+            'e': ["", "ه"],
+            'o': ["", "و"]
+        }
+        return switcher.get(ch, "")
+
+# Module-level Normalizer instance (built with date_normalizing_needed=True);
+# normalize_content calls its sub_alphabets method.
+_normalizer = Normalizer(date_normalizing_needed=True)
+# Regex alternation of the "yeh"-like letters that normalYehKe replaces with yeFr.
+yeAr = r"ﻱ|ې|ێ|ے|ى|ي|ئ"
+yeFr= r"ی"
+# Regex alternation of the "kaf"-like letters that normalYehKe replaces with keFr.
+keAr = r"ڭ|ﻚ|ﮎ|ﻜ|ﮏ|ګ|ﻛ|ﮑ|ﮐ|ڪ|ك"
+keFr = r"ک"
+
+# This routine sums and multiplies the separate parts (hundreds, thousands and higher) of a number that is spelled out in the text.
+def number_completter2(number_parts_array):
+        """Combine the digit-string parts of one number into a single value.
+
+        ``number_parts_array`` is a list of string tokens; non-digit tokens
+        are skipped.  Digit tokens are collected until a token is followed by
+        a multiplier (1000, 1000000 or 1000000000); the collected sum is then
+        multiplied and the remaining tokens are folded in, partly through a
+        recursive call and partly through ``number_completter``.
+
+        Returns the combined integer, or ``None`` when the loop ends without
+        reaching one of the ``return`` statements (for example for an empty
+        list or when the last token is not a digit string).
+
+        NOTE(review): ``handle_continuous_numbers`` in this file calls
+        ``number_completter``, not this function - this looks like an earlier
+        variant; confirm before relying on it.
+        """
+        zarb_value = 0
+        temp_number_array = []
+        sum_number_parts = 0
+        final_number = 0
+        for index,item in enumerate(number_parts_array):
+                    if item.isdigit():
+                        temp_number_array.append(int(item))
+                        # Last token of the array: the result is the sum of everything collected.
+                        if index + 1 >= len(number_parts_array):
+                            for item3 in temp_number_array:
+                                final_number += int(item3)
+                            return final_number 
+                        current_value = number_parts_array[index+1]       
+                        # The next token is a multiplier: this branch always ends with a return.
+                        if (current_value.isdigit() and (int(current_value) == 1000 or int(current_value) == 1000000 or int(current_value) == 1000000000)):# or int(current_value) == 1000000
+                            zarb_value = int(number_parts_array[index+1])
+                            for num_item in temp_number_array:
+                                sum_number_parts += num_item
+                            final_number = sum_number_parts * zarb_value
+                            # Tokens that follow the multiplier are collected here.
+                            temp_array2 = []
+                            x = index + 2
+                            while x < len(number_parts_array):
+                            # for x in range(index+2,len(number_parts_array)):
+                                temp_array2.append(number_parts_array[x])
+                                if x + 1 < len(number_parts_array):
+                                    if number_parts_array[x+1].isdigit() and (int(number_parts_array[x+1]) == 1000 or int(number_parts_array[x+1]) == 1000000 or int(number_parts_array[x+1]) == 1000000000):
+                                        if int(number_parts_array[x+1]) > zarb_value: # 1000000>1000
+                                            zarb_value = int(number_parts_array[x+1])
+                                            # Recursive call that computes the value of the collected tokens.
+                                            final_number += number_completter2(temp_array2)
+                                            final_number *= zarb_value
+                                            temp_array2.clear()
+                                            zarb_value = 0
+                                            x += 2 # skip the multiplier so that it is not stored in temp_array2
+                                        else: # 1000!>1000000
+                                            zarb_value = int(number_parts_array[x+1])
+                                            temp_num = 0
+                                            if (zarb_value == 1000 or zarb_value == 1000000) and len(number_parts_array) > x + 1:
+                                                # Everything after this multiplier is evaluated by number_completter.
+                                                num_array = number_parts_array[x+2:len(number_parts_array)]
+                                                temp_num = number_completter(num_array) 
+                                                if temp_num == None:
+                                                    temp_num = 0 
+                                                
+                                                '''for i in range(x+2,len(number_parts_array)):
+                                                    if(number_parts_array[i].isdigit()):
+                                                        temp_num += int(number_parts_array[i])
+                                                        i+=1'''
+                                            # NOTE(review): the sum below starts at 1, not 0 - confirm this is intended.
+                                            temp_num2 = 1
+                                            for num_item2 in temp_array2:
+                                                if(num_item2.isdigit()):
+                                                    temp_num2 += int(num_item2)
+                                            
+                                            temp_num2 *= zarb_value  
+                                            final_number += temp_num + temp_num2
+                                            break
+                                    else:
+                                        x += 1 #
+                                else: # reached the end of the array
+                                    temp_num = 0
+                                    # Sum the values collected in temp_array2.
+                                    for num_item3 in temp_array2:
+                                        if(num_item3.isdigit()):
+                                            temp_num += int(num_item3)
+                                     # Multiply the running result by that sum and leave the loop.
+                                    final_number *= temp_num
+                                    #final_number += temp_num
+                                    break       
+                            return final_number
+                        
+# This routine sums and multiplies the separate parts (hundreds, thousands and higher) of a number that is spelled out in the text.
+def number_completter(number_parts_array):
+        zarb_value                = 0
+        previous_zarb_value       = 0
+        previous_number_parts_sum = 0
+        temp_number_array         = []
+        number_parts_sum          = 0
+        final_number              = 0
+        current_number_part       = 0
+        for index,item in enumerate(number_parts_array):
+                    if item.isdigit():      
+                        if (not(int(item) == 1000 or int(item) == 1000000 or int(item) == 1000000000)):
+                            temp_number_array.append(item)
+                            continue
+                        elif((int(item) == 1000 or int(item) == 1000000 or int(item) == 1000000000)):
+                            zarb_value = int(item)    
+                            for num_item in temp_number_array:
+                                number_parts_sum += int(num_item)
+                            if number_parts_sum == 0 and previous_number_parts_sum == 0:  
+                                number_parts_sum = 1  
+                            temp_number_array.clear()    
+                            
+                        else:# for example 952
+                            zarb_value = 1    
+                            for num_item in temp_number_array:
+                                number_parts_sum += int(num_item)
+                            current_number_part = number_parts_sum + previous_number_parts_sum
+                            continue
+                        if previous_zarb_value < zarb_value:# for example 1000 < 1000000000   
+                            current_number_part = previous_number_parts_sum + number_parts_sum
+                            current_number_part = zarb_value * current_number_part
+                        else:# previous_zarb_value > zarb_value
+                            if number_parts_sum == 0:
+                                number_parts_sum = 1
+                            current_number_part  = zarb_value * number_parts_sum
+                            current_number_part += previous_number_parts_sum
+
+                        previous_number_parts_sum = current_number_part
+                        current_number_part = 0
+                        previous_zarb_value = zarb_value
+                        number_parts_sum = 0
+        if len(temp_number_array) != 0:
+            remained_parts_sum = 0  
+            for num_item in temp_number_array:
+                remained_parts_sum += int(num_item)
+            final_number = previous_number_parts_sum + remained_parts_sum
+            return final_number
+            
+        final_number = previous_number_parts_sum
+        return final_number
+
+def normalize_content(content):
+    text = normalYehKe(content)
+    # جایگزین کردن یک فرمت استاندارد از حروف فارسی به جای فرمت های مختلف فارسی و
+    text = _normalizer.sub_alphabets(text)
+    # کلماتی که با نیم فاصله از هم جدا شده اند، را به هم می چسباند
+    # در این صورت، اگر با یک اسپیس جایگزین شود، یک توکن به متن اصلی اضافه می کند
+    text = sub('\u200c','',text)
+    text = sub('\u200F','',text)
+    pattern = r',|٬|٫|‚|，|؟|ʕ|_|ـ|،'# r',|٬|٫|‚|，|؟|ʕ|،|_|ـ'
+    text = sub(pattern,' ', text)
+    text = text.replace('.',' ')
+
+    return text.strip()
+
+def normalYehKe(text):
+    if(text == None) :
+       return ''
+
+    c1 = sub(yeAr, yeFr, text)
+    c2 = sub(keAr, keFr, c1)
+    c2 = c2.replace('\u00A0', '')    
+    return c2.strip()
+
+# Takes a text plus the start and end character indexes of a phrase inside that text,
+# and returns the numbers of the first and the last token that belong to the phrase.
+def token_state_finder(normalized_section_content, start_index, end_index):
+    before_substring = normalized_section_content[0:start_index-1].strip()
+    pattern_substring = normalized_section_content[start_index-1:end_index+1].strip()
+    before_substring_token_list = before_substring.strip().split()
+    pattern_token_list = pattern_substring.strip().split()
+    start_token_state = len(before_substring_token_list)
+    end_token_state = len(before_substring_token_list) + (len(pattern_token_list)-1)
+    pattern_tokens_state ={
+        "start_token_state": start_token_state,
+        "end_token_state"  : end_token_state
+          }
+    return pattern_tokens_state
+
+def save_error(id=0, e=Exception):
+    import datetime
+    err_date = datetime.datetime.now()
+    filename =  'law_section_errors.txt'
+    exc_type, exc_value, exc_traceback = sys.exc_info()
+    frame = exc_traceback.tb_frame
+    function_array = traceback.extract_tb(exc_traceback)
+    current_function = function_array[len(function_array)-1]
+    error = f'''
+        id           : {id}
+        filename     : {current_function.filename}
+        function     : {current_function.name}
+        err line no  : {current_function.lineno}
+        err line     : {current_function.line}
+        err message  : {e}
+        err date     : {err_date}
+    '''
+    print( '*'*60 + error + '*'*60)
+    with open(filename, 'a+', encoding='utf-8') as file:
+        # نوشتن خطا در فایل
+        file.write(error + '\n' + 50*'*' + '\n')
+    return error
\ No newline at end of file