think remover function added
This commit is contained in:
parent
075f931654
commit
4e3214b6c6
|
@ -43,17 +43,28 @@ def remove_think_tags(strings):
|
||||||
else:
|
else:
|
||||||
return [re.sub(r'<think>[\s\S]*?</think>', '', s) for s in strings]
|
return [re.sub(r'<think>[\s\S]*?</think>', '', s) for s in strings]
|
||||||
|
|
||||||
|
def think_remover(text):
    """Return the answer part of a model response.

    Removes, in this order:

    1. Everything up to and including the last ``</think>`` closing tag,
       if the tag is present.
    2. Everything up to and including the first ``HAMTAA`` marker found in
       what remains, if the marker is present.

    Reasoning is removed before the marker is looked up so that a
    ``HAMTAA`` mentioned inside the reasoning is not mistaken for the
    marker that opens the answer. Only the first marker in the remainder
    is consumed, so an answer that itself contains ``HAMTAA`` is not
    truncated.

    Args:
        text: Raw response. Non-string values (for example ``None``) are
            returned unchanged instead of raising.

    Returns:
        The text following the reasoning and the marker, with surrounding
        whitespace left untouched; ``text`` itself when neither the tag
        nor the marker occurs.
    """
    if not isinstance(text, str):
        return text

    # rpartition yields ('', '', text) when the tag is absent, so the last
    # element is always the right starting point for the marker lookup.
    answer = text.rpartition('</think>')[2]

    _, marker, after_marker = answer.partition('HAMTAA')
    # An empty `marker` means 'HAMTAA' was not found in the remainder.
    return after_marker if marker else answer
|
||||||
|
|
||||||
|
|
||||||
SYS_PROMPT_simpler = """
|
SYS_PROMPT_simpler = """
|
||||||
Analyze the input sentence and divide it into several simpler and clearer sentences. The new sentences should retain the main meaning of the original sentence but be easier to understand. Use short and direct sentences and avoid complex terms or confusing grammatical structures.
|
Analyze the input sentence and divide it into several simpler and clearer sentences. The new sentences should retain the main meaning of the original sentence but be easier to understand. Use short and direct sentences and avoid complex terms or confusing grammatical structures.
|
||||||
The output should be Persian sentences only.
|
The output should be Persian sentences only.
|
||||||
Each sentence should either be factual and express a minimal fact, or be conditional and express a minimal rule. A minimal fact or rule is a fact or rule that cannot be expressed in two separate and independent sentences.
|
Each sentence should either be factual and express a minimal fact, or be conditional and express a minimal rule. A minimal fact or rule is a fact or rule that cannot be expressed in two separate and independent sentences.
|
||||||
|
start the output with this word: "HAMTAA"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SYS_PROMPT_modified ="""
|
SYS_PROMPT_modified ="""
|
||||||
Edit the text below carefully for punctuation (including punctuation, spaces and half-spaces), and spelling errors, and perform a complete editorial review considering all editing points. The output should only be the corrected text without any additional explanation.
|
Edit the text below carefully for punctuation (including punctuation, spaces and half-spaces), and spelling errors, and perform a complete editorial review considering all editing points. The output should only be the corrected text without any additional explanation.
|
||||||
The text should be preserved completely and only edited. Do not alter the sentence structure, add or remove words, or change the original meaning in any way. Please pay attention to correcting homophone words and fix these words based on the meaning of the sentence.
|
The text should be preserved completely and only edited. Do not alter the sentence structure, add or remove words, or change the original meaning in any way. Please pay attention to correcting homophone words and fix these words based on the meaning of the sentence.
|
||||||
|
start the output with this word: "HAMTAA"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SYS_PROMPT_translated ="""
|
SYS_PROMPT_translated ="""
|
||||||
|
@ -66,7 +77,7 @@ Proper noun: do not translate those words that are proper nouns.
|
||||||
Cultural Sensitivity: Translate any technical terms, idiomatic expressions, and culturally specific references appropriately to ensure clarity and relevance in Persian.
|
Cultural Sensitivity: Translate any technical terms, idiomatic expressions, and culturally specific references appropriately to ensure clarity and relevance in Persian.
|
||||||
The final English translation should be professional, natural, and suitable for its intended audience, which is law's audiance.
|
The final English translation should be professional, natural, and suitable for its intended audience, which is law's audiance.
|
||||||
|
|
||||||
Note that the output must be nothing but the final traslation
|
Note that the output must be nothing but the final traslation. start the output with this extra word: "HAMTAA"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SYS_PROMPT_fewshot ="""
|
SYS_PROMPT_fewshot ="""
|
||||||
|
@ -78,6 +89,7 @@ If the sentences are factual and no rule-like content, they do not require logic
|
||||||
To understand the output structure, follow the examples below.
|
To understand the output structure, follow the examples below.
|
||||||
|
|
||||||
Note that in some cases, to understand the rules of logical relationships in a section of the law, you must consider other information outside the text as context. If you are given a sentence with context, you may use the context in your logical analysis of the sentence.
|
Note that in some cases, to understand the rules of logical relationships in a section of the law, you must consider other information outside the text as context. If you are given a sentence with context, you may use the context in your logical analysis of the sentence.
|
||||||
|
start the output with this word: "HAMTAA"
|
||||||
do not say nothing other than the output like these examples:
|
do not say nothing other than the output like these examples:
|
||||||
Input:
|
Input:
|
||||||
"سرمایه های صرف شده که در اختیار واحدهای عملیات نفتی گذاشته شده یا میشود، جزء دارایی های واحد مزبور خواهد بود ولی هرگونه نقل و انتقال آنها منوط به اجازه وزارت نفت میباشد."
|
"سرمایه های صرف شده که در اختیار واحدهای عملیات نفتی گذاشته شده یا میشود، جزء دارایی های واحد مزبور خواهد بود ولی هرگونه نقل و انتقال آنها منوط به اجازه وزارت نفت میباشد."
|
||||||
|
@ -195,20 +207,26 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
context = context + " " + a
|
context = context + " " + a
|
||||||
# context1_simpler= simpler(content, context)
|
# context1_simpler= simpler(content, context)
|
||||||
|
# context1_simpler = think_remover(context1_simpler)
|
||||||
# item['simplified_content'] = context1_simpler
|
# item['simplified_content'] = context1_simpler
|
||||||
# context1_modified= modified(content, context)
|
# context1_modified= modified(content, context)
|
||||||
|
# context1_modified = think_remover(context1_modified)
|
||||||
# item['modified_content'] = context1_modified
|
# item['modified_content'] = context1_modified
|
||||||
# context1_translated= translated(content)
|
# context1_translated= translated(content)
|
||||||
|
# context1_translated = think_remover(context1_translated)
|
||||||
# item['translated_content'] = context1_translated
|
# item['translated_content'] = context1_translated
|
||||||
###
|
###
|
||||||
simplified_content = item['simplified_content']
|
simplified_content = item['simplified_content']
|
||||||
simplified_content_rule = fewshot(simplified_content, context)
|
simplified_content_rule = fewshot(simplified_content, context)
|
||||||
|
simplified_content_rule = think_remover(simplified_content_rule)
|
||||||
item['simplified_content_rule'] = simplified_content_rule
|
item['simplified_content_rule'] = simplified_content_rule
|
||||||
modified_content = item['modified_content']
|
modified_content = item['modified_content']
|
||||||
modified_content_rule = fewshot(modified_content, context)
|
modified_content_rule = fewshot(modified_content, context)
|
||||||
|
modified_content_rule = think_remover(modified_content_rule)
|
||||||
item['modified_content_rule'] = modified_content_rule
|
item['modified_content_rule'] = modified_content_rule
|
||||||
translated_content = item['translated_content']
|
translated_content = item['translated_content']
|
||||||
translated_content_rule = fewshot(translated_content, context)
|
translated_content_rule = fewshot(translated_content, context)
|
||||||
|
translated_content_rule = think_remover(translated_content_rule)
|
||||||
item['translated_content_rule'] = translated_content_rule
|
item['translated_content_rule'] = translated_content_rule
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -48,12 +48,13 @@ SYS_PROMPT_simpler = """
|
||||||
Analyze the input sentence and divide it into several simpler and clearer sentences. The new sentences should retain the main meaning of the original sentence but be easier to understand. Use short and direct sentences and avoid complex terms or confusing grammatical structures.
|
Analyze the input sentence and divide it into several simpler and clearer sentences. The new sentences should retain the main meaning of the original sentence but be easier to understand. Use short and direct sentences and avoid complex terms or confusing grammatical structures.
|
||||||
The output should be Persian sentences only.
|
The output should be Persian sentences only.
|
||||||
Each sentence should either be factual and express a minimal fact, or be conditional and express a minimal rule. A minimal fact or rule is a fact or rule that cannot be expressed in two separate and independent sentences.
|
Each sentence should either be factual and express a minimal fact, or be conditional and express a minimal rule. A minimal fact or rule is a fact or rule that cannot be expressed in two separate and independent sentences.
|
||||||
|
start the output with this word: "HAMTAA"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SYS_PROMPT_modified ="""
|
SYS_PROMPT_modified ="""
|
||||||
Edit the text below carefully for punctuation (including punctuation, spaces and half-spaces), and spelling errors, and perform a complete editorial review considering all editing points. The output should only be the corrected text without any additional explanation.
|
Edit the text below carefully for punctuation (including punctuation, spaces and half-spaces), and spelling errors, and perform a complete editorial review considering all editing points. The output should only be the corrected text without any additional explanation.
|
||||||
The text should be preserved completely and only edited. Do not alter the sentence structure, add or remove words, or change the original meaning in any way. Please pay attention to correcting homophone words and fix these words based on the meaning of the sentence.
|
The text should be preserved completely and only edited. Do not alter the sentence structure, add or remove words, or change the original meaning in any way. Please pay attention to correcting homophone words and fix these words based on the meaning of the sentence.
|
||||||
|
start the output with this word: "HAMTAA"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SYS_PROMPT_translated ="""
|
SYS_PROMPT_translated ="""
|
||||||
|
@ -66,7 +67,7 @@ Proper noun: do not translate those words that are proper nouns.
|
||||||
Cultural Sensitivity: Translate any technical terms, idiomatic expressions, and culturally specific references appropriately to ensure clarity and relevance in Persian.
|
Cultural Sensitivity: Translate any technical terms, idiomatic expressions, and culturally specific references appropriately to ensure clarity and relevance in Persian.
|
||||||
The final English translation should be professional, natural, and suitable for its intended audience, which is law's audiance.
|
The final English translation should be professional, natural, and suitable for its intended audience, which is law's audiance.
|
||||||
|
|
||||||
Note that the output must be nothing but the final traslation
|
Note that the output must be nothing but the final traslation. start the output with this word: "HAMTAA"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def generate(system_prompt, formatted_prompt):
|
def generate(system_prompt, formatted_prompt):
|
||||||
|
@ -112,6 +113,16 @@ def translated(text):
|
||||||
"""
|
"""
|
||||||
return generate(SYS_PROMPT_translated, user_prompt_with_context)
|
return generate(SYS_PROMPT_translated, user_prompt_with_context)
|
||||||
|
|
||||||
|
def think_remover(text):
    """Return the answer part of a model response.

    Removes, in this order:

    1. Everything up to and including the last ``</think>`` closing tag,
       if the tag is present.
    2. Everything up to and including the first ``HAMTAA`` marker found in
       what remains, if the marker is present.

    Reasoning is removed before the marker is looked up so that a
    ``HAMTAA`` mentioned inside the reasoning is not mistaken for the
    marker that opens the answer. Only the first marker in the remainder
    is consumed, so an answer that itself contains ``HAMTAA`` is not
    truncated.

    Args:
        text: Raw response. Non-string values (for example ``None``) are
            returned unchanged instead of raising.

    Returns:
        The text following the reasoning and the marker, with surrounding
        whitespace left untouched; ``text`` itself when neither the tag
        nor the marker occurs.
    """
    if not isinstance(text, str):
        return text

    # rpartition yields ('', '', text) when the tag is absent, so the last
    # element is always the right starting point for the marker lookup.
    answer = text.rpartition('</think>')[2]

    _, marker, after_marker = answer.partition('HAMTAA')
    # An empty `marker` means 'HAMTAA' was not found in the remainder.
    return after_marker if marker else answer
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print('start')
|
print('start')
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
@ -130,10 +141,13 @@ if __name__ == "__main__":
|
||||||
else:
|
else:
|
||||||
context = context + " " + a
|
context = context + " " + a
|
||||||
context1_simpler= simpler(content, context)
|
context1_simpler= simpler(content, context)
|
||||||
|
context1_simpler = think_remover(context1_simpler)
|
||||||
item['simplified_content'] = context1_simpler
|
item['simplified_content'] = context1_simpler
|
||||||
context1_modified= modified(content, context)
|
context1_modified= modified(content, context)
|
||||||
|
context1_modified = think_remover(context1_modified)
|
||||||
item['modified_content'] = context1_modified
|
item['modified_content'] = context1_modified
|
||||||
context1_translated= translated(content)
|
context1_translated= translated(content)
|
||||||
|
context1_translated = think_remover(context1_translated)
|
||||||
item['translated_content'] = context1_translated
|
item['translated_content'] = context1_translated
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -21,7 +21,8 @@
|
||||||
"parents_content": "",
|
"parents_content": "",
|
||||||
"meaning_change": [],
|
"meaning_change": [],
|
||||||
"referential term": []
|
"referential term": []
|
||||||
}
|
},
|
||||||
|
"correct_rule" : "no-rule"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "qs752668",
|
"id": "qs752668",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user