I am creating an app that builds sentences from cards, but since each card is a single word, the constructed sentence is often worded incorrectly. To correct the errors, I use torch with the model pipeline("text2text-generation", model="cointegrated/rut5-base-paraphraser"), and pymorphy2 for the final grammatical correction. I embed the Python code into the mobile app using Chaquopy, but as a result my app is about 2 GB in size, which is not acceptable. Is there any way to reduce the amount of space it takes up?
import pymorphy2
from transformers import pipeline
# Seq2seq paraphrase model: rewrites a rough word sequence into a fluent sentence.
# NOTE(review): loading this model pulls in the full torch + transformers stack,
# which is the dominant contributor to the ~2 GB app size mentioned above.
corrector = pipeline("text2text-generation", model="cointegrated/rut5-base-paraphraser")
# Morphological analyzer used to lemmatize and inflect individual Russian words.
morph = pymorphy2.MorphAnalyzer()
def correct_text(text):
    """Paraphrase *text* with the seq2seq corrector, then rebuild the sentence
    word-by-word from the original, substituting each original word with its
    counterpart from the corrected output when one can be aligned.

    Alignment, tried in order for each corrected word:
      1. identical lemma (pymorphy2 normal form);
      2. the corrected word's nominative form equals the original's lemma;
      3. substring overlap between the corrected lemma and the original word.
    An original word with no match is kept unchanged, so output length always
    equals the original word count.

    :param text: input sentence (whitespace-separated words).
    :return: corrected sentence as a single space-joined string.
    """
    corrected_text = corrector(text)[0]['generated_text']

    # Parse every corrected word exactly once, outside the per-original-word
    # loop. The previous version re-parsed and re-inflected each corrected
    # word for every original word (O(n*m) morphological analyses).
    corrected_info = []
    for word in corrected_text.split():
        parse = morph.parse(word)[0]
        nomn = parse.inflect({'nomn'})
        corrected_info.append(
            (word, parse.normal_form, nomn.word if nomn else None)
        )

    final_words = []
    for orig_word in text.split():
        orig_normal = morph.parse(orig_word)[0].normal_form
        for word, normal, nomn_word in corrected_info:
            # The three match rules all trigger the same substitution, so
            # they collapse into one condition (evaluated in the same order).
            if (normal == orig_normal
                    or nomn_word == orig_normal
                    or normal in orig_word
                    or orig_word in normal):
                final_words.append(word)
                break
        else:
            # No counterpart found in the corrected text: keep the original.
            final_words.append(orig_word)
    return ' '.join(final_words)