# Source code for src.algorithms.TextComparator

# importing standard libraries
import string

# importing external libraries
import nltk

from nltk.corpus import stopwords

# Load the Russian stop-word list.  The corpus is downloaded only when it is
# not available locally, so importing this module does not contact the
# network on every import.
try:
    stop_words = stopwords.words('russian')
except LookupError:
    nltk.download('stopwords')
    stop_words = stopwords.words('russian')
# Characters removed from tokens before comparison (string.punctuation,
# i.e. ASCII punctuation only).
punctuation_marks = string.punctuation


# a class for working with texts

# [docs]
class TextComparator:
    """Helpers for measuring how much a text changed between two versions."""

    @staticmethod
    def count_percent_of_transformed_words(input_text_before_changing, input_text_after_changing):
        """Return the percentage of words added or deleted between two texts.

        Each text is lower-cased, split on whitespace, stripped of the
        characters in the module-level ``punctuation_marks`` and filtered
        against the module-level ``stop_words``.  A word of the new text that
        does not occur in the old text counts as added; a word of the old
        text that does not occur in the new text counts as deleted.  Every
        occurrence is counted, so a repeated new word is counted each time.

        Args:
            input_text_before_changing: The original text.
            input_text_after_changing: The edited text.

        Returns:
            ``(added + deleted) / (words_before + words_after) * 100`` rounded
            to two decimals, or ``0.0`` when neither text contains any word
            after filtering.

        Raises:
            TypeError: If either argument is not a ``str``.
        """
        if not isinstance(input_text_before_changing, str) \
                or not isinstance(input_text_after_changing, str):
            raise TypeError("Input must be a strings")

        words_before = TextComparator._extract_words(input_text_before_changing)
        words_after = TextComparator._extract_words(input_text_after_changing)

        total_words = len(words_before) + len(words_after)
        if total_words == 0:
            # Nothing to compare (empty texts or only stop words/punctuation);
            # report "no change" instead of dividing by zero.
            return 0.0

        # Sets give O(1) membership tests; the counts below still run over
        # the lists so that every occurrence of a word is counted.
        vocabulary_before = set(words_before)
        vocabulary_after = set(words_after)

        # criterion 1: number of added words
        count_of_added_words = sum(
            1 for word in words_after if word not in vocabulary_before)
        # criterion 2: number of deleted words
        count_of_deleted_words = sum(
            1 for word in words_before if word not in vocabulary_after)

        return round(
            (count_of_added_words + count_of_deleted_words) / total_words * 100, 2)

    @staticmethod
    def _extract_words(text):
        """Return the lower-cased words of *text* without punctuation or stop words."""
        ignored_words = set(stop_words)
        strip_punctuation = str.maketrans('', '', punctuation_marks)

        words = []
        for raw_token in text.lower().split():
            cleaned_token = raw_token.translate(strip_punctuation)
            # Skip tokens that were nothing but punctuation, and check the
            # stop-word list both before and after stripping so that a stop
            # word followed by a comma or period is filtered out as well.
            if not cleaned_token:
                continue
            if raw_token in ignored_words or cleaned_token in ignored_words:
                continue
            words.append(cleaned_token)
        return words
# Source code for src.algorithms.TextComparator

# Edulytica

# Navigation

# Related Topics