Module stylotool.src.freestylo.PolysyndetonAnnotation

Classes

class PolysyndetonAnnotation (text: freestylo.TextObject.TextObject, min_length=2, conj=['and', 'or', 'but', 'nor'], sentence_end_tokens=['.', '?', '!', ':', ';', '...'], punct_pos='PUNCT')

This class is used to find polysyndeton candidates in a text. It uses the TextObject class to store the text and its annotations.

Constructor for the PolysyndetonAnnotation class.

Parameters

text : TextObject
The text to be analyzed.
min_length : int, optional
The minimum number of consecutive phrases a polysyndeton candidate must contain to be kept.
conj : list, optional
A list of conjunctions that should be considered when looking for polysyndeton.
sentence_end_tokens : list, optional
A list of tokens that indicate the end of a sentence.
punct_pos : str, optional
The part of speech tag for punctuation.
Expand source code
class PolysyndetonAnnotation:
    """
    Find polysyndeton candidates in a text.

    A candidate is a run of consecutive phrases inside one sentence that all
    begin with the same token; only runs whose shared token is one of the
    configured conjunctions are kept. Tokens are read from ``text.tokens``.
    """
    def __init__(self, text : TextObject, min_length=2, conj = ["and", "or", "but", "nor"], sentence_end_tokens=[".", "?", "!", ":", ";", "..."], punct_pos="PUNCT"):
        """
        Constructor for the PolysyndetonAnnotation class.

        Parameters
        ----------
        text : TextObject
            The text to be analyzed. Only its ``tokens`` attribute is read
            by this class.
        min_length : int, optional
            The minimum number of consecutive phrases a candidate must
            contain to be kept.
        conj : list, optional
            A list of conjunctions that should be considered when looking for polysyndeton.
        sentence_end_tokens : list, optional
            A list of tokens that indicate the end of a sentence.
        punct_pos : str, optional
            The part of speech tag for punctuation. It is stored on the
            instance but not used by the methods of this class.
        """

        self.text = text
        self.candidates = []
        self.min_length = min_length
        # Copy the list arguments: the default lists are created once, when
        # the function is defined, so storing them by reference would let a
        # mutation on one instance leak into every later instance.
        self.conj = list(conj)
        self.sentence_end_tokens = list(sentence_end_tokens)
        self.punct_pos = punct_pos

    def split_in_phrases(self):
        """
        This method splits the text into sentences and phrases.

        A sentence ends at every token contained in ``sentence_end_tokens``.
        Inside a sentence, a new phrase starts at every conjunction that is
        preceded by at least two tokens of the current phrase; the
        conjunction is the first token of the new phrase. Tokens after the
        last sentence-end token form a final sentence of their own.

        Returns
        -------
        list
            A list of sentences. Each sentence is a list of ``[start, end]``
            pairs of token indices, one per phrase, where ``end`` is the
            index of the token that closed the phrase (exclusive).
        """
        tokens = self.text.tokens
        phrases_in_sentences = []
        phrases = []
        current_phrase_start = 0
        for i, token in enumerate(tokens):
            if token in self.sentence_end_tokens:
                # Close the running phrase and the sentence it belongs to.
                phrases.append([current_phrase_start, i])
                phrases_in_sentences.append(phrases)
                phrases = []
                current_phrase_start = i + 1
            elif token in self.conj and i - current_phrase_start > 1:
                # The conjunction opens the next phrase, so it is not skipped.
                phrases.append([current_phrase_start, i])
                current_phrase_start = i

        # Text that does not end with a sentence-end token would otherwise
        # lose its last sentence entirely.
        if current_phrase_start < len(tokens):
            phrases.append([current_phrase_start, len(tokens)])
            phrases_in_sentences.append(phrases)
        return phrases_in_sentences

    def check_add_candidate(self, candidates, candidate):
        """
        This method appends the candidate to the list of candidates if it
        contains at least ``min_length`` phrases.

        Parameters
        ----------
        candidates : list
            A list of polysyndeton candidates. It is modified in place.
        candidate : PolysyndetonCandidate
            The candidate to check.

        Returns
        -------
        list
            The same list that was passed in as ``candidates``.
        """
        if len(candidate.ids) >= self.min_length:
            candidates.append(candidate)
        return candidates

    def find_candidates(self):
        """
        This method finds polysyndeton candidates in the text.

        Consecutive phrases of a sentence that begin with the same token are
        grouped into one candidate. Groups that are long enough and whose
        shared token is a conjunction are stored in ``self.candidates``.
        """
        candidates = []
        for sentence in self.split_in_phrases():
            # Empty placeholder; it is replaced by the first phrase whose
            # first token differs from "".
            current_candidate = PolysyndetonCandidate([], "")
            for phrase in sentence:
                word = self.text.tokens[phrase[0]]
                if word != current_candidate.word:
                    # The run of equal first tokens ended: keep it if it is
                    # long enough and start a new run with this phrase.
                    candidates = self.check_add_candidate(candidates, current_candidate)
                    current_candidate = PolysyndetonCandidate([phrase], word)
                else:
                    current_candidate.ids.append(phrase)
            # Runs never continue across a sentence boundary.
            candidates = self.check_add_candidate(candidates, current_candidate)

        # Only runs that start with one of the configured conjunctions count.
        self.candidates = [c for c in candidates if c.word in self.conj]

    def serialize(self) -> list:
        """
        This method serializes the polysyndeton candidates.

        Returns
        -------
        list
            A list of dictionaries, each containing the ids, score, and word of a polysyndeton candidate.
        """
        return [
            {"ids": c.ids, "score": c.score, "word": c.word}
            for c in self.candidates
        ]

Methods

def check_add_candidate(self, candidates, candidate)

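This method appends the candidate to the list of candidates if it contains at least min_length phrases.

Parameters

candidates : list
A list of polysyndeton candidates.
candidate : PolysyndetonCandidate
The candidate to check.

Returns

list
The list of candidates, with the candidate appended if it is long enough.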
def find_candidates(self)

This method finds polysyndeton candidates in the text.

def serialize(self) ‑> list

This method serializes the polysyndeton candidates.

Returns

list
A list of dictionaries, each containing the ids, word, and score of a polysyndeton candidate.
def split_in_phrases(self)

This method splits the text into phrases.

Returns

list
A list of sentences; each sentence is a list of [start, end] token-index pairs, one per phrase.
class PolysyndetonCandidate (ids, word)

This class represents a polysyndeton candidate.

Constructor for the PolysyndetonCandidate class.

Parameters

ids : list
A list of [start, end] token-index pairs, one for each phrase that belongs to the candidate.
word : str
The token that every phrase of the candidate begins with.
Expand source code
class PolysyndetonCandidate():
    """
    This class represents a polysyndeton candidate: a group of phrases
    together with the word associated with them.
    """
    def __init__(self, ids, word):
        """
        Constructor for the PolysyndetonCandidate class.

        Parameters
        ----------
        ids : list
            The entries that form the candidate. PolysyndetonAnnotation
            stores one ``[start, end]`` pair of token indices per phrase.
            The list is stored by reference, not copied.
        word : str
            The word associated with the candidate. PolysyndetonAnnotation
            passes the first token of the candidate's phrases.
        """
        self.ids = ids
        self.word = word

    def __repr__(self):
        """
        Return a debugging representation showing ``ids`` and ``word``.
        """
        return f"{type(self).__name__}(ids={self.ids!r}, word={self.word!r})"

    @property
    def score(self):
        """
        This property returns the score of the polysyndeton candidate,
        which is the number of entries in ``ids``.
        """
        return len(self.ids)

Instance variables

prop score

This property returns the score of the polysyndeton candidate, which is the number of entries in its ids list.

Expand source code
@property
def score(self):
    """
    Score of the polysyndeton candidate: the number of entries held in
    its ``ids`` list.
    """
    entry_count = len(self.ids)
    return entry_count