Module stylotool.src.freestylo.AlliterationAnnotation
Classes
class AlliterationAnnotation (text: freestylo.TextObject.TextObject, max_skip=2, min_length=3, skip_tokens=['.', ',', ':', ';', '!', '?', '…', '(', ')', '[', ']', '{', '}', '„', '“', '‚', '‘:', '‘', '’'])
-
This class is used to find alliteration candidates in a text. It uses the TextObject class to store the text and its annotations.
Parameters
text
:TextObject
- The text to be analyzed.
max_skip
:int
, optional
min_length
:int
, optional
skip_tokens
:list
, optional
- A list of tokens that should be skipped when looking for alliterations.
Expand source code
class AlliterationAnnotation:
    """
    This class is used to find alliteration candidates in a text.
    It uses the TextObject class to store the text and its annotations;
    only ``text.tokens`` is read here. Results are collected in
    ``self.candidates``.
    """

    def __init__(self, text : TextObject, max_skip = 2, min_length=3, skip_tokens=[".", ",", ":", ";", "!", "?", "…", "(", ")", "[", "]", "{", "}", "„", "“", "‚", "‘:", "‘", "’"]):
        """
        Parameters
        ----------
        text : TextObject
            The text to be analyzed.
        max_skip : int, optional
            Number of non-skip tokens that may lie between two consecutive
            members of a candidate before the candidate is closed.
        min_length : int, optional
            A closed candidate is kept only if it has more than
            ``min_length`` members.
        skip_tokens : list, optional
            A list of tokens that should be skipped when looking for alliterations.
            Skipped tokens never join a candidate and do not count towards
            ``max_skip``.
        """
        self.text = text
        self.candidates = []
        self.max_skip = max_skip
        self.min_length = min_length
        # Copy so that neither the shared default list nor a caller's list is
        # aliased by this instance.
        self.skip_tokens = list(skip_tokens)

    def _keep_if_long_enough(self, candidate):
        """Store a closed candidate if it has more than ``min_length`` members."""
        if len(candidate.ids) > self.min_length:
            self.candidates.append(candidate)

    def find_candidates(self):
        """
        This method finds alliteration candidates in the text.

        Tokens are grouped by their lowercased first character. A group stays
        open while no more than ``max_skip`` non-skip tokens separate its last
        member from the current token. Groups that are closed (or still open
        at the end of the text) are appended to ``self.candidates`` when they
        have more than ``min_length`` members. Calling the method again
        appends to the existing ``self.candidates`` list.
        """
        tokens = self.text.tokens
        skip_tokens = frozenset(self.skip_tokens)

        # first character -> [candidate, skip tokens seen since its last member]
        open_candidates = {}
        for i, token in enumerate(tokens):
            is_skip = token in skip_tokens
            # Slicing instead of indexing so an empty token does not raise.
            token_char = token[:1].lower()

            if not is_skip and token_char.isalpha():
                candidate_pair = open_candidates.get(token_char)
                if candidate_pair is None:
                    open_candidates[token_char] = [AlliterationCandidate([i], token_char), 0]
                else:
                    candidate_pair[0].ids.append(i)
                    # The gap restarts at the new last member.
                    candidate_pair[1] = 0

            # The close phase runs for every token, so a candidate is closed as
            # soon as its gap is exceeded rather than only when some later
            # token happens to extend an existing candidate.
            keys_to_delete = []
            for key, candidate_pair in open_candidates.items():
                if is_skip:
                    candidate_pair[1] += 1
                candidate, skipped = candidate_pair
                if i - candidate.ids[-1] >= self.max_skip + 1 + skipped:
                    self._keep_if_long_enough(candidate)
                    keys_to_delete.append(key)
            for key_del in keys_to_delete:
                del open_candidates[key_del]

        # get the remaining ones
        for candidate, _ in open_candidates.values():
            self._keep_if_long_enough(candidate)

    def serialize(self) -> list:
        """
        This method serializes the alliteration candidates into a list of dictionaries.

        Returns
        -------
        list
            A list of dictionaries containing the ids, length and character of the alliteration candidates.
        """
        return [
            {"ids": c.ids, "length": c.length, "char": c.char}
            for c in self.candidates
        ]
Methods
def find_candidates(self)
-
This method finds alliteration candidates in the text.
def serialize(self) ‑> list
-
This method serializes the alliteration candidates into a list of dictionaries.
Returns
list
- A list of dictionaries containing the ids, length and character of the alliteration candidates.
class AlliterationCandidate (ids, char)
-
This class represents an alliteration candidate.
Parameters
ids
:list
- A list of token ids that form the alliteration candidate.
char
:str
- The character that the candidate starts with.
Expand source code
class AlliterationCandidate:
    """
    This class represents an alliteration candidate: a list of token ids
    together with the character associated with them.
    """

    def __init__(self, ids, char):
        """
        Parameters
        ----------
        ids : list
            A list of token ids that form the alliteration candidate.
            The list is stored as given, not copied.
        char : str
            The character that the candidate starts with.
        """
        self.char = char
        self.ids = ids

    @property
    def length(self):
        """
        The length of the alliteration candidate, i.e. the number of
        token ids it holds.
        """
        member_ids = self.ids
        return len(member_ids)

    @property
    def score(self):
        """
        The score of the alliteration candidate, computed as the number of
        token ids it holds.
        """
        member_ids = self.ids
        return len(member_ids)
Instance variables
prop length
-
This property returns the length of the alliteration candidate.
Expand source code
@property
def length(self):
    """
    The length of the alliteration candidate, i.e. the number of
    token ids it holds.
    """
    member_ids = self.ids
    return len(member_ids)
prop score
-
This property returns the score of the alliteration candidate.
Expand source code
@property
def score(self):
    """
    The score of the alliteration candidate, computed as the number of
    token ids it holds.
    """
    member_ids = self.ids
    return len(member_ids)