Coverage for lingpy/algorithm/cython/_talign.py : 98%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# we start with basic alignment functions seqA, seqB, M, # length of seqA N, # length of seqB gop, scale, scorer ): """ Carry out global alignment of two sequences.
Parameters ---------- seqA, seqB : list The sequences to be aligned, passed as lists. M, N : int The length of the two sequences. gop : int The gap opening penalty. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences.
Returns ------- alignment : tuple The aligned sequences and the similarity score.
Notes ----- This algorithm carries out classical Needleman-Wunsch alignment (:evobib:`Needleman1970`).
See also -------- ~lingpy.algorithm.cython.talign.semi_globalign ~lingpy.algorithm.cython.talign.localign ~lingpy.algorithm.cython.talign.dialign
"""
# declare integers # [autouncomment] cdef int i,j
# declare floats # [autouncomment] cdef float gapA,gapB,match,sim
# declare lists
# create matrix and traceback
# modify matrix and traceback
# start the loop
# calculate costs for gapA else:
# calculate costs for gapB else:
# get the score
# determine minimal cost else:
# get the similarity
# carry out the traceback else:
# turn alignments back
# return alignments
seqA, seqB, M, # length of seqA N, # length of seqB gop, scale, scorer ): """ Carry out semi-global alignment of two sequences.
Parameters ---------- seqA, seqB : list The sequences to be aligned, passed as lists. M, N : int The length of the two sequences. gop : int The gap opening penalty. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences.
Returns ------- alignment : tuple The aligned sequences and the similarity score.
Notes ----- This algorithm carries out semi-global alignment (:evobib:`Durbin2002`).
See also -------- ~lingpy.algorithm.cython.talign.globalign ~lingpy.algorithm.cython.talign.localign ~lingpy.algorithm.cython.talign.dialign
"""
# declare integers # [autouncomment] cdef int i,j
# declare floats # [autouncomment] cdef float gapA,gapB,match,sim
# declare lists
# create matrix and traceback
# modify matrix and traceback
# start the loop
# calculate costs for gapA else:
# calculate costs for gapB else:
# calculate costs for match
# get the score
# determine minimal cost else:
# get the similarity
# carry out the traceback else:
# turn alignments back
# return alignments
seqA, seqB, M, # length of seqA N, # length of seqB gop, scale, scorer ): """ Carry out local alignment of two sequences.
Parameters ---------- seqA, seqB : list The sequences to be aligned, passed as lists. M, N : int The length of the two sequences. gop : int The gap opening penalty. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences.
Returns ------- alignment : tuple The aligned sequences and the similarity score.
Notes ----- This algorithm carries out local alignment (:evobib:`Smith1981`).
See also -------- ~lingpy.algorithm.cython.talign.globalign ~lingpy.algorithm.cython.talign.semi_globalign ~lingpy.algorithm.cython.talign.dialign """
# declare integers # [autouncomment] cdef int i,j,k,l
# declare floats # [autouncomment] cdef float gapA,gapB,match,sim
# declare char-character # [autouncomment] cdef str x
# declare lists
# create matrix and traceback
# set similarity to zero
# start the loop
# calculate costs for gapA else:
# calculate costs for gapB else:
# calculate costs for match
# get the score
# determine minimal cost else:
# get the similarity
# reset i,j
# append stuff to almA and almB
# append empty seq for alms to almA and almB
almB[1] += [seqB[i-1]] i -= 1
else:
# revert the alms
# append the rest
# return alignments
seqA, seqB, M, # length of seqA N, # length of seqB scale, scorer ): """ Carry out dialign alignment of two sequences.
Parameters ---------- seqA, seqB : list The sequences to be aligned, passed as lists. M, N : int The length of the two sequences. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences.
Returns ------- alignment : tuple The aligned sequences and the similarity score.
Notes ----- This algorithm carries out dialign alignment (:evobib:`Morgenstern1996`).
See also -------- ~lingpy.algorithm.cython.talign.globalign ~lingpy.algorithm.cython.talign.semi_globalign ~lingpy.algorithm.cython.talign.localign """
# declare integers # [autouncomment] cdef int i,j,k,l,o,p
# declare floats # [autouncomment] cdef float gapA,gapB,match,sim,tmp_match
# declare lists
# create matrix and traceback
# modify matrix and traceback
# start the loop
# calculate costs for gapA
# calculate costs for gapB
# calculate costs for match # get temporary match
# determine minimal cost else:
# get the similarity
# carry out the traceback else:
# turn alignments back
# return alignments
seqA, seqB, gop, scale, scorer, mode, distance = 0 ): """ Align a pair of sequences.
Parameters ---------- seqA, seqB : list The sequences to be aligned, passed as lists. gop : int The gap opening penalty. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences. mode : { "global", "local", "overlap", "dialign" } Select the mode for the alignment analysis ("overlap" refers to semi-global alignments). distance : (default=0) Select whether you want distances or similarities to be returned (0 indicates similarities, 1 indicates distances, 2 indicates both).
Returns ------- alignment : tuple The aligned sequences and the similarity score.
Notes ----- This is a utility function that allows calling any of the four classical alignment functions (:py:class:`lingpy.algorithm.cython.talign.globalign`, :py:class:`lingpy.algorithm.cython.talign.semi_globalign`, :py:class:`lingpy.algorithm.cython.talign.localign`, :py:class:`lingpy.algorithm.cython.talign.dialign`) and their secondary counterparts.
See also -------- ~lingpy.algorithm.cython.talign.align_pairwise ~lingpy.algorithm.cython.talign.align_pairs
Returns ------- alignment : tuple The aligned sequences and the similarity or distance scores, or both.
""" # define basic types # [autouncomment] cdef int i # [autouncomment] cdef list almA,almB # [autouncomment] cdef float sim,dist,simA,simB
# get length of seqA,seqB
# determine the mode
# carry out the alignment seqA, seqB, M, N, gop, scale, scorer )
# carry out the alignment seqA, seqB, M, N, gop, scale, scorer )
# carry out the alignment seqA, seqB, M, N, gop, scale, scorer )
seqA, seqB, M, N, scale, scorer )
# calculate distance, if this is needed
else: else: return almA,almB,sim
seqs, gop, scale, scorer, mode ): """ Align all sequences pairwise.
Parameters ---------- seqs : list The sequences to be aligned, passed as lists. gop : int The gap opening penalty. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences. mode : { "global", "local", "overlap", "dialign" } Select the mode for the alignment analysis ("overlap" refers to semi-global alignments).
Returns ------- alignments : list A list of tuples, containing the aligned sequences, the similarity and the distance scores.
Notes ----- This function aligns all possible pairs between the sequences you pass to it. It is important for multiple alignment, where it can be used to construct the guide tree.
See also -------- ~lingpy.algorithm.cython.talign.align_pair ~lingpy.algorithm.cython.talign.align_pairs """ # define basic stuff
# [autouncomment] cdef int i,j,k,lenA,lenB # [autouncomment] cdef list almA,almB,seqA,seqB # [autouncomment] cdef float sim,simA,simB,dist
# get self-scores
# start loop seqA, seqB, lenA, lenB, gop, scale, scorer )
# get the distance
# append it to list (almA,almB,sim,dist) ) (seqA,seqA,sims[i],0.0) )
# start loop
# check for secondary structures seqA, seqB, lenA, lenB, gop, scale, scorer )
# get the distance
# append it to list (almA,almB,sim,dist) ) (seqA,seqA,sims[i],0.0) )
# start loop
seqA, seqB, lenA, lenB, gop, scale, scorer )
# get the distance
# append it to list (almA,almB,sim,dist) ) (seqA,seqA,sims[i],0.0) )
# start loop
seqA, seqB, lenA, lenB, scale, scorer )
# get the distance
# append it to list (almA,almB,sim,dist) ) (seqA,seqA,sims[i],0.0) )
seqs, gop, scale, scorer, mode, distance = 0 ): """ Align multiple sequence pairs.
Parameters ---------- seqs : list The sequences to be aligned, passed as lists. gop : int The gap opening penalty. scale : float The gap extension scale. scorer : { dict, ~lingpy.algorithm.cython.misc.ScoreDict } The scoring dictionary containing scores for all possible segment combinations in the two sequences. mode : { "global", "local", "overlap", "dialign" } Select the mode for the alignment analysis ("overlap" refers to semi-global alignments). distance : (default=0) Indicate whether distances or similarities should be returned.
Returns ------- alignments : list A list of tuples, containing the aligned sequences, and the similarity or the distance scores.
Notes ----- This function aligns all pairs which are passed to it.
See also -------- ~lingpy.algorithm.cython.talign.align_pair ~lingpy.algorithm.cython.talign.align_pairwise
""" # basic defs # [autouncomment] cdef int i,j,M,N,lP # [autouncomment] cdef list seqA,seqB,almA,almB # [autouncomment] cdef float sim
# get basic params
# check for restricted prostrings
# carry out alignments # get sequences
# get length of seqs
seqA, seqB, M, N, gop, scale, scorer ) seqA, seqB, M, N, gop, scale, scorer )
seqA, seqB, M, N, gop, scale, scorer )
seqA, seqB, M, N, scale, scorer )
# calculate distances if this option is chosen
else: alignments.append((almA,almB,sim,dist)) else:
# specific methods for the alignment of profiles profileA, profileB, gop, scale, scorer, mode, gap_weight ): """ Align two profiles using the basic modes.
Parameters ---------- profileA, profileB : list Two-dimensional list for each of the profiles. gop : int The gap opening penalty. scale : float The gap extension scale by which consecutive gaps are reduced. LingPy uses a scale rather than a constant gap extension penalty. scorer : { dict, :py:class:`lingpy.algorithm.cython.misc.ScoreDict` } The scoring function which needs to provide scores for all segments in the two profiles. mode : { "global", "overlap", "dialign" } Select one of the three basic modes available for profile alignment. gap_weight : float This handles the weight that is given to gaps in a column. If you set it to 0, for example, this means that all gaps will be ignored when determining the score for two columns in the profile.
Notes ----- This function computes alignments of two profiles of multiple sequences (see :evobib:`Durbin2002` for details on profiles) and is important for multiple alignment analyses.
Returns ------- alignment : tuple The aligned profiles, and the overall similarity of the profiles.
See also -------- ~lingpy.algorithm.cython.talign.score_profile ~lingpy.algorithm.cython.talign.swap_score_profile """
# basic defs # [autouncomment] cdef int i,j,k,l,M,N,O,P # [autouncomment] cdef float sim,count # [autouncomment] cdef str charA,charB # [autouncomment] cdef list listA,listB,almA,almB
else:
listA, listB, M, N, gop, scale, tmp_scorer ) listA, listB, M, N, gop, scale, tmp_scorer ) listA, listB, M, N, scale, tmp_scorer )
colA, colB, scorer, gop, gap_weight = 0.0 ): """ Basic function for the scoring of profiles.
Parameters ---------- colA, colB : list The two columns of a profile. scorer : { dict, :py:class:`lingpy.algorithm.cython.misc.ScoreDict` } The scoring function which needs to provide scores for all segments in the two profiles. gap_weight : (default=0.0) This handles the weight that is given to gaps in a column. If you set it to 0, for example, this means that all gaps will be ignored when determining the score for two columns in the profile.
Notes ----- This function handles how profiles are scored.
Returns ------- score : float The score for the profile
See also -------- ~lingpy.algorithm.cython.talign.align_profile ~lingpy.algorithm.cython.talign.swap_score_profile """ # basic definitions # [autouncomment] cdef int i,j # [autouncomment] cdef str charA,charB
# define the initial score
# set a counter
# iterate over all chars else:
colA, colB, scorer, gap_weight = 0.0, swap_penalty = -1 ): """ Basic function for the scoring of profiles in swapped sequences.
Parameters ---------- colA, colB : list The two columns of a profile. scorer : { dict, :py:class:`lingpy.algorithm.cython.misc.ScoreDict` } The scoring function which needs to provide scores for all segments in the two profiles. gap_weight : (default=0.0) This handles the weight that is given to gaps in a column. If you set it to 0, for example, this means that all gaps will be ignored when determining the score for two columns in the profile. swap_penalty : (default=-1) The swap penalty applied to swapped columns.
Notes ----- This function handles how profiles with swapped segments are scored.
Returns ------- score : float The score for the profile.
See also -------- ~lingpy.algorithm.cython.talign.align_profile ~lingpy.algorithm.cython.talign.score_profile
""" # basic definitions # [autouncomment] cdef int i,j # [autouncomment] cdef str charA,charB
# define the initial score
# set a counter
# iterate over all chars score += 0.0 counter += 1.0 else: else:
|