Coverage for lingpy/convert/strings.py : 99%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# *-* coding: utf-8 *-* Basic functions for the conversion of Python-internal data into strings. """
""" Convert a scoring function to a string. """
# get sorted representation of characters
# write stuff to string
_arange='{stamp}{comment}\n{meta}{comment}\n{body}', merge=False): """ Function converts an MSA object into a string. """
else:
""" Merge a given string according to the merging dictionary m. """
# define global vars for alignment and taxa for convenience else:
# add merge in output as feature except:
else: else:
# if wordlist is set to True, don't write the header line and put the # after comment # get formatter
# get formatter else: # get formatter
else:
else:
else:
str(i + 1) for i in range(alm_len) ])
else: else: else:
else:
stamp=stamp, meta=meta, body=body, comment=comment )
matrix, taxa=None, stamp='', filename='', taxlen=10, comment='#' ): """ Convert matrix to dst-format.
Parameters ---------- taxa : {None, list} List of taxon names corresponding to the distances. Make sure that you only use alphanumeric characters and the underscore for assigning the taxon names. Especially avoid the usage of brackets, since this will confuse many phylogenetic programs. stamp : str (default='') Convenience stamp passed as a comment that can be used to indicate how the matrix was created. filename : str If you specify a filename, the data will be written to file. taxlen : int (default=10) Indicate how long the taxon names are allowed to be. The Phylip package only allows taxon names consisting of maximally 10 characters. Other packages, however, allow more. If Phylip compatibility is not important for you and you just want to allow for as long taxon names as possible, set this value to 0. comment : str (default = '#') The comment character to be used when adding additional information in the "stamp".
Returns ------- output : {str or file} Depending on your settings, this function returns a string in DST (=Phylip) format, or a file containing the string.
"""
# check for zero-taxlen else:
matrix[i]]) else: util.write_text_file(filename + '.dst', out)
taxa, paps, missing=0, filename='', datatype='STANDARD' ): """ Function converts a list of paps into nexus file format.
Parameters ---------- taxa : list List of taxa. paps : {list, dict} A two-dimensional list with the first dimension being identical to the number of taxa and the second dimension being identical to the number of paps. If a dictionary is passed, each key represents a given pap. The following two structures will thus be treated identically::
>>> paps = [[1,0],[1,0],[1,0]] # two languages, three paps >>> paps = {1:[1,0], 2:[1,0], 3:[1,0]} # two languages, three paps
missing : {str, int} (default=0) Indicate how missing characters are represented in the original data.
"""
# get longest taxon
# check whether paps are dict or list else:
# create reference # create the matrix
len(taxa), len(paps), missing, matrix, ref_string, datatype ) filename + '.nex', out.format(len(taxa), len(paps), missing, matrix, ref_string, datatype))
taxa, paps, filename='' ): """ Write paps created by the Wordlist class to a csv-file. """
key, '\t'.join(str(i) for i in paps[key]) )
""" Convert the data in a given wordlist to NEXUS-format for multistate analyses in PAUP.
Parameters ---------- taxa : list The list of taxa that shall be written to file. matrix : list The multi-state matrix with the first dimension indicating the taxa, and the second their states. filename : str (default="") If not specified, the filename of the Wordlist will be taken, otherwise, it specifies the name of the file to which the data will be written. """
# set up the nexus template
BEGIN DATA; DIMENSIONS ntax={ntax} NCHAR={nchar}; FORMAT RESPECTCASE DATATYPE=STANDARD symbols="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP0123456789" GAP=? MISSING={missing} interleave=yes; OPTIONS MSTAXA = POLYMORPH;
MATRIX
{matrix}
END; """
# calculate maximal length of taxon strings
# calculate the matrix-text in the nexus template
filename, nexus.format( ntax=len(taxa), nchar=len(matrix[0]), matrix=matrix_text, missing=missing ) ) else: raise ValueError("[!] A wrong filename was specified!")
wordlist, mode='mrbayes', filename="mrbayes.nex", ref="cogid", missing="?", gap="-", custom=None, custom_name='lingpy', commands=None, commands_name="mrbayes"): """Write a nexus file for phylogenetic analyses.
Parameters ---------- wordlist : lingpy.basic.wordlist.Wordlist A Wordlist object containing cognate IDs. mode : str (default="mrbayes") The name of the output nexus style. Valid values are: * 'MRBAYES': a MrBayes formatted nexus file * 'BEAST': a BEAST formatted nexus file * 'BEASTWORDS': a BEAST-formatted nexus for word-partitioned analyses. filename : str (default="mrbayes.nex") Name of the file to which the nexus file will be written. If set to `None`, then this function will not write the nexus content to a file, but simply return the content as a string. ref: str (default="cogid") Column in which you store the cognate sets in your data. gap : str (default="-") The symbol for gaps (not relevant for linguistic analyses). missing : str (default="?") The symbol for missing characters. custom : list (default=None) This information allows adding custom information to the nexus file, like, for example, the structure of the characters, their original concept, or their type, and it will be written into a custom block in the nexus file. The name of the custom block can be specified with help of the `custom_name` keyword. The content is a list of strings which will be written line by line into the custom block. custom_name : str (default="lingpy") The name of the custom block which will be written to the file. commands : list (default=None) If specified, will write an additional block containing commands for phylogenetic software. The commands are passed as a list, containing strings. The name of the block is given by the keyword `commands_name`. commands_name : str (default="mrbayes") Determines the name of the block to which the commands will be written.
Returns ------- nexus : str A string containing nexus file output """
# check for valid mode
# check for valid template else: # pragma: no cover raise IOError("Unknown template %s" % template)
# check that `ref` is a valid column
# commands
# retrieve the matrix x[0] for x in vals if x][0]][wordlist._rowIdx]) for (cogid, vals) in etd.items()], key=lambda x: x[1]) # and missing data.. col=t, entry=wordlist._row_name, flat=True)] for t in wordlist.cols}
# add ascertainment character for mode=BEAST # fill matrix # add ascertainment character for mode=BEASTWORDS. Note that if # a given word:language is missing, then its ascertainment # character is the `missing` character. missing_[t] else [missing]
# parse characters into `charsets` (a dict of word=>siteindex positions), # and `chars` (a list of characters). # add label for ascertainment character in BEAST mode # add label for per-word ascertainment characters in BEASTWORDS # finally add label.
# create character labels block if needed else:
# create charsets block c, min(m), max(m)) for (c, m) in charsets.items() ]
# convert state matrix to string. '({0})'.format(c) if len(c) > 1 else str(c) for c in m ]) + '\n'
# TODO: symbols could be more than "01", but this function doesn't handle # multistate data, so we just specify them here.
matrix=_matrix, ntax=wordlist.width, nchar=len(matrix[0]), gap=gap, missing=missing, dtype='RESTRICTION' if mode == 'MRBAYES' else 'STANDARD', commands=_commands, custom=_custom, symbols=symbols, chars=charblock ) |