constants module¶
-
mavis.validate.constants.
DEFAULTS
= WeakMavisNamespace(_defns={'aligner': 'the aligner to use to map the contigs/reads back to the reference e.g. blat or bwa', 'assembly_include_flanking_pairs': 'if true then when the split reads are assembled, any flanking read pairs will also be added', 'assembly_include_half_mapped_reads': 'if true then when the split reads are assembled, any half-mapped read mates will also be added', 'assembly_max_kmer_size': 'the minimum between this and the smallest length input sequence is used as the kmer size for assembling the DeBruijn Graph. If this is not set (any value less than 0 is considered not set) the default is 75%% of the minimum length input sequence', 'assembly_max_kmer_strict': 'If True then any sequences input to the assembly algorithm that cannot create a kmer of this size will be discarded. If False, then the kmer size will be reduced to the minimum input and all input sequences will be used in the assembly algorithm', 'assembly_max_paths': 'the maximum number of paths to resolve. This is used to limit when there is a messy assembly graph to resolve. The assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater than the given setting.', 'assembly_min_uniq': 'Minimum percent uniq required to keep separate assembled contigs. 
If contigs are more similar than this, then the lower scoring, then shorter, contig is dropped', 'assembly_min_edge_weight': 'Discards all edges with a weight/frequency less than this from the DeBruijn graph', 'assembly_min_exact_match_to_remap': 'The minimum length of exact matches to initiate remapping a read to a contig', 'assembly_min_nc_edge_weight': 'Discards all non-cutting edges with a weight/frequency less than this from the DeBruijn graph', 'assembly_min_remap_coverage': 'Minimum fraction of the contig sequence which the remapped sequences must align over', 'assembly_min_remapped_seq': 'The minimum input sequences that must remap for an assembled contig to be used', 'assembly_min_tgt_to_exclude_half_map': 'The minimum number of split reads aligning to both breakpoints in order to exclude half-mapped reads from the assembly input', 'assembly_strand_concordance': 'When the number of remapped reads from each strand are compared, the ratio must be above this number to decide on the strand', 'blat_min_identity': 'The minimum percent identity match required for blat results when aligning contigs', 'blat_limit_top_aln': 'Number of results to return from blat (ranking based on score)', 'call_error': 'buffer zone for the evidence window', 'contig_aln_max_event_size': 'relates to determining breakpoints when pairing contig alignments. For any given read in a putative pair the soft clipping is extended to include any events greater than this size. The softclipping is added to the side of the alignment as indicated by the breakpoint we are assigning pairs to', 'contig_aln_merge_inner_anchor': 'the minimum number of consecutive exact match base pairs to not merge events within a contig alignment', 'contig_aln_merge_outer_anchor': 'minimum consecutively aligned exact matches to anchor an end for merging internal events', 'contig_aln_min_anchor_size': 'the minimum number of aligned bases for a contig (M or =) in order to simplify. 
Do not have to be consecutive.', 'contig_aln_min_query_consumption': 'minimum fraction of the original query sequence that must be used by the read(s) of the alignment', 'contig_aln_min_extend_overlap': 'minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment', 'contig_aln_min_score': 'minimum score for a contig to be used as evidence in a call by contig', 'fetch_min_bin_size': 'the minimum size of any bin for reading from a bam file. Increasing this number will result in smaller bins being merged or less bins being created (depending on the fetch method)', 'fetch_reads_bins': 'number of bins to split an evidence window into to ensure more even sampling of high coverage regions', 'fetch_reads_limit': 'maximum number of reads, cap, to loop over for any given evidence window', 'trans_fetch_reads_limit': 'Related to :term:`fetch_reads_limit`. Overrides fetch_reads_limit for transcriptome libraries when set. If this has a value of None then fetch_reads_limit will be used for transcriptome libraries instead', 'filter_secondary_alignments': 'filter secondary alignments when gathering read evidence', 'fuzzy_mismatch_number': 'The number of events/mismatches allowed to be considered a fuzzy match', 'max_sc_preceeding_anchor': 'when remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of where we expect it. For example for a softclipped read on a breakpoint with a left orientation this limits the amount of softclipping that is allowed on the right. If this is set to None then there is no limit on softclipping', 'min_anchor_exact': 'Applies to re-aligning softclipped reads to the opposing breakpoint. The minimum number of consecutive exact matches to anchor a read to initiate targeted realignment', 'min_anchor_fuzzy': 'Applies to re-aligning softclipped reads to the opposing breakpoint. 
The minimum length of a fuzzy match to anchor a read to initiate targeted realignment', 'min_anchor_match': 'Minimum percent match for a read to be kept as evidence', 'min_double_aligned_to_estimate_insertion_size': 'The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping', 'min_flanking_pairs_resolution': 'the minimum number of flanking reads required to call a breakpoint by flanking evidence', 'min_linking_split_reads': 'The minimum number of split reads which aligned to both breakpoints', 'min_mapping_quality': 'the minimum mapping quality of reads to be used as evidence', 'trans_min_mapping_quality': 'Related to :term:`min_mapping_quality`. Overrides the min_mapping_quality if the library is a transcriptome and this is set to any number not None. If this value is None, min_mapping_quality is used for transcriptomes as well as genomes', 'min_non_target_aligned_split_reads': 'The minimum number of split reads aligned to a breakpoint by the input bam and not forced by local alignment to the target region to call a breakpoint by split read evidence', 'min_sample_size_to_apply_percentage': 'Minimum number of aligned bases to compute a match percent. If there are less than this number of aligned bases (match or mismatch) the percent comparator is not used', 'min_softclipping': 'minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence', 'min_spanning_reads_resolution': 'Minimum number of spanning reads required to call an event by spanning evidence', 'min_splits_reads_resolution': 'minimum number of split reads required to call a breakpoint by split reads', 'stdev_count_abnormal': 'the number of standard deviations away from the normal considered expected and therefore not qualifying as flanking reads', 'strand_determining_read': '1 or 2. 
The read in the pair which determines if (assuming a stranded protocol) the first or second read in the pair matches the strand sequenced', 'outer_window_min_event_size': 'the minimum size of an event in order for flanking read evidence to be collected', 'write_evidence_files': 'write the intermediate bam and bed files containing the raw evidence collected and contigs aligned. Not required for subsequent steps but can be useful in debugging and deep investigation of events', 'clean_aligner_files': 'Remove the aligner output files after the validation stage is complete. Not required for subsequent steps but can be useful in debugging and deep investigation of events'}, _types={'aligner': MavisNamespace(BLAT='blat', BWA_MEM='bwa mem', __name__='~mavis.align.SUPPORTED_ALIGNER', _defns={}, _types={'BWA_MEM': <class 'str'>, 'BLAT': <class 'str'>}), 'assembly_include_flanking_pairs': <function cast_boolean>, 'assembly_include_half_mapped_reads': <function cast_boolean>, 'assembly_max_kmer_size': <class 'int'>, 'assembly_max_kmer_strict': <function cast_boolean>, 'assembly_max_paths': <class 'int'>, 'assembly_min_uniq': <function float_fraction>, 'assembly_min_edge_weight': <class 'int'>, 'assembly_min_exact_match_to_remap': <class 'int'>, 'assembly_min_nc_edge_weight': <class 'int'>, 'assembly_min_remap_coverage': <function float_fraction>, 'assembly_min_remapped_seq': <class 'int'>, 'assembly_min_tgt_to_exclude_half_map': <class 'int'>, 'assembly_strand_concordance': <function float_fraction>, 'blat_min_identity': <function float_fraction>, 'blat_limit_top_aln': <class 'int'>, 'call_error': <class 'int'>, 'contig_aln_max_event_size': <class 'int'>, 'contig_aln_merge_inner_anchor': <class 'int'>, 'contig_aln_merge_outer_anchor': <class 'int'>, 'contig_aln_min_anchor_size': <class 'int'>, 'contig_aln_min_query_consumption': <function float_fraction>, 'contig_aln_min_extend_overlap': <class 'int'>, 'contig_aln_min_score': <function float_fraction>, 'fetch_min_bin_size': 
<class 'int'>, 'fetch_reads_bins': <class 'int'>, 'fetch_reads_limit': <class 'int'>, 'trans_fetch_reads_limit': <function nullable_int>, 'filter_secondary_alignments': <function cast_boolean>, 'fuzzy_mismatch_number': <class 'int'>, 'max_sc_preceeding_anchor': <class 'int'>, 'min_anchor_exact': <class 'int'>, 'min_anchor_fuzzy': <class 'int'>, 'min_anchor_match': <function float_fraction>, 'min_double_aligned_to_estimate_insertion_size': <class 'int'>, 'min_flanking_pairs_resolution': <class 'int'>, 'min_linking_split_reads': <class 'int'>, 'min_mapping_quality': <class 'int'>, 'trans_min_mapping_quality': <function nullable_int>, 'min_non_target_aligned_split_reads': <class 'int'>, 'min_sample_size_to_apply_percentage': <class 'int'>, 'min_softclipping': <class 'int'>, 'min_spanning_reads_resolution': <class 'int'>, 'min_splits_reads_resolution': <class 'int'>, 'stdev_count_abnormal': <class 'float'>, 'strand_determining_read': <class 'int'>, 'outer_window_min_event_size': <class 'int'>, 'write_evidence_files': <function cast_boolean>, 'clean_aligner_files': <function cast_boolean>}, aligner='blat', assembly_include_flanking_pairs=True, assembly_include_half_mapped_reads=True, assembly_max_kmer_size=-1, assembly_max_kmer_strict=True, assembly_max_paths=8, assembly_min_edge_weight=2, assembly_min_exact_match_to_remap=15, assembly_min_nc_edge_weight=3, assembly_min_remap_coverage=0.9, assembly_min_remapped_seq=3, assembly_min_tgt_to_exclude_half_map=7, assembly_min_uniq=0.1, assembly_strand_concordance=0.51, blat_limit_top_aln=10, blat_min_identity=0.9, call_error=10, clean_aligner_files=False, contig_aln_max_event_size=50, contig_aln_merge_inner_anchor=20, contig_aln_merge_outer_anchor=15, contig_aln_min_anchor_size=50, contig_aln_min_extend_overlap=10, contig_aln_min_query_consumption=0.9, contig_aln_min_score=0.9, fetch_min_bin_size=50, fetch_reads_bins=5, fetch_reads_limit=3000, filter_secondary_alignments=True, fuzzy_mismatch_number=1, 
max_sc_preceeding_anchor=6, min_anchor_exact=6, min_anchor_fuzzy=10, min_anchor_match=0.9, min_double_aligned_to_estimate_insertion_size=2, min_flanking_pairs_resolution=10, min_linking_split_reads=2, min_mapping_quality=5, min_non_target_aligned_split_reads=1, min_sample_size_to_apply_percentage=10, min_softclipping=6, min_spanning_reads_resolution=5, min_splits_reads_resolution=3, outer_window_min_event_size=125, stdev_count_abnormal=3.0, strand_determining_read=2, trans_fetch_reads_limit=12000, trans_min_mapping_quality=1, write_evidence_files=True)¶ - aligner
- assembly_include_flanking_pairs
- assembly_include_half_mapped_reads
- assembly_max_kmer_size
- assembly_max_kmer_strict
- assembly_max_paths
- assembly_min_edge_weight
- assembly_min_exact_match_to_remap
- assembly_min_nc_edge_weight
- assembly_min_remap_coverage
- assembly_min_remapped_seq
- assembly_min_tgt_to_exclude_half_map
- assembly_min_uniq
- assembly_strand_concordance
- blat_limit_top_aln
- blat_min_identity
- call_error
- contig_aln_max_event_size
- contig_aln_merge_inner_anchor
- contig_aln_merge_outer_anchor
- contig_aln_min_anchor_size
- contig_aln_min_extend_overlap
- contig_aln_min_query_consumption
- contig_aln_min_score
- fetch_min_bin_size
- fetch_reads_bins
- fetch_reads_limit
- filter_secondary_alignments
- fuzzy_mismatch_number
- max_sc_preceeding_anchor
- min_anchor_exact
- min_anchor_fuzzy
- min_anchor_match
- min_double_aligned_to_estimate_insertion_size
- min_flanking_pairs_resolution
- min_linking_split_reads
- min_mapping_quality
- min_non_target_aligned_split_reads
- min_sample_size_to_apply_percentage
- min_softclipping
- min_spanning_reads_resolution
- min_splits_reads_resolution
- outer_window_min_event_size
- stdev_count_abnormal
- strand_determining_read