Source code for pytransit.analysis.rankproduct

import sys

try:
    import wx
    WX_VERSION = int(wx.version()[0])
    hasWx = True

except Exception as e:
    hasWx = False
    WX_VERSION = 0

if hasWx:
    import wx.xrc
    from wx.lib.buttons import GenBitmapTextButton
    from pubsub import pub
    import wx.adv


import os
import time
import math
import random
import numpy
import scipy.stats
import datetime


from pytransit.analysis import base
import pytransit.transit_tools as transit_tools
import pytransit.tnseq_tools as tnseq_tools
import pytransit.norm_tools as norm_tools
import pytransit.stat_tools as stat_tools



############# GUI ELEMENTS ##################

short_name = "rankproduct"
long_name = "Rank Product"
short_desc = "Rank Product test for determining conditional essentiality."
long_desc = "Differential Comparison based on ranks"
transposons = ["himar1", "tn5"]
columns = ["Orf","Name","Desc","Sites","Mean Ctrl","Mean Exp","log2FC","Obs RP","Expected RP","q-value"]



############# Analysis Method ##############

[docs]class RankProductAnalysis(base.TransitAnalysis): def __init__(self): base.TransitAnalysis.__init__(self, short_name, long_name, short_desc, long_desc, transposons, RankProductMethod, RankProductGUI, [RankProductFile])
################## FILE ###################
[docs]class RankProductFile(base.TransitFile): def __init__(self): base.TransitFile.__init__(self, "#RankProduct", columns)
############# GUI ##################
[docs]class RankProductGUI(base.AnalysisGUI):
[docs] def definePanel(self, wxobj): self.wxobj = wxobj rankproductPanel = wx.Panel( self.wxobj.optionsWindow, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.TAB_TRAVERSAL ) rankproductSizer = wx.BoxSizer( wx.VERTICAL ) rankproductLabel = wx.StaticText( rankproductPanel, wx.ID_ANY, u"rankproduct Options", wx.DefaultPosition, (160,-1), 0 ) rankproductLabel.SetFont( wx.Font( 10, wx.DEFAULT, wx.NORMAL, wx.BOLD) ) rankproductSizer.Add( rankproductLabel, 0, wx.ALL|wx.ALIGN_CENTER_HORIZONTAL, 5 ) rankproductLabelSizer = wx.BoxSizer( wx.VERTICAL ) mainSizer1 = wx.BoxSizer( wx.VERTICAL ) # SAMPLES (rankproductSampleLabel, self.wxobj.rankproductSampleText, sampleSizer) = self.defineTextBox(rankproductPanel, u"Samples:", u"10000", "Number of samples to take when estimating the theoretical rankproduct distribution. Larger samples give more accurate estimates at the cost of computation time.") mainSizer1.Add(sampleSizer, 1, wx.EXPAND, 5 ) # NORMALIZATION # Norm rankproductNormChoiceChoices = [ u"TTR", u"nzmean", u"totreads", u'zinfnb', u'quantile', u"betageom", u"nonorm" ] (rankproductNormLabel, self.wxobj.rankproductNormChoice, normSizer) = self.defineChoiceBox(rankproductPanel, u"Normalization:", rankproductNormChoiceChoices, "Choice of normalization method. The default choice, 'TTR', normalizes datasets to have the same expected count (while not being sensative to outliers). Read documentation for a description other methods. ") mainSizer1.Add(normSizer, 1, wx.EXPAND, 5 ) rankproductSizer.Add( mainSizer1, 1, wx.EXPAND, 5 ) rankproductButton = wx.Button( rankproductPanel, wx.ID_ANY, u"Run rankproduct", wx.DefaultPosition, wx.DefaultSize, 0 ) rankproductSizer.Add( rankproductButton, 0, wx.ALL|wx.ALIGN_CENTER_HORIZONTAL, 5 ) rankproductPanel.SetSizer( rankproductSizer ) rankproductPanel.Layout() rankproductSizer.Fit( rankproductPanel ) #Connect events rankproductButton.Bind( wx.EVT_BUTTON, self.wxobj.RunMethod ) self.panel = rankproductPanel
########## CLASS #######################
[docs]class RankProductMethod(base.DualConditionMethod): """ rankproduct """ def __init__(self, ctrldata, expdata, annotation_path, output_file, normalization="TTR", samples=10000, adaptive=False, doHistogram=False, replicates="Sum", LOESS=False, ignoreCodon=True, NTerminus=0.0, CTerminus=0.0, wxobj=None): base.DualConditionMethod.__init__(self, short_name, long_name, short_desc, long_desc, ctrldata, expdata, annotation_path, output_file, normalization=normalization, replicates=replicates, LOESS=LOESS, NTerminus=NTerminus, CTerminus=CTerminus, wxobj=wxobj) self.samples = samples self.adaptive = adaptive self.doHistogram = doHistogram
[docs] @classmethod def fromGUI(self, wxobj): """ """ #Get Annotation file annotationPath = wxobj.annotation if not transit_tools.validate_annotation(annotationPath): return None #Get selected files ctrldata = wxobj.ctrlSelected() expdata = wxobj.expSelected() if not transit_tools.validate_both_datasets(ctrldata, expdata): return None #Validate transposon types if not transit_tools.validate_transposons_used(ctrldata+expdata, transposons): return None #Read the parameters from the wxPython widgets ignoreCodon = True samples = int(wxobj.rankproductSampleText.GetValue()) normalization = wxobj.rankproductNormChoice.GetString(wxobj.rankproductNormChoice.GetCurrentSelection()) replicates="Sum" adaptive = False doHistogram = False NTerminus = float(wxobj.globalNTerminusText.GetValue()) CTerminus = float(wxobj.globalCTerminusText.GetValue()) LOESS = False #Get output path defaultFileName = "rankproduct_output.dat" defaultDir = os.getcwd() output_path = wxobj.SaveFile(defaultDir, defaultFileName) if not output_path: return None output_file = open(output_path, "w") return self(ctrldata, expdata, annotationPath, output_file, normalization, samples, adaptive, doHistogram, replicates, LOESS, ignoreCodon, NTerminus, CTerminus, wxobj)
[docs] @classmethod def fromargs(self, rawargs): (args, kwargs) = transit_tools.cleanargs(rawargs) ctrldata = args[0].split(",") expdata = args[1].split(",") annotationPath = args[2] output_path = args[3] output_file = open(output_path, "w") normalization = kwargs.get("n", "TTR") samples = int(kwargs.get("s", 100)) adaptive = kwargs.get("a", False) doHistogram = kwargs.get("h", False) replicates = kwargs.get("r", "Sum") LOESS = kwargs.get("l", False) ignoreCodon = True NTerminus = float(kwargs.get("iN", 0.00)) CTerminus = float(kwargs.get("iC", 0.00)) return self(ctrldata, expdata, annotationPath, output_file, normalization, samples, adaptive, doHistogram, replicates, LOESS, ignoreCodon, NTerminus, CTerminus)
[docs] def Run(self): self.transit_message("Starting rankproduct Method") start_time = time.time() Kctrl = len(self.ctrldata) Kexp = len(self.expdata) #Get orf data self.transit_message("Getting Data") (data, position) = transit_tools.get_validated_data(self.ctrldata+self.expdata, wxobj=self.wxobj) if self.normalization != "none": self.transit_message("Normalizing using: %s" % self.normalization) (data, factors) = norm_tools.normalize_data(data, self.normalization, self.ctrldata+self.expdata, self.annotation_path) Gctrl= tnseq_tools.Genes(self.ctrldata + self.expdata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data[:Kctrl,:], position=position) Gexp= tnseq_tools.Genes(self.ctrldata + self.expdata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data[Kctrl:,:], position=position) Ngenes = len(Gctrl) # Get the average counts for all the genes, in each replicate meanCtrl = numpy.zeros((Kctrl, Ngenes)) meanExp = numpy.zeros((Kexp, Ngenes)) for i in range(Ngenes): if numpy.any(Gctrl[i].reads): meanCtrl[:,i] = numpy.mean(Gctrl[i].reads,1) else: meanCtrl[:,i] = numpy.zeros(Kctrl) # if numpy.any(Gexp[i].reads): meanExp[:,i] = numpy.mean(Gexp[i].reads,1) else: meanExp[:,i] = numpy.zeros(Kexp) # Calculate a logFC2 between Experimental and Control # Then calculates it's rank, and observed rankProduct logFC2 = numpy.log2((meanExp+0.0001)/(meanCtrl+0.0001)) rank = numpy.array([scipy.stats.rankdata(Lvec) for Lvec in logFC2]) obsRP = numpy.power(numpy.prod(rank,0), 1.0/Kctrl) permutations = numpy.zeros((self.samples, Ngenes)) tempranks = scipy.array([numpy.arange(1,Ngenes+1) for rep in range(Kctrl)]) for s in range(self.samples): rankperm = numpy.array([numpy.random.permutation(tr) for tr in tempranks]) permutations[s] = numpy.power(numpy.prod(rankperm,0), 1.0/Kctrl) rankRP = numpy.argsort(obsRP) + 1 #rankproduct data = [] count = 0 self.progress_range(Ngenes) for i,gene in enumerate(Gctrl): count+=1 meanctrl = numpy.mean(Gctrl[i].reads) meanexp = numpy.mean(Gexp[i].reads) log2fc = numpy.log2((meanexp+0.0001)/(meanctrl+0.0001)) countbetter = numpy.sum(permutations <= obsRP[i]) pval = countbetter/float(self.samples*Ngenes) e_val = countbetter/float(self.samples) q_paper = e_val/float(rankRP[i]) data.append([gene.orf, gene.name, gene.desc, gene.n, meanctrl, meanexp, log2fc, obsRP[i], e_val, q_paper, pval]) # Update Progress text = "Running rankproduct Method... %5.1f%%" % (100.0*count/Ngenes) self.progress_update(text, count) # self.transit_message("") # Printing empty line to flush stdout self.transit_message("Performing Benjamini-Hochberg Correction") data.sort() q_bh = stat_tools.BH_fdr_correction([row[-1] for row in data]) self.output.write("#RankProduct\n") if self.wxobj: members = sorted([attr for attr in dir(self) if not callable(getattr(self,attr)) and not attr.startswith("__")]) memberstr = "" for m in members: memberstr += "%s = %s, " % (m, getattr(self, m)) self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s\n" % (",".join(self.ctrldata).encode('utf-8'), self.annotation_path.encode('utf-8'), self.output.name.encode('utf-8'))) else: self.output.write("#Console: python3 %s\n" % " ".join(sys.argv)) self.output.write("#Data: %s\n" % (",".join(self.ctrldata).encode('utf-8'))) self.output.write("#Annotation path: %s\n" % self.annotation_path.encode('utf-8')) self.output.write("#Time: %s\n" % (time.time() - start_time)) self.output.write("#%s\n" % (columns)) for i,row in enumerate(data): (orf, name, desc, n, mean1, mean2, log2FCgene, obsRPgene, e_val, q_paper, pval) = row self.output.write("%s\t%s\t%s\t%d\t%1.1f\t%1.1f\t%1.2f\t%1.8f\t%1.1f\t%1.8f\n" % (orf, name, desc, n, mean1, mean2,log2FCgene, obsRPgene, e_val, q_paper)) self.output.close() self.transit_message("Adding File: %s" % (self.output.name)) self.add_file(filetype="RankProduct") self.finish() self.transit_message("Finished rankproduct Method")
[docs] @classmethod def usage_string(self): return """python3 %s rankproduct <comma-separated .wig control files> <comma-separated .wig experimental files> <annotation .prot_table or GFF3> <output file> [Optional Arguments] Optional Arguments: -s <integer> := Number of samples. Default: -s 100 -n <string> := Normalization method. Default: -n TTR -h := Output histogram of the permutations for each gene. Default: Turned Off. -a := Perform adaptive rankproduct. Default: Turned Off. -l := Perform LOESS Correction; Helps remove possible genomic position bias. Default: Turned Off. -iN <float> := Ignore TAs occuring at given fraction (as integer) of the N terminus. Default: -iN 0 -iC <float> := Ignore TAs occuring at given fraction (as integer) of the C terminus. Default: -iC 0 """ % (sys.argv[0])
if __name__ == "__main__": (args, kwargs) = transit_tools.cleanargs(sys.argv) #TODO: Figure out issue with inputs (transit requires initial method name, running as script does not !!!!) G = RankProductMethod.fromargs(sys.argv[1:]) G.console_message("Printing the member variables:") G.print_members() print("") print("Running:") G.Run()