# Source code for fluxpyt.bootstrap

# -*- coding: utf-8 -*-
"""
Created on Wed Aug 23 12:06:00 2017

@author: Trunil
"""

import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd



def _resample_statistics(values, n_resamples=1000):
    """Bootstrap-resample ``values`` and summarise the percentile estimates.

    Draws ``n_resamples`` samples with replacement, each the same size as
    ``values``, from the global NumPy random state (one
    ``np.random.choice`` call per resample, in order).

    Returns
    -------
    (drawn, summary)
        ``drawn`` is a flat list of every resampled value in draw order.
        ``summary`` is a list holding, in this order, the median across all
        resamples of the per-sample 2.5th percentile, 16th percentile,
        median, 84th percentile and 97.5th percentile.
    """
    size = len(values)
    drawn = []
    per_sample = []
    for _ in range(n_resamples):
        sample = np.random.choice(values, size=size)
        drawn.extend(sample)
        lo95, lo68, hi68, hi95 = np.percentile(sample, [2.5, 16, 84, 97.5])
        per_sample.append((lo95, lo68, np.median(sample), hi68, hi95))
    # Transpose so each column holds one statistic across all resamples.
    summary = [np.median(column) for column in zip(*per_sample)]
    return drawn, summary


def bootstrap(**kws):
    """Bootstrap confidence intervals for flux values and plot them.

    Reads ``flux_std.pckl`` and ``optimization_data.pckl`` from the current
    working directory, resamples the stored flux values of every selected
    reaction 1000 times, and writes:

    * ``confidence_intervals_bootstrap.csv`` -- one row per selected
      reaction with the 95% / 68% interval bounds and the median;
    * ``bootstrap_dataframe.pckl`` -- a pickled pandas DataFrame with one
      column of resampled values per selected reaction;
    * ``monte_carlo_fig.png`` -- a box plot of the intervals (also shown
      on screen via ``plt.show()``).

    Keyword arguments
    -----------------
    rxnList : list, optional
        Reaction IDs to bootstrap. When missing, ``None`` or empty, every
        reaction ID stored in ``flux_std.pckl`` is used.
    ylim : tuple, optional
        y-axis limits for the plot, e.g. ``(0, 10)``. An invalid value is
        reported and ignored so the figure is still produced.

    Raises
    ------
    ValueError
        If ``rxnList`` names a reaction ID that is not in ``flux_std.pckl``.

    Example
    -------
    bootstrap(rxnList=['V01', 'V02'], ylim=(0, 10))
    """
    from operator import itemgetter

    np.random.seed(1)  # fixed seed so repeated runs give identical intervals
    print('\n\n\nBootstrapping...')

    # SECURITY NOTE: pickle.load can execute arbitrary code. Only run this
    # on pickle files produced by a trusted fluxpyt run.
    with open('flux_std.pckl', 'rb') as flux_file:
        data = pickle.load(flux_file)
    # Presumably data[0] holds the reaction IDs and data[2] one sequence of
    # flux values per reaction, in the same order -- verify against the
    # code that writes flux_std.pckl.
    rxn_ids = data[0]

    rxn_list = kws.get('rxnList')
    if rxn_list is None or len(rxn_list) == 0:
        rxn_list = rxn_ids

    flux_values = [[float(x) for x in row] for row in data[2]]

    with open('optimization_data.pckl', 'rb') as opt_file:
        optimal_solution = pickle.load(opt_file)[12]

    # Validate before any output file is touched so a typo in rxnList does
    # not leave a truncated CSV behind.
    unknown = [rxn for rxn in rxn_list if rxn not in rxn_ids]
    if unknown:
        raise ValueError(
            'rxnList contains reaction IDs not present in flux_std.pckl: '
            '{}'.format(unknown))
    selected = {rxn_ids.index(rxn) for rxn in rxn_list}

    box_stats = []
    bootstrap_df = pd.DataFrame({})  # one column of resampled values per reaction
    n_rxns = len(flux_values)

    with open('confidence_intervals_bootstrap.csv', 'w') as csv_file:
        csv_file.write('rxnID,-95% CI,-68% CI,median,+68% CI,+95% CI\n')

        for k, values in enumerate(flux_values):
            print(k+1,'of',n_rxns)
            if k not in selected:
                continue

            opt_val = optimal_solution[1][k]
            drawn, summary = _resample_statistics(values)
            lo95, lo68, med, hi68, hi95 = summary

            bootstrap_df[rxn_ids[k]] = drawn

            row = [rxn_ids[k]] + ['{:0.4f}'.format(v) for v in summary]
            csv_file.write(','.join(row) + '\n')

            # Statistics dictionary in the layout Axes.bxp expects; the box
            # spans the 68% interval and the whiskers the 95% interval.
            box_stats.append({
                "label": rxn_ids[k],
                "med": med,
                "q1": lo68,
                "q3": hi68,
                "mean": opt_val,
                "whislo": lo95,
                "whishi": hi95,
                "fliers": [],
                'linestyle': '-',
            })

    box_stats.sort(key=itemgetter('label'))

    print('\n\n',bootstrap_df.head())
    bootstrap_df.to_pickle('bootstrap_dataframe.pckl')

    # Plot: widen the figure with the number of reactions, minimum width 6.
    fig, axes = plt.subplots(1,1,figsize=(max(len(rxn_list)/2.5,6),10))

    whiskerprops = dict(linestyle='-',color='k')
    medianprops = dict(color='k',linewidth=0.5)
    boxprops = dict(color='k',linewidth=0.5)

    bx1 = axes.bxp(box_stats,
                   boxprops=boxprops,
                   whiskerprops=whiskerprops,
                   medianprops=medianprops,
                   patch_artist=True,
                   showmeans=False)
    axes.set_ylabel('Flux values',fontsize=14)

    ylim = kws.get('ylim')
    if ylim is not None:
        try:
            axes.set_ylim(ylim)
        except (TypeError, ValueError) as err:
            # Keep the best-effort behaviour, but say why ylim was dropped.
            print('Ignoring invalid ylim {!r}: {}'.format(ylim, err))

    # Fill every box with the same slate-grey colour.
    for patch in bx1['boxes']:
        patch.set_facecolor('#708090')

    plt.savefig('monte_carlo_fig.png',format='png',dpi=1000)
    plt.show()