Source code for quanfima.utils

import os
import operator
import numpy as np
from scipy import ndimage as ndi
from skimage import morphology
from matplotlib import colors
from statsmodels.stats.multicomp import MultiComparison
from statsmodels.stats.libqsturng import psturng


def prepare_data(data, dilate_iterations=1, sigma=0.5):
    """Returns the given binary data, its skeleton and the thickened skeleton.

    The skeleton of a given 2D or 3D array is computed, then it is thickened
    using morphological dilation with `dilate_iterations` and smoothed with a
    Gaussian filter of the specified `sigma`.

    Parameters
    ----------
    data : ndarray
        2D or 3D binary array which will be processed.

    dilate_iterations : integer
        Indicates the number of iterations for thickening the skeleton.

    sigma : float
        Indicates the sigma of the Gaussian filter used for smoothing the skeleton.

    Returns
    -------
    arrays : tuple of 2D or 3D arrays
        The original array, its skeleton and the thickened skeleton.
    """
    data_8bit = data.astype(np.uint8)
    data_8bit = ndi.binary_fill_holes(data_8bit).astype(np.uint8)

    if data.ndim == 3:
        skeleton = morphology.skeletonize_3d(data_8bit)
    elif data.ndim == 2:
        skeleton = morphology.skeletonize(data_8bit)
    else:
        raise ValueError('Incorrect number of data dimensions, only 2D and 3D arrays are supported.')

    skeleton_thick = ndi.binary_dilation(skeleton, iterations=dilate_iterations).astype(np.float32)
    skeleton_thick = ndi.gaussian_filter(skeleton_thick, sigma)

    return (data, skeleton, skeleton_thick)
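A minimal usage sketch; the random blob volume below is an invented stand-in for real segmented fibre data:

import numpy as np
from scipy import ndimage as ndi
from quanfima import utils

# Synthetic binary volume: a few random seed voxels grown by dilation
# (purely illustrative, not real data).
np.random.seed(0)
volume = np.zeros((64, 64, 64), dtype=np.uint8)
idx = np.random.randint(0, 64, size=(3, 50))
volume[idx[0], idx[1], idx[2]] = 1
volume = ndi.binary_dilation(volume, iterations=3)

data, skeleton, skeleton_thick = utils.prepare_data(volume, dilate_iterations=1, sigma=0.5)
print(data.shape, skeleton.max(), skeleton_thick.dtype)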
def geo2rgb(lat, azth, azth_max=np.pi, lat_max=np.pi):
    """Translates geo-coordinates to a color in RGB color space.

    Parameters
    ----------
    lat : float
        Indicates the latitude or elevation component of the given geo-coordinates.

    azth : float
        Indicates the azimuth component of the given geo-coordinates.

    azth_max : float
        Indicates the normalization value for the azimuth component.

    lat_max : float
        Indicates the normalization value for the latitude or elevation component.

    Returns
    -------
    array : tuple of values
        The tuple of RGB values [R, G, B].
    """
    return colors.hsv_to_rgb([azth / azth_max, lat / lat_max, 1.0])
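For example, a hypothetical orientation with elevation pi/3 and azimuth pi/4 (arbitrary illustration values) maps to an RGB triple as follows:

import numpy as np
from quanfima import utils

# Azimuth drives the hue, elevation the saturation; value is fixed at 1.0.
rgb = utils.geo2rgb(lat=np.pi / 3, azth=np.pi / 4)
print(rgb)  # three floats in [0, 1], e.g. for colour-coding fibre orientation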
def calculate_tukey_posthoc(df, column, type_column='type', verbose=True,
                            write=False, name=None, output_dir=None):
    """Computes p-values using ANOVA with post-hoc Tukey HSD for a given DataFrame.

    Estimates p-values for a given DataFrame assuming that the sample type is
    stored in the column named `type_column`.

    Parameters
    ----------
    df : pandas DataFrame
        Contains the table of values and corresponding types or classes.

    column : str
        Indicates the column of values.

    type_column : str
        Indicates the column of sample kind.

    verbose : boolean
        Specifies if the output should be printed into a terminal.

    write : boolean
        Specifies if the output should be written into a text file.

    name : str
        Indicates the name of the output file.

    output_dir : str
        Indicates the output dir where the file will be written.

    Returns
    -------
    out, min_item : dict and tuple
        The dict of sample-type pairs and corresponding p-values, and the pair
        with the smallest p-value.
    """
    mc = MultiComparison(df[column], df[type_column])
    tt = mc.tukeyhsd()
    st_range = np.abs(tt.meandiffs) / tt.std_pairs

    fout = None
    if write and output_dir:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        fout = open(os.path.join(output_dir, name + '.txt'), 'w')
        print(os.path.join(output_dir, name + '.txt'))

    if write:
        print('Tukey post-hoc ({0})'.format(column), file=fout)
        print(tt, file=fout)
        print(mc.groupsunique, file=fout)

    if verbose:
        print('Tukey post-hoc ({0})'.format(column))
        print(tt)
        print(mc.groupsunique)

    pvals = psturng(st_range, len(tt.groupsunique), tt.df_total)

    out = {}
    groups = mc.groupsunique
    g1idxs, g2idxs = mc.pairindices
    for g1i, g2i, p in zip(g1idxs, g2idxs, pvals):
        gname = '{}-{}'.format(groups[g1i], groups[g2i])
        out[gname] = p

    min_item = min(out.items(), key=operator.itemgetter(1))

    for grp, p in out.items():
        if fout and write:
            print('{}: {}'.format(grp, p), file=fout)

        if verbose:
            print(grp, ': ', p)

    if fout is not None:
        fout.close()

    return out, min_item
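A usage sketch on synthetic data; the 'diameter' column and the group labels below are invented purely for illustration:

import numpy as np
import pandas as pd
from quanfima import utils

# Three made-up groups of fibre diameters drawn from normal distributions.
np.random.seed(0)
df = pd.DataFrame({
    'diameter': np.concatenate([np.random.normal(5.0, 0.5, 30),
                                np.random.normal(6.0, 0.5, 30),
                                np.random.normal(5.1, 0.5, 30)]),
    'type': ['sample_a'] * 30 + ['sample_b'] * 30 + ['sample_c'] * 30,
})

pvals, most_significant = utils.calculate_tukey_posthoc(df, 'diameter',
                                                        type_column='type',
                                                        verbose=False)
print(pvals)             # p-value per pair of groups, e.g. 'sample_a-sample_b'
print(most_significant)  # (group pair, smallest p-value)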