Source code for utool.util_csv

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals

try:
    import numpy as np
except ImportError:
    pass
from six.moves import zip, map
import six
from utool import util_type
from utool import util_inject
from utool import util_dev

print, rrr, profile = util_inject.inject2(__name__)


class CSV(util_dev.NiceRepr):
    def __init__(self, row_data, row_headers=None, col_headers=None):
        self.row_data = row_data
        if col_headers is None:
            self.header = row_data[0]
        else:
            self.header = col_headers
        self.header_tags = [[x] for x in self.header]
        self.short_header = None
        # FIXME: finish row/col header integration
        self.row_headers = row_headers

    def __nice__(self):
        import utool as ut
        if self.short_header is None:
            header_str = ', '.join(
                [ut.truncate_str(h, maxlen=15, truncmsg='~//~') for h in self.header]
            )
        else:
            header_str = ', '.join(self.short_header)
        return '(shape=%s: cols=%s)' % (self.shape, header_str)

    @classmethod
    def from_fpath(cls, fpath, **kwargs):
        self = cls(read_csv(fpath, **kwargs))
        return self

    @property
    def shape(self):
        return len(self.row_data), len(self.header)

    def __str__(self):
        return self.nice_table()

    def _strip_self(self):
        self.row_data = [[c.strip(' ') for c in r] for r in self.row_data]
        self.header = self.row_data[0]
        self.header_tags = [[x] for x in self.header]

    def tabulate(self):
        import tabulate
        import utool as ut
        tabular_data = [
            ut.flatten([[r], d]) for r, d in zip(self.row_headers, self.row_data)
        ]
        return tabulate.tabulate(tabular_data, [''] + self.header, 'fancy_grid')

    def transpose(self):
        import utool as ut
        row_dataT = ut.listT(self.row_data)
        return CSV(row_dataT, row_headers=self.header, col_headers=self.row_headers)

    def nice_table(self):
        import utool as ut
        return ut.make_csv_table(ut.listT(self.row_data), raw=True)

    def nice_table2(self, **kwargs):
        import utool as ut
        return ut.make_csv_table(
            ut.listT(self.row_data),
            column_lbls=self.header,
            row_lbls=self.row_headers,
            **kwargs
        )

    def raw_table(self):
        return '\n'.join([','.join([y for y in x]) for x in self.row_data])

    def fuzzy_filter_columns(self, fuzzy_headers):
        import utool as ut
        col_flags = ut.filterflags_general_tags(
            self.header_tags, logic='or', in_any=fuzzy_headers
        )
        self.header = ut.compress(self.header, col_flags)
        self.header_tags = ut.compress(self.header_tags, col_flags)
        self.row_data = ut.listT(ut.compress(ut.listT(self.row_data), col_flags))
        if self.short_header is not None:
            self.short_header = ut.compress(self.short_header, col_flags)

    def __getitem__(self, pat):
        colx = self.fuzzy_find_colx(pat)
        return self.take_column(colx)

    def fuzzy_reorder_columns(self, fuzzy_headers, inplace=True):
        import utool as ut
        specified_xs = [self.fuzzy_find_colx(pat) for pat in fuzzy_headers]
        otherxs = ut.index_complement(specified_xs, len(self.header_tags))
        new_order = specified_xs + otherxs
        return self.permute_columns(new_order)

    def permute_columns(self, new_order, inplace=True):
        import utool as ut
        self.header = ut.take(self.header, new_order)
        self.header_tags = ut.take(self.header_tags, new_order)
        self.row_data = ut.listT(ut.take(ut.listT(self.row_data), new_order))
        if self.short_header is not None:
            self.short_header = ut.take(self.short_header, new_order)
        return self

    def fuzzy_find_colxs(self, pat):
        import utool as ut
        colxs = ut.where(ut.filterflags_general_tags(self.header_tags, in_any=[pat]))
        return colxs

    def fuzzy_find_colx(self, pat):
        colxs = self.fuzzy_find_colxs(pat)
        assert len(colxs) == 1, 'cannot find column matching %r' % (pat,)
        return colxs[0]

    def take_fuzzy_column(self, pat):
        import utool as ut
        colx = self.fuzzy_find_colx(pat)
        self.take_column(colx)
        return ut.take_column(self.row_data, colx)

    def take_column(self, colx, with_header=True):
        import utool as ut
        if with_header:
            return ut.take_column(self.row_data, colx)
        else:
            return ut.take_column(self.row_data[1:], colx)

    def compress_rows(self, flags, with_header=True, inplace=True):
        if not inplace:
            import copy
            self = copy.deepcopy(self)
        import utool as ut
        if with_header:
            assert flags[0] is True
            self.row_data = ut.compress(self.row_data, flags)
        else:
            self.row_data = self.row_data[0:1] + ut.compress(self.row_data[1:], flags)
        return self

    def compress_cols(self, flags):
        pass
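

# Illustrative usage sketch of the CSV container above (hypothetical row values;
# the first row is treated as the column header, and output assumes Python 3):
#     >>> row_data = [['id', 'name'], ['1', 'fred'], ['2', 'sue']]
#     >>> self = CSV(row_data)
#     >>> self.shape
#     (3, 2)
#     >>> self.take_column(1, with_header=False)
#     ['fred', 'sue']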


def numpy_to_csv(arr, col_lbls=None, header='', col_type=None):
    col_list = arr.T.tolist()
    return make_csv_table(col_list, col_lbls, header, col_type)
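

# Illustrative sketch of numpy_to_csv (hypothetical array and labels): the array
# is transposed into a column list, so each array row becomes one csv row in the
# aligned table produced by make_csv_table.
#     >>> import numpy as np
#     >>> arr = np.array([[1, 2], [3, 4]])
#     >>> print(numpy_to_csv(arr, ['a', 'b'], header='# arr'))
#     # arr
#     # num_rows=2
#     #   a,  b
#        1,  2
#        3,  4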


def read_csv(fpath, binary=True):
    """reads csv in unicode"""
    import csv
    import utool as ut
    # csvfile = open(fpath, 'rb')
    flags = 'rb' if binary else 'r'
    with open(fpath, flags) as csvfile:
        row_iter = csv.reader(csvfile, delimiter=str(','), quotechar=str('|'))
        row_list = [ut.lmap(ut.ensure_unicode, row) for row in row_iter]
    return row_list
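

# Illustrative round trip for read_csv (hypothetical temporary file). binary=False
# is used here because Python 3's csv module expects text-mode file objects; the
# default binary=True targets Python 2.
#     >>> import os, tempfile
#     >>> fpath = os.path.join(tempfile.mkdtemp(), 'example.csv')
#     >>> with open(fpath, 'w') as file_:
#     ...     _ = file_.write('col1,col2\n1,2\n')
#     >>> read_csv(fpath, binary=False)
#     [['col1', 'col2'], ['1', '2']]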


def make_standard_csv(column_list, column_lbls=None):
    from six.moves import cStringIO as StringIO
    import utool as ut
    import csv
    stream = StringIO()
    row_list = ut.listT(column_list)
    if six.PY2:
        row_list = [
            [ut.ensure_unicode(c).encode('utf-8') for c in r] for r in row_list
        ]
        if column_lbls is not None:
            column_lbls = [ut.ensure_unicode(c).encode('utf-8') for c in column_lbls]
    writer = csv.writer(stream, dialect=csv.excel)
    if column_lbls is not None:
        writer.writerow(column_lbls)
    writer.writerows(row_list)
    csv_str = stream.getvalue()
    return csv_str
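

# Illustrative sketch of make_standard_csv (hypothetical data). The excel dialect
# terminates rows with '\r\n', which is normalized below just for display.
#     >>> csv_str = make_standard_csv([[1, 2], ['a', 'b']], ['num', 'alpha'])
#     >>> print(csv_str.replace('\r\n', '\n').strip())
#     num,alpha
#     1,a
#     2,b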


def make_csv_table(
    column_list=[],
    column_lbls=None,
    header='',
    column_type=None,
    row_lbls=None,
    transpose=False,
    precision=2,
    use_lbl_width=True,
    comma_repl='<com>',
    raw=False,
    new=False,
    standardize=False,
):
    """
    Creates a csv table with aligned columns

    make_csv_table

    Args:
        column_list (list):
        column_lbls (None):
        header (str):
        column_type (None):
        row_lbls (None):
        transpose (bool):

    Returns:
        str: csv_text

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_csv import *  # NOQA
        >>> column_list = [[1, 2, 3], ['A', 'B', 'C']]
        >>> column_lbls = ['num', 'alpha']
        >>> header = '# Test CSV'
        >>> column_type = (int, str)
        >>> row_lbls = None
        >>> transpose = False
        >>> csv_text = make_csv_table(column_list, column_lbls, header, column_type, row_lbls, transpose)
        >>> result = csv_text
        >>> print(result)
        # Test CSV
        # num_rows=3
        #   num,  alpha
             1,      A
             2,      B
             3,      C
    """
    import utool as ut
    assert comma_repl.find(',') == -1, 'comma_repl cannot contain a comma!'
    if transpose:
        column_lbls, row_lbls = row_lbls, column_lbls
        column_list = list(map(list, zip(*column_list)))
    if row_lbls is not None:
        if isinstance(column_list, np.ndarray):
            column_list = column_list.tolist()
        if isinstance(row_lbls, np.ndarray):
            row_lbls = row_lbls.tolist()
        column_list = [row_lbls] + column_list
        column_lbls = ['ROWLBL'] + list(map(six.text_type, column_lbls))
        if column_type is not None:
            column_type = [six.text_type] + column_type
    if len(column_list) == 0:
        print('[csv] No columns')
        return header
    column_len = [len(col) for col in column_list]
    num_data = column_len[0]
    if num_data == 0:
        # print('[csv.make_csv_table()] No data. (header=%r)' % (header,))
        return header
    if any([num_data != clen for clen in column_len]):
        print('[csv] column_lbls = %r ' % (column_lbls,))
        print('[csv] column_len = %r ' % (column_len,))
        print('[csv] inconsistent column lengths')
        return header
    if column_type is None:
        column_type = list(map(type, ut.get_list_column(column_list, 0)))
        # column_type = [type(col[0]) for col in column_list]
    csv_rows = []
    if new:
        csv_rows.append(header)
    elif not raw:
        csv_rows.append(header)
        if not standardize:
            csv_rows.append('# num_rows=%r' % num_data)
    column_maxlen = []
    column_str_list = []
    if column_lbls is None:
        column_lbls = [''] * len(column_list)

    def _toint(c):
        if c is None:
            return 'None'
        try:
            if np.isnan(c):
                return 'nan'
        except TypeError as ex:
            print('------')
            print('[csv] TypeError %r ' % ex)
            print('[csv] _toint(c) failed')
            print('[csv] c = %r ' % c)
            print('[csv] type(c) = %r ' % type(c))
            print('------')
            raise
        return ('%d') % int(c)

    import uuid
    textable_types = [uuid.UUID, six.text_type]

    try:
        if standardize:

            def csv_format(r):
                text = ut.repr2(r, precision=precision)
                # text = six.text_type(r)
                # Check if needs escape
                escape_chars = ['"', ' ', ',']
                if any([c in text for c in escape_chars]):
                    # escape quotes with quotes
                    text = text.replace('"', '""')
                    # encapsulate with quotes
                    text = '"' + text + '"'
                return text

            for col, lbl, coltype in zip(column_list, column_lbls, column_type):
                col_str = [csv_format(r) for r in col]
                column_str_list.append(col_str)
            pass
        else:
            # Loop over every column
            for col, lbl, coltype in zip(column_list, column_lbls, column_type):
                # Loop over every row in the column (using list comprehension)
                if coltype is list or util_type.is_list(coltype):
                    col_str = [
                        six.text_type(c).replace(',', ' ').replace('.', '<dot>')
                        for c in col
                    ]
                elif (
                    coltype is float
                    or util_type.is_float(coltype)
                    or coltype == np.float32
                    or util_type.is_valid_floattype(coltype)
                ):
                    precision_fmtstr = '%.' + six.text_type(precision) + 'f'
                    col_str = [
                        'None' if r is None else precision_fmtstr % float(r)
                        for r in col
                    ]
                    # col_ = [r if r is None else float(r) for r in col]
                    # col_str = [ut.repr2(r, precision=2) for r in col_]
                elif coltype is int or util_type.is_int(coltype) or coltype == np.int64:
                    col_str = [_toint(c) for c in (col)]
                elif coltype in textable_types or util_type.is_str(coltype):
                    col_str = [six.text_type(c).replace(',', comma_repl) for c in col]
                else:
                    print('[csv] is_unknown coltype=%r' % (coltype,))
                    try:
                        col_str = [six.text_type(c) for c in (col)]
                    except UnicodeDecodeError:
                        try:
                            col_str = [ut.ensure_unicode(c) for c in (col)]
                        except Exception:
                            col_str = [repr(c) for c in (col)]
                column_str_list.append(col_str)
        for col_str, lbl in zip(column_str_list, column_lbls):
            col_lens = [len(s) for s in (col_str)]
            max_len = max(col_lens)
            if use_lbl_width:
                # The column label counts towards the column width
                max_len = max(len(lbl), max_len)
            column_maxlen.append(max_len)
    except Exception as ex:
        # ut.embed()
        ut.printex(ex, keys=['col', 'lbl', 'coltype'])
        raise

    def _fmtfn(maxlen):
        return ''.join(['%', six.text_type(maxlen + 2), 's'])

    fmtstr = ','.join([_fmtfn(maxlen) for maxlen in column_maxlen])
    try:
        if new:
            csv_rows.append('# ' + fmtstr % tuple(column_lbls))
        elif not raw:
            csv_rows.append('# ' + fmtstr % tuple(column_lbls))
            # csv_rows.append('# ' + fmtstr % column_lbls)
    except Exception as ex:
        # print(len(column_list))
        # ut.embed()
        ut.printex(ex, keys=['fmtstr', 'column_lbls'])
        raise
    for row in zip(*column_str_list):
        csv_rows.append(' ' + fmtstr % row)
    csv_text = '\n'.join(csv_rows)
    return csv_text


if __name__ == '__main__':
    """
    CommandLine:
        python -c "import utool, utool.util_csv; utool.doctest_funcs(utool.util_csv, allexamples=True)"
        python -c "import utool, utool.util_csv; utool.doctest_funcs(utool.util_csv)"
        python -m utool.util_csv
        python -m utool.util_csv --allexamples
    """
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    import utool as ut  # NOQA
    ut.doctest_funcs()