# Source code for kvh

import sys
import codecs
from re import match

#import pdb

def isstr(s):
    """Return True when *s* is a text string (a ``str`` instance), else False."""
    is_text = isinstance(s, str)
    return is_text
class Obj:
    """Plain attribute container: each keyword argument becomes an attribute.

    Example: ``Obj(a="1").a`` evaluates to ``"1"``.
    """

    def __init__(self, **kwargs):
        # Store the keyword arguments directly as instance attributes.
        self.__dict__.update(kwargs)
def kvh2tlist(fp, lev=None, indent=None, strip=False):
    """Read a kvh stream and return its content as a nested list of tuples.

    The result has the shape ``[(k1, v1), (k2, [(k2.1, v2.1)])]``: a value is
    either a string or, when the key is followed by deeper-indented lines,
    the list parsed from those lines.  A key with neither a value nor a
    deeper level gets the empty string as value.

    :param fp: readable text stream, or a file name.  A file name is opened
        here with the "utf-8-sig" encoding and closed before returning, also
        when an error is raised.
    :param lev: one-item list holding the nesting level being parsed, i.e.
        the number of leading tabs expected before each key.  It is mutated
        in place because it is shared with the recursive calls.  Defaults to
        a fresh ``[0]``.
    :param indent: one-item list holding the number of tabs already consumed
        on the current line; shared and mutated like ``lev``.  Defaults to a
        fresh ``[0]``.
    :param strip: passed to kvh_read_key() and kvh_read_val(); when true,
        surrounding whitespace is stripped from keys and values.
    :returns: the list of ``(key, value)`` tuples of the requested level.
    :raises NameError: when ``lev``/``indent`` are negative, when ``lev`` is
        lower than ``indent``, or when either exceeds the stream position.
    """
    # Fresh counters for every top-level call: list literals used as default
    # values would be shared between calls and stay dirty after an exception.
    if lev is None:
        lev = [0]
    if indent is None:
        indent = [0]

    # check the stream
    open_here = isstr(fp)
    if open_here:
        fp = codecs.open(fp, "r", encoding="utf-8-sig")
    try:
        if open_here:
            fp.seek(0)
        # error control
        if lev[0] < 0 or indent[0] < 0:
            raise NameError("lev=%d, indent=%d both must be positive"%(lev[0], indent[0]))
        if lev[0] < indent[0]:
            raise NameError("lev=%d, indent=%d, lev must be greater or equal to indent"%(lev[0], indent[0]))
        if fp != sys.stdin and lev[0] > fp.tell():
            raise NameError("lev=%d, file position=%d, lev must be less or equal to file position"%(lev[0], fp.tell()))
        if fp != sys.stdin and indent[0] > fp.tell():
            raise NameError("indent=%d, file position=%d, indent must be less or equal to file position"%(indent[0], fp.tell()))

        # algorithm:
        # advance to the requested indent (=level)
        # if not successful, return the list collected so far
        # read a key
        # if sep==\t read the value
        # elif sep==\n
        #     recursive call with increased indentation
        #     if no result at level+1, put an empty value
        # else (end of stream) put an empty value
        tlist = []
        while True:
            # The current position is supposed to point to the beginning of
            # a line, so consume the tabs required for the current level.
            while indent[0] < lev[0]:
                # Remember where this character starts so it can be pushed
                # back: text streams reject relative seeks such as
                # seek(-1, 1), and one byte back is wrong for a multi-byte
                # character anyway.
                try:
                    before = fp.tell()
                except (OSError, ValueError):
                    before = None  # position unavailable for this stream
                char = fp.read(1)
                if char != "\t":
                    if char != "":
                        if before is None:
                            # best effort, as the code did historically
                            fp.seek(-1, 1)
                        else:
                            fp.seek(before)
                    break
                indent[0] += 1
            if indent[0] < lev[0]:
                # The requested level could not be reached,
                # so the current level is finished.
                return tlist
            (key, sep) = kvh_read_key(fp, strip)
            if sep == "\t":
                tlist.append((key, kvh_read_val(fp, strip)))
                indent[0] = 0
            elif sep == "\n":
                lev[0] += 1
                indent[0] = 0
                nextlist = kvh2tlist(fp, lev, indent, strip)
                lev[0] -= 1
                if nextlist:
                    tlist.append((key, nextlist))
                else:
                    # no value and no deeper level
                    tlist.append((key, ""))
            else:
                # we are at the end of the stream
                if indent[0] or key:
                    tlist.append((key, ""))
                indent[0] = 0
                lev[0] = 0
                return tlist
    finally:
        if open_here:
            fp.close()
def kvh_read_key(fp, strip=False):
    r"""Read a key from the current position of the stream *fp*.

    Characters are consumed one at a time until the first unescaped tab,
    the first unescaped newline, or the end of the stream.  A backslash
    escapes the character that follows it: that character is kept in the
    key and the backslash itself is dropped.

    :param fp: object whose ``read(1)`` returns one character, or "" at the
        end of the stream.
    :param strip: when true, surrounding whitespace is stripped from the key.
    :returns: tuple ``(key, sep)`` where ``sep`` is the terminating "\t" or
        "\n", or ``None`` when the stream ended first.
    """
    def _result(text, terminator):
        # apply the optional stripping in one place for every exit
        return (text.strip() if strip else text, terminator)

    collected = ""
    while True:
        symbol = fp.read(1)
        if symbol == "":
            return _result(collected, None)
        if symbol == "\t" or symbol == "\n":
            return _result(collected, symbol)
        if symbol == "\\":
            # take the escaped character, if there is one
            symbol = fp.read(1)
            if symbol == "":
                # a backslash right at the end of the stream is dropped
                return _result(collected, None)
        collected += symbol
def kvh_read_val(fp, strip=False):
    r"""Read a value from the current position of the stream *fp*.

    Characters are consumed one at a time until the first unescaped newline
    or the end of the stream.  A backslash escapes the character that
    follows it: that character (a newline included) is kept in the value and
    the backslash itself is dropped.

    :param fp: object whose ``read(1)`` returns one character, or "" at the
        end of the stream.
    :param strip: when true, surrounding whitespace is stripped from the
        value.
    :returns: the value read, without its terminating newline.
    """
    text = ""
    symbol = fp.read(1)
    while symbol != "\n" and symbol != "":
        if symbol == "\\":
            # take the escaped character, if there is one
            symbol = fp.read(1)
            if symbol == "":
                # a backslash right at the end of the stream is dropped
                break
        text += symbol
        symbol = fp.read(1)
    return text.strip() if strip else text
def kvh_tlist2dict(tlist):
    """Convert a tlist (nested list of ``(key, value)`` tuples) to nested dicts.

    String values are kept as they are; any other value is treated as a
    nested tlist and converted recursively.  When a key is repeated at the
    same level, the last occurrence silently wins.
    """
    result = {}
    for key, value in tlist:
        if isstr(value):
            result[key] = value
        else:
            result[key] = kvh_tlist2dict(value)
    return result
def kvh_tlist2obj(tlist):
    """Convert a tlist (nested list of ``(key, value)`` tuples) to nested Obj.

    Every key becomes a field of the returned ``Obj``.  String values are
    kept as they are; any other value is treated as a nested tlist and
    converted recursively.  When a key is repeated at the same level, the
    last occurrence silently wins.
    """
    fields = {}
    for name, value in tlist:
        if isstr(value):
            fields[name] = value
        else:
            fields[name] = kvh_tlist2obj(value)
    return Obj(**fields)
def kvh2dict(fp, strip=False):
    r"""Read a kvh file and return its content as a hierarchical dictionary.

    *fp* and *strip* are handed to kvh2tlist(); the resulting tlist is then
    converted with kvh_tlist2dict().  Keys repeated at the same level of a
    dictionary are silently overwritten.
    """
    parsed = kvh2tlist(fp, strip=strip)
    return kvh_tlist2dict(parsed)
def kvh2obj(fp, strip=False):
    r"""Read a kvh file and return its content as a hierarchy of objects.

    *fp* and *strip* are handed to kvh2tlist(); the resulting tlist is then
    converted with kvh_tlist2obj().  Fields repeated at the same level of an
    object are silently overwritten.
    """
    parsed = kvh2tlist(fp, strip=strip)
    return kvh_tlist2obj(parsed)
def dict2kvh(d, fp=sys.stdout, indent=0):
    r"""Write a nested dictionary in kvh format on the stream fp.

    Each key is written on its own line, preceded by ``indent`` tabs, with
    tab, backslash and newline characters escaped by a backslash.  What
    follows the key depends on the value:

    * a dict (or any dict subclass) is written as a deeper level;
    * an object with a non-empty ``__dict__`` is written as a deeper level
      built from its attributes;
    * anything else is written as ``str(value)`` after a tab, with backslash
      and newline characters escaped.

    :param d: dictionary to write.
    :param fp: writable text stream (stdout by default) or a file name.  A
        file name is opened here for writing in UTF-8, the encoding the kvh
        reader of this module expects, and closed before returning, also
        when an error is raised.
    :param indent: number of tabs written before each key of this level.
    """
    open_here = isstr(fp)
    if open_here:
        fp = open(fp, "w", encoding="utf-8")
    try:
        prefix = "\t" * indent
        for (k, v) in d.items():
            fp.write("%s%s" % (prefix, escape(str(k), "\t\\\n")))
            if isinstance(v, dict):
                # recursive call with incremented indentation
                fp.write("\n")
                dict2kvh(v, fp, indent + 1)
            elif "__dict__" in dir(v) and v.__dict__:
                # recursive call with incremented indentation
                fp.write("\n")
                dict2kvh(v.__dict__, fp, indent + 1)
            else:
                fp.write("\t%s\n" % escape(str(v), "\\\n"))
    finally:
        if open_here:
            fp.close()
def tlist2kvh(d, fp=sys.stdout, indent=0):
    r"""Write a (hierarchical) list of 2-tuples in kvh format on the stream fp.

    Each key is written on its own line, preceded by ``indent`` tabs, with
    tab, backslash and newline characters escaped by a backslash.  A value
    that is a list (or a list subclass) is written as a deeper level; any
    other value is written as ``str(value)`` after a tab, with backslash and
    newline characters escaped.

    :param d: iterable of ``(key, value)`` pairs.
    :param fp: writable text stream (stdout by default) or a file name.  A
        file name is opened here for writing in UTF-8, the encoding the kvh
        reader of this module expects, and closed before returning, also
        when an error is raised.
    :param indent: number of tabs written before each key of this level.
    """
    open_here = isstr(fp)
    if open_here:
        fp = open(fp, "w", encoding="utf-8")
    try:
        prefix = "\t" * indent
        for (k, v) in d:
            fp.write("%s%s" % (prefix, escape(str(k), "\t\\\n")))
            if isinstance(v, list):
                # recursive call with incremented indentation
                fp.write("\n")
                tlist2kvh(v, fp, indent + 1)
            else:
                fp.write("\t%s\n" % escape(str(v), "\\\n"))
    finally:
        if open_here:
            fp.close()
def kvh_getv_by_k(kvt, kl):
    r"""kvh_getv_by_k(kvt, kl)->None|String|kvh tlist

    Get a value from kvt (kvh tlist) following the key hierarchy given in
    the list of keys kl.  At each level the first entry whose key equals the
    wanted one is followed.

    :param kvt: kvh tlist, i.e. a list of ``(key, value)`` tuples whose
        values are strings or nested tlists.
    :param kl: sequence of keys, outermost first.
    :returns: the string or nested tlist found at the end of the key path;
        ``None`` when a key is missing, when the path tries to descend into
        a string value, or when kl is empty.
    """
    if not kl:
        return None
    node = kvt
    last = len(kl) - 1
    for depth, wanted in enumerate(kl):
        if isinstance(node, str):
            # a plain string value has no sub-keys to descend into
            return None
        for (k, v) in node:
            if k == wanted:
                break
        else:
            # no entry with this key at the current level
            return None
        if depth == last:
            return v
        node = v
def escape(s, spch="|&;<>()$`\\\"' \t\n*?[#~=%", ech="\\"):
    r"""Return a copy of s in which every special character is escaped.

    A character of s is special when it is contained in spch; it is escaped
    by putting the string ech in front of it.  The default spch and ech
    correspond to quoting Shell arguments in accordance with
    http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html

    Example:
    os.system("ls %s" % escape(file_name_with_all_meta_chars_but_newline));

    .. note:
       1. An escaped <newline> is removed by a shell if not put in a
          single-quoted string (' ')
       2. A single-quote character, even escaped, cannot appear in a
          single-quoted string
    """
    pieces = []
    for ch in s:
        if ch in spch:
            pieces.append(ech + ch)
        else:
            pieces.append(ch)
    return "".join(pieces)
def kvh_get_matrix(fp, keys):
    """Get a matrix or vector stored under a suite of keys in a kvh file.

    The file is scanned line by line with regular expressions, without
    building the whole hierarchy.  For big kvh files this can be much faster
    than kvh2tlist() + kvh_getv_by_k().

    :param fp: file name, or a stream providing ``readlines()``.  A file
        name is opened here with the "utf-8-sig" encoding, the same one
        kvh2tlist() uses.
    :param keys: list of keys leading to the matrix, outermost first.  Each
        key is searched for, after the line of the previous one, as a line
        made of at least as many tabs as the previous key had, followed by
        the key alone.
    :returns: a list of rows, each row being the list of the tab-separated
        fields of one line under the last key.  The first item of each row
        is the row name.  In case of a matrix (i.e. "row_col" is present in
        the kvh file), the very first row contains the column names.
    :raises NameError: when one of the keys is not found.
    """
    # Local import: only ``match`` is imported from ``re`` at module level.
    import re

    if isinstance(fp, str):
        source_name = fp
        with open(fp, "r", encoding="utf-8-sig") as stream:
            cont = stream.readlines()
    else:
        # not every stream has a name (e.g. io.StringIO)
        source_name = getattr(fp, "name", "<stream>")
        cont = fp.readlines()
    ncont = len(cont)

    # Find the line following the last key by searching all the keys in
    # turn, and remember the indentation of the last key found.
    indent = 0
    nstart = 0
    for k in keys:
        # re.escape() quotes every regex metacharacter, so keys such as
        # "a+b" or "a.b" are matched literally.
        key_line = re.compile(r"^\t{%d,}%s\r?$" % (indent, re.escape(k)))
        for i in range(nstart, ncont):
            if key_line.match(cont[i]):
                nstart = i + 1
                indent = len(cont[i]) - len(cont[i].lstrip("\t"))
                break
        else:
            raise NameError("The key '%s' was not found in kvh file '%s'."%(k, source_name))

    # The matrix ends at the first line that is not indented deeper than
    # its key, or at the end of the file.
    end_line = re.compile(r"^\t{0,%d}[^\t]" % indent)
    nend = ncont
    for i in range(nstart, ncont):
        if end_line.match(cont[i]):
            nend = i
            break

    # Drop the indentation and the end of line, then split into fields.
    return [s[(indent + 1):].rstrip("\r\n").split("\t") for s in cont[nstart:nend]]