# ICE Revision: $Id: $
"""
Collects data about runs in a small SQLite database
"""
# don't look at it too closely. It's my first sqlite-code
import sqlite3
from os import path
import datetime
import re
import sys
from PyFoam.Error import error
from .CSVCollection import CSVCollection
from PyFoam.ThirdParty.six import print_,iteritems,integer_types
from PyFoam.ThirdParty.six import u as uniCode
class RunDatabase(object):
    """
    Database with information about runs. To be queried etc

    The runs are stored in the table ``theRuns`` (one row per call to
    :meth:`add`). Every dictionary-valued entry ``foo`` of the added data
    goes to a separate table ``fooData`` whose rows refer to the run via
    the column ``runId``. Nested dictionaries are flattened to one level
    with the keys joined by :attr:`separator`.
    """

    # joins the keys of nested dictionaries into one column name
    separator = "//"

    # characters that are replaced in column names, with their 3-letter codes
    specialChars = {
        '[': 'bro',
        ']': 'brc',
        '{': 'cro',
        '}': 'crc',
        '(': 'pro',
        ')': 'prc',
        '|': 'pip',
    }

    # marker that precedes the 3-letter code of an encoded special character
    specialString = "_specialChar"

    def __init__(self,
                 name,
                 create=False,
                 verbose=False):
        """:param name: name of the file
        :param create: should the database be created if it does not exist
        :param verbose: print the SQL-statements and other progress information"""
        self.verbose = verbose
        if not path.exists(name):
            if not create:
                error("Database", name, "does not exist")
            else:
                self.initDatabase(name)

        self.db = sqlite3.connect(name)
        # rows must be addressable by column name (used for sqlite_master)
        self.db.row_factory = sqlite3.Row

    def initDatabase(self, name):
        """Create a new database file

        :param name: name of the file. Gets the table ``theRuns`` with
            the columns ``runId`` and the insertion time"""
        db = sqlite3.connect(name)
        try:
            # the context manager only commits; closing has to be done separately
            with db:
                db.execute("CREATE TABLE theRuns(runId INTEGER PRIMARY KEY, " +
                           self.__quoteName(self.__normalize("insertionTime")) +
                           " TIMESTAMP)")
        finally:
            db.close()

    def add(self, data):
        """Add a dictionary with data to the database

        :param data: dictionary. Entries that are dictionaries go to
            separate tables. All other entries go to the table of the runs.
            Missing tables and columns are created"""
        self.__adaptDatabase(data)

        plain, subtables = self.__splitData(data)

        # an entry "insertionTime" in the data overrides the current time
        runData = {"insertionTime": datetime.datetime.now()}
        runData.update(plain)
        runID = self.__addContent("theRuns", runData)

        for tn, content in subtables.items():
            flat = self.__flattenDict(content)
            flat["runId"] = runID
            self.__addContent(tn + "Data", flat)

        self.db.commit()

    @staticmethod
    def __splitData(data):
        """Split a dictionary into the plain entries and the sub-dictionaries

        Only values whose type is exactly ``dict`` count as sub-dictionaries
        (subclasses are treated as plain values).

        :return: tuple with the two dictionaries (plain, sub-dictionaries)"""
        plain = {}
        nested = {}
        for k, v in data.items():
            if type(v) is dict:
                nested[k] = v
            else:
                plain[k] = v
        return plain, nested

    @staticmethod
    def __quoteName(identifier):
        """Quote a table- or column-name for the usage in a SQL-statement"""
        return '"' + identifier.replace('"', '""') + '"'

    def __normalize(self, s):
        """Normalize a column-name so that the case-insensitive column-names of SQLite
        are no problem

        Uppercase letters and underscores are prefixed with an underscore
        (and lowercased). The characters in ``specialChars`` are replaced
        by ``specialString`` plus their code. ``runId`` and ``dataId`` are
        not changed."""
        if s in ["runId", "dataId"]:
            return s

        parts = []
        for c in s:
            if c.isupper() or c == "_":
                parts.append("_" + c.lower())
            elif c in RunDatabase.specialChars:
                parts.append(RunDatabase.specialString + RunDatabase.specialChars[c])
            else:
                parts.append(c)

        return "".join(parts)

    def __denormalize(self, s):
        """Denormalize the column name that was normalized by __normalize"""
        marker = RunDatabase.specialString

        # first undo the encoding of the special characters ...
        while marker in s:
            # partition instead of split(maxsplit=1): keywords for split
            # are not supported by Python 2
            pre, _, post = s.partition(marker)
            spec = post[0:3]
            for char, code in RunDatabase.specialChars.items():
                if spec == code:
                    s = pre + char + post[3:]
                    break
            else:
                # error() is expected to raise. Otherwise this would loop forever
                error("No special character for encoding", spec, "found")

        # ... then the underscore-encoding of the uppercase letters
        result = ""
        underFound = False

        for c in s:
            if underFound:
                underFound = False
                result += c.upper()
            elif c == "_":
                underFound = True
            else:
                result += c

        if underFound:
            error("String", s, "was not correctly encoded")

        return result

    def __addContent(self, table, data):
        """Insert one row into a table

        :param table: name of the table
        :param data: dictionary with the (denormalized) column names as keys.
            Numbers are stored as floats, everything else except ``runId``
            as a string. Columns without data are set to NULL
        :return: the row-id of the inserted row"""
        runData = {}
        for k, v in data.items():
            if k == "runId":
                runData[k] = v
            elif isinstance(v, integer_types + (float,)):
                runData[k] = float(v)
            else:
                runData[k] = uniCode(str(v))

        # the first column is the primary key that is set by SQLite
        cols = self.__getColumns(table)[1:]
        addData = tuple([runData.get(c) for c in cols])

        cSQL = "insert into " + self.__quoteName(table) + " (" + \
               ",".join([self.__quoteName(self.__normalize(c)) for c in cols]) + \
               ") values (" + ",".join(["?"] * len(addData)) + ")"
        if self.verbose:
            print_("Execute SQL", cSQL, "with", addData)

        cursor = self.db.cursor()
        try:
            cursor.execute(cSQL, addData)
            return cursor.lastrowid
        except Exception:
            print_("SQL-Expression:", cSQL)
            print_("AddData:", addData)
            # bare raise keeps the original traceback
            raise
        finally:
            cursor.close()

    def __tableNames(self):
        """Names of all the tables in the database"""
        # single quotes: double quoted strings are only accepted by SQLite
        # for compatibility and can be switched off
        c = self.db.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        try:
            return [x["name"] for x in c.fetchall()]
        finally:
            c.close()

    def __adaptDatabase(self, data):
        """Make sure that all the required columns and tables are there"""
        tables = self.__tableNames()

        indata, subtables = self.__splitData(data)

        self.__addColumnsToTable("theRuns", indata)

        for tn, content in subtables.items():
            if tn + "Data" not in tables:
                if self.verbose:
                    print_("Adding table", tn)
                self.db.execute("CREATE TABLE " + self.__quoteName(tn + "Data") +
                                " (dataId INTEGER PRIMARY KEY, runId INTEGER)")
            self.__addColumnsToTable(tn + "Data",
                                     self.__flattenDict(content))

    def __flattenDict(self, oData, prefix=""):
        """Convert a nested dictionary to a flat one

        :param oData: the dictionary
        :param prefix: string that is prepended to all the keys
        :return: dictionary without sub-dictionaries. The keys of the
            nested entries are the keys on the way joined by the separator"""
        plain, nested = self.__splitData(oData)

        data = [(prefix + k, v) for k, v in plain.items()]
        for name, val in nested.items():
            data += list(self.__flattenDict(val, prefix + name + self.separator).items())

        if self.verbose:
            print_("Flattened", oData, "to", data)

        return dict(data)

    def __getColumns(self, tablename):
        """Get the denormalized names of the columns of a table"""
        c = self.db.execute('SELECT * from ' + self.__quoteName(tablename))
        try:
            names = [desc[0] for desc in c.description]
        finally:
            c.close()

        result = []
        for colName in names:
            if colName in ['dataId', 'runId']:
                result.append(colName)
            else:
                result.append(self.__denormalize(colName))

        return result

    def __addColumnsToTable(self, table, data):
        """Add the columns that are missing in a table

        :param table: name of the table
        :param data: dictionary with the column names as keys. Columns for
            numbers are of the type REAL, all others are TEXT"""
        columns = self.__getColumns(table)

        for k, v in data.items():
            if k in columns:
                continue
            if self.verbose:
                print_("Adding:", k, "to", table, "(normalized:",
                       self.__normalize(k), ")")
            if isinstance(v, integer_types + (float,)):
                colType = "REAL"
            else:
                colType = "TEXT"
            self.db.execute('ALTER TABLE %s ADD COLUMN %s %s' %
                            (self.__quoteName(table),
                             self.__quoteName(self.__normalize(k)),
                             colType))

    def dumpToCSV(self,
                  fname,
                  selection=None,
                  disableRunData=None,
                  pandasFormat=True,
                  excel=False):
        """Dump the contents of the database to a csv-file

        :param fname: the CSV-file
        :param selection: list of regular expressions. Only data
            entries fitting those will be added to the CSV-file (except
            for the basic run). If unset all data will be written
        :param disableRunData: list of regular expressions. Columns of
            the run-table whose (denormalized) name fits one of them are
            not written
        :param pandasFormat: passed on when the collected data is
            requested from the CSV-collection
        :param excel: also call ``to_excel`` with fname on the collected data
        :return: the collected data as returned by the CSV-collection. If
            that is None the collection is asked again with False"""
        csvData = CSVCollection(fname)

        # compile the expressions only once instead of for every entry
        disabledExps = [re.compile(e) for e in disableRunData or []]
        selectedExps = [re.compile(e) for e in selection or []]

        tables = self.__tableNames()

        allData = set()
        writtenData = set()
        disabledStandard = set()

        runCursor = self.db.cursor()
        runCursor.execute("SELECT * from theRuns")

        for run in runCursor:
            runId = run['runId']
            if self.verbose:
                print_("Dumping run", runId)

            # the raw columns of the run
            for k in run.keys():
                if disabledExps and any(exp.search(self.__denormalize(k))
                                        for exp in disabledExps):
                    disabledStandard.add(k)
                else:
                    csvData[k] = run[k]

            # the data from all tables under <prefix>//<name>
            for t in tables:
                if t == "theRuns":
                    namePrefix = "runInfo"
                else:
                    # strip the "Data" that was appended to the table name
                    namePrefix = t[:-4]

                dataCursor = self.db.cursor()
                try:
                    dataCursor.execute("SELECT * FROM " + self.__quoteName(t) +
                                       " WHERE runId=?",
                                       (runId,))
                    data = dataCursor.fetchall()
                finally:
                    dataCursor.close()

                if len(data) > 1:
                    error(len(data), "data items found for id ", runId,
                          "in table", t, ".Need exactly 1")
                elif len(data) < 1:
                    continue

                row = data[0]
                for k in row.keys():
                    if k in ["dataId", "runId"] or k in disabledStandard:
                        continue
                    name = namePrefix + self.separator + self.__denormalize(k)
                    allData.add(name)
                    if not selectedExps or any(exp.search(name)
                                               for exp in selectedExps):
                        writtenData.add(name)
                        csvData[name] = row[k]

            # one line per run
            csvData.write()

        runCursor.close()

        if self.verbose:
            sep = "\n    "
            if allData == writtenData:
                print_("Added all data entries:", sep, sep.join(sorted(allData)), sep="")
            else:
                print_("Added parameters:", sep, sep.join(sorted(writtenData)),
                       "\nUnwritten data:", sep, sep.join(sorted(allData - writtenData)), sep="")
            if len(disabledStandard) > 0:
                print_("Disabled standard entries:", sep, sep.join(sorted(disabledStandard)), sep="")

        f = csvData(pandasFormat)
        if excel:
            csvData(True).to_excel(fname)

        if f is not None:
            return f
        # retry by forcing to numpy
        return csvData(False)
# Should work with Python3 and Python2