"""This module has a class JSONAble for serialization of tables/list of dicts to and from JSON encoding
Created on 2020-09-03
@author: wf
"""
import datetime
import json
import re
import sys
from lodstorage.lod import LOD
class JSONAbleSettings:
    """
    settings for JSONAble - put in a separate class so they would not be
    serialized
    """

    # number of spaces used to indent the JSON text created by json.dumps
    indent = 4

    # regular expression to be used for conversion from singleQuote to
    # doubleQuote: it matches every single quote that is not directly
    # preceded by a backslash
    # see https://stackoverflow.com/a/50257217/1497139
    singleQuoteRegex = re.compile("(?<!\\\\)'")
class JSONAble(object):
    """
    mixin to allow classes to be JSON serializable see

    - https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
    """

    def __init__(self):
        """
        Constructor
        """

    @classmethod
    def getPluralname(cls):
        """
        get the plural name of this class

        Returns:
            str: the class name with an "s" appended
        """
        return f"{cls.__name__}s"

    @staticmethod
    def singleQuoteToDoubleQuote(singleQuoted, useRegex=False):
        """
        convert a single quoted string to a double quoted one

        Args:
            singleQuoted (str): a single quoted string e.g.

                .. highlight:: json

                {'cities': [{'name': "Upper Hell's Gate"}]}

            useRegex (boolean): True if a regular expression shall be used for matching

        Returns:
            string: the double quoted version of the string

        Note:
            see
            - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q
        """
        if useRegex:
            return JSONAble.singleQuoteToDoubleQuoteUsingRegex(singleQuoted)
        return JSONAble.singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted)

    @staticmethod
    def singleQuoteToDoubleQuoteUsingRegex(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a regular expression

        Every single quote that is not directly preceded by a backslash is
        replaced - including single quotes inside double quoted parts.

        Args:
            singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}

        Returns:
            string: the double quoted version of the string

        Note:
            see https://stackoverflow.com/a/50257217/1497139
        """
        return JSONAbleSettings.singleQuoteRegex.sub('"', singleQuoted)

    @staticmethod
    def singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a
        character loop that leaves single quotes inside double quoted parts
        untouched

        Args:
            singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}

        Returns:
            string: the double quoted version of the string e.g. {"cities": [{"name": "Upper Hell's Gate"}]}

        Note:
            see https://stackoverflow.com/a/63862387/1497139
        """
        chars = list(singleQuoted)
        inDouble = False
        for index, char in enumerate(chars):
            if char == '"':
                # entering or leaving a double quoted part
                inDouble = not inDouble
            elif char == "'" and not inDouble:
                chars[index] = '"'
        return "".join(chars)

    def getJsonTypeSamples(self):
        """
        get the type samples via the "getSamples" method of my class

        For a JSONAbleList whose own class has no "getSamples" attribute the
        class of the list entries (clazz) is asked instead.

        Returns:
            the result of getSamples() or None if no callable getSamples is available
        """
        cls = self.__class__
        if isinstance(self, JSONAbleList) and not hasattr(cls, "getSamples"):
            cls = self.clazz
        return JSONAble.getJsonTypeSamplesForClass(cls)

    @staticmethod
    def getJsonTypeSamplesForClass(cls):
        """
        return the type samples for the given class

        Args:
            cls: the class (or None) to get the samples from

        Return:
            list: a list of dict that specify the types by example
            or None if the class has no callable "getSamples"
        """
        getSamples = getattr(cls, "getSamples", None)
        if callable(getSamples):
            return getSamples()
        return None

    @staticmethod
    def readJsonFromFile(jsonFilePath):
        """
        read json string from the given jsonFilePath

        Args:
            jsonFilePath(string): the path of the file where to read the result from

        Returns:
            the JSON string read from the file
        """
        # explicit encoding - otherwise the result depends on the platform default
        with open(jsonFilePath, "r", encoding="utf-8") as jsonFile:
            return jsonFile.read()

    @staticmethod
    def storeJsonToFile(jsonStr, jsonFilePath):
        """
        store the given json string to the given jsonFilePath

        Args:
            jsonStr(string): the string to store
            jsonFilePath(string): the path of the file where to store the result
        """
        # explicit encoding - otherwise the result depends on the platform default
        with open(jsonFilePath, "w", encoding="utf-8") as jsonFile:
            jsonFile.write(jsonStr)

    def checkExtension(self, jsonFile: str, extension: str = ".json") -> str:
        """
        make sure the jsonFile has the given extension e.g. ".json"

        Args:
            jsonFile(str): the jsonFile name - potentially without ".json" suffix
            extension(str): the extension to append if the name does not end with it

        Returns:
            str: the jsonFile name with the given extension guaranteed
        """
        if jsonFile.endswith(extension):
            return jsonFile
        return f"{jsonFile}{extension}"

    def storeToJsonFile(
        self, jsonFile: str, extension: str = ".json", limitToSampleFields: bool = False
    ):
        """
        store me to the given jsonFile

        Args:
            jsonFile(str): the JSON file name (optionally without extension)
            extension(str): the extension to use if not part of the jsonFile name
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        JSONAble.storeJsonToFile(self.toJSON(limitToSampleFields), jsonFile)

    def restoreFromJsonFile(self, jsonFile: str):
        """
        restore me from the given jsonFile

        Args:
            jsonFile(string): the jsonFile to restore me from (".json" is appended if missing)
        """
        jsonFile = self.checkExtension(jsonFile)
        self.fromJson(JSONAble.readJsonFromFile(jsonFile))

    def fromJson(self, jsonStr):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
        """
        self.fromDict(json.loads(jsonStr))

    def fromDict(self, data: dict):
        """
        initialize me from the given data by setting one attribute per key

        Args:
            data(dict): the dictionary to initialize me from
        """
        for key, value in data.items():
            setattr(self, key, value)

    def toJsonAbleValue(self, v):
        """
        return the JSON able value of the given value v

        Used as the "default" hook of json.dumps for values that are not
        JSON serializable out of the box.

        Args:
            v(object): the value to convert

        Returns:
            the __dict__ of objects, the ISO format string of dates and
            datetimes and an empty string for anything else
        """
        # objects have __dict__ hash tables which can be JSON-converted
        if hasattr(v, "__dict__"):
            return v.__dict__
        # datetime.datetime is a subclass of datetime.date
        if isinstance(v, datetime.date):
            return v.isoformat()
        return ""

    def toJSON(self, limitToSampleFields: bool = False):
        """
        get my JSON representation

        Args:
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

        Returns:
            a recursive JSON dump of the dicts of my objects
        """
        if limitToSampleFields:
            samples = self.getJsonTypeSamples()
            sampleFields = LOD.getFields(samples)

            def limited(fields: dict) -> dict:
                # keep only the entries that are named in the samples
                return {
                    key: value for key, value in fields.items() if key in sampleFields
                }

            if isinstance(self, JSONAbleList):
                data = {
                    self.listName: [
                        # records are plain dicts if the list has no clazz
                        limited(record if isinstance(record, dict) else record.__dict__)
                        for record in self.__dict__[self.listName]
                    ]
                }
            else:
                data = limited(self.__dict__)
        else:
            # json.dumps hands me over to toJsonAbleValue
            data = self
        return json.dumps(
            data,
            default=self.toJsonAbleValue,
            sort_keys=True,
            indent=JSONAbleSettings.indent,
        )

    def getJSONValue(self, v):
        """
        get the value of the given v as JSON

        Args:
            v(object): the value to get

        Returns:
            the value making sure objects are returned as dicts, dates and
            datetimes as ISO format strings and booleans as the strings
            "true" / "false"
        """
        if hasattr(v, "asJSON"):
            return v.asJSON(asString=False)
        if type(v) is dict:
            return self.reprDict(v)
        if type(v) is list:
            return [self.getJSONValue(item) for item in v]
        # datetime.datetime is a subclass of datetime.date
        if isinstance(v, datetime.date):
            return v.isoformat()
        if isinstance(v, bool):
            # convert True,False to -> true,false
            return str(v).lower()
        return v

    def reprDict(self, srcDict):
        """
        get the given srcDict as new dict with fields being converted with getJSONValue

        Args:
            srcDict(dict): the source dictionary

        Returns:
            dict: the converted dictionary
        """
        return {key: self.getJSONValue(value) for key, value in srcDict.items()}

    def asJSON(self, asString=True, data=None):
        """
        recursively return my dict elements

        Args:
            asString(boolean): if True return my result as a string
            data(dict): the dict to convert - my __dict__ is used if None

        Returns:
            the converted dict or - if asString is True - its string
            representation with single quotes converted to double quotes
        """
        if data is None:
            data = self.__dict__
        jsonDict = self.reprDict(data)
        if not asString:
            return jsonDict
        return JSONAble.singleQuoteToDoubleQuote(str(jsonDict))
class JSONAbleList(JSONAble):
    """
    Container class

    holds a list of records (dicts or instances of clazz) in an attribute
    named by listName and converts it to and from JSON
    """

    def __init__(
        self,
        listName: str = None,
        clazz=None,
        tableName: str = None,
        initList: bool = True,
        handleInvalidListTypes=False,
        filterInvalidListTypes=False,
    ):
        """
        Constructor

        Args:
            listName(str): the name of the list attribute to be used for storing the List
            clazz(class): a class to be used for Object relational mapping (if any)
            tableName(str): the name of the "table" to be used
            initList(bool): True if the list should be initialized
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
        """
        self.clazz = clazz
        self.handleInvalidListTypes = handleInvalidListTypes
        self.filterInvalidListTypes = filterInvalidListTypes
        if listName is None:
            if self.clazz is not None:
                listName = self.clazz.getPluralname()
            else:
                # classes have a "__name__" but no "name" attribute by
                # default - a "name" string declared by a subclass is
                # still honored
                ownClass = self.__class__
                ownName = getattr(ownClass, "name", None)
                if not isinstance(ownName, str):
                    ownName = ownClass.__name__
                listName = ownName.lower()
        self.listName = listName
        self.tableName = listName if tableName is None else tableName
        if initList:
            self.__dict__[self.listName] = []

    def getList(self):
        """
        get my list

        Returns:
            list: the list stored in the attribute named by my listName
        """
        return self.__dict__[self.listName]

    def setListFromLoD(self, lod: list) -> list:
        """
        set my list from the given list of dicts

        Args:
            lod(list) a raw record list of dicts

        Returns:
            list: a list of dicts if no clazz is set
            otherwise a list of objects
        """
        if self.clazz is None:
            # non OO mode - the given list itself becomes my list
            self.__dict__[self.listName] = lod
        else:
            # ORM mode
            # TODO - handle errors (the error list of fromLoD is ignored)
            self.fromLoD(lod, append=False)
        return self.getList()

    def getLoDfromJson(self, jsonStr: str, types=None, listName: str = None):
        """
        get a list of Dicts from the given JSON String

        Args:
            jsonStr(str): the JSON string
            types(Types): the types to be fixed (if any)
            listName(str): not used for the lookup - a dict is always
                searched for my own listName

        Returns:
            list: a list of dicts

        Raises:
            Exception: if the JSON is a dict that has no entry for my listName
        """
        # read a data structure from the given JSON string
        lodOrDict = json.loads(jsonStr)
        if isinstance(lodOrDict, dict):
            if self.listName not in lodOrDict:
                msg = f"invalid JSON for getLoD from Json\nexpecting a list of dicts or a dict with '{self.listName}' as list\nfound a dict with keys: {lodOrDict.keys()} instead"
                raise Exception(msg)
            # get the relevant list of dicts
            lod = lodOrDict[self.listName]
        else:
            # anything that is not a dict is taken as the list itself
            lod = lodOrDict
        if types is not None:
            types.fixTypes(lod, self.listName)
        return lod

    def fromLoD(self, lod, append: bool = True, debug: bool = False):
        """
        load my entityList from the given list of dicts

        Args:
            lod(list): the list of dicts to load
            append(bool): if True append to my existing entries
            debug(bool): if True print the errors

        Return:
            list: a list of errors (if any) - each error is a dict with
            the failing record and the exception
        """
        errors = []
        entityList = self.getList()
        if not append:
            # empty the list in place so that existing references stay valid
            del entityList[:]
        if self.handleInvalidListTypes:
            LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)
        for record in lod:
            try:
                # call the constructor to get a new instance
                entity = self.clazz()
                entity.fromDict(record)
                entityList.append(entity)
            except Exception as ex:
                # best effort: collect the problem and go on with the next record
                error = {self.listName: record, "error": ex}
                errors.append(error)
                if debug:
                    print(error)
        return errors

    def getLookup(self, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name

        Args:
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
        """
        return LOD.getLookup(self.getList(), attrName, withDuplicates)

    def getJsonData(self):
        """
        get my Jsondata

        Returns:
            dict: a dict with my listName as the only key and my list as its value
        """
        return {self.listName: self.__dict__[self.listName]}

    def toJsonAbleValue(self, v):
        """
        make sure we don't store our meta information
        clazz, tableName and listName but just the list we are holding

        Args:
            v(object): the value to convert

        Returns:
            my JSON data if v is me otherwise the result of the JSONAble conversion
        """
        # identity check - "==" would call a custom __eq__ of v
        if v is self:
            return self.getJsonData()
        return super().toJsonAbleValue(v)

    def fromJson(self, jsonStr, types=None):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
            types(Types): the types to be fixed (if any)
        """
        lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
        self.setListFromLoD(lod)

    def asJSON(self, asString=True):
        """
        recursively return my list as JSON

        Args:
            asString(boolean): if True return my result as a string

        Returns:
            the conversion result for my JSON data (see getJsonData)
        """
        return super().asJSON(asString, data=self.getJsonData())

    def restoreFromJsonFile(self, jsonFile: str) -> list:
        """
        read my list of dicts and restore it

        Args:
            jsonFile(str): the jsonFile to restore me from

        Returns:
            list: my list after restoring
        """
        lod = self.readLodFromJsonFile(jsonFile)
        return self.setListFromLoD(lod)

    def restoreFromJsonStr(self, jsonStr: str) -> list:
        """
        restore me from the given jsonStr

        Args:
            jsonStr(str): the json string to restore me from

        Returns:
            list: my list after restoring
        """
        lod = self.readLodFromJsonStr(jsonStr)
        return self.setListFromLoD(lod)

    def readLodFromJsonFile(self, jsonFile: str, extension: str = ".json"):
        """
        read the list of dicts from the given jsonFile

        Args:
            jsonFile(string): the jsonFile to read from
            extension(str): the extension to append if the name does not end with it

        Returns:
            list: a list of dicts
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        return self.readLodFromJsonStr(jsonStr)

    def readLodFromJsonStr(self, jsonStr) -> list:
        """
        read the list of dicts from the given jsonStr fixing the types
        according to the type samples (if there are any)

        Args:
            jsonStr(str): the JSON string to read from

        Returns:
            list: a list of dicts
        """
        if self.clazz is None:
            typeSamples = self.getJsonTypeSamples()
        else:
            # None instead of an AttributeError if clazz has no getSamples
            typeSamples = JSONAble.getJsonTypeSamplesForClass(self.clazz)
        types = None
        if typeSamples is not None:
            types = Types(
                self.listName, warnOnUnsupportedTypes=not self.handleInvalidListTypes
            )
            # inspect all samples - not only the default limit of getTypes
            types.getTypes(self.listName, typeSamples, len(typeSamples))
        return self.getLoDfromJson(jsonStr, types, listName=self.listName)
class Types(JSONAble):
    """
    Types

    holds entity meta Info

    :ivar name(string): entity name = table name
    """

    # the supported types by type name
    typeName2Type = {
        "bool": bool,
        "date": datetime.date,
        "datetime": datetime.datetime,
        "float": float,
        "int": int,
        "str": str,
    }

    def __init__(self, name: str, warnOnUnsupportedTypes=True, debug=False):
        """
        Constructor

        Args:
            name(str): the name of the type map
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): if True - debugging information should be shown
        """
        self.name = name
        self.warnOnUnsupportedTypes = warnOnUnsupportedTypes
        self.debug = debug
        # list name -> field name -> type name
        self.typeMap = {}

    @staticmethod
    def forTable(
        instance, listName: str, warnOnUnsupportedTypes: bool = True, debug=False
    ):
        """
        get the types for the list of Dicts (table) in the given instance with the given listName

        Args:
            instance(object): the instance to inspect
            listName(string): the list of dicts to inspect
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): True if debugging information should be shown

        Returns:
            Types: a types object
        """
        clazz = type(instance)
        types = Types(
            clazz.__name__, warnOnUnsupportedTypes=warnOnUnsupportedTypes, debug=debug
        )
        types.getTypes(listName, instance.__dict__[listName])
        return types

    def addType(self, listName, field, valueType):
        """
        add the python type for the given field to the typeMap

        The first type registered for a field wins - later calls for the
        same field are ignored.

        Args:
            listName(string): the name of the list of the field
            field(string): the name of the field
            valueType: the type of the field as stored in the typeMap
                (getTypesForItems passes the type name)
        """
        self.typeMap.setdefault(listName, {}).setdefault(field, valueType)

    def getTypes(self, listName: str, sampleRecords: list, limit: int = 10):
        """
        determine the types for the given sample records

        Args:
            listName(str): the name of the list
            sampleRecords(list): a list of items
            limit(int): the maximum number of items to check
        """
        # a None value is only worth a warning if there is a single sample
        warnOnNone = len(sampleRecords) == 1
        for sampleRecord in sampleRecords[:limit]:
            self.getTypesForItems(
                listName, sampleRecord.items(), warnOnNone=warnOnNone
            )

    def getTypesForItems(self, listName: str, items: list, warnOnNone: bool = False):
        """
        get the types for the given items
        side effect is setting my types

        Args:
            listName(str): the name of the list
            items(list): a list of items (key,value pairs)
            warnOnNone(bool): if TRUE warn if an item value is None
        """
        supportedTypes = Types.typeName2Type.values()
        for key, value in items:
            valueType = None
            if value is None:
                # NOTE(review): the string fallback is only applied together
                # with the warning (warnOnNone and debug) - confirm that
                # coupling the type assignment to debug is intended
                if warnOnNone and self.debug:
                    print(
                        f"Warning sampleRecord field {key} is None - using string as type"
                    )
                    valueType = str
            else:
                valueType = type(value)
            if valueType is None:
                # no type could be determined from this item
                continue
            if valueType not in supportedTypes:
                msg = f"warning: unsupported type {str(valueType)} for field {key}"
                if self.debug and self.warnOnUnsupportedTypes:
                    print(msg)
            # unsupported types are registered too - getType sorts them out
            self.addType(listName, key, valueType.__name__)

    def fixTypes(self, lod: list, listName: str):
        """
        fix the types in the given data structure

        Args:
            lod(list): a list of dicts
            listName(str): the types to lookup by list name
        """
        if listName in self.typeMap:
            self.fixListOfDicts(self.typeMap[listName], lod)
        else:
            # backward compatibility: the listName used to be ignored and
            # all type maps were applied
            for typeMap in self.typeMap.values():
                self.fixListOfDicts(typeMap, lod)

    def getType(self, typeName):
        """
        get the type for the given type name

        Args:
            typeName(str): the name of the type e.g. "int"

        Returns:
            type: the python type or None if the type name is not supported
        """
        if typeName in Types.typeName2Type:
            return Types.typeName2Type[typeName]
        if self.debug:
            print("Warning unsupported type %s" % typeName)
        return None

    def fixListOfDicts(self, typeMap, listOfDicts):
        """
        fix the type in the given list of Dicts

        The records are modified in place: values of bool, date and datetime
        fields are converted from their string representation. None values
        and fields that are not in the typeMap are left untouched.

        Args:
            typeMap(dict): the type names by field name
            listOfDicts(list): the records to fix
        """
        for record in listOfDicts:
            # iterate a snapshot since the record is modified in the loop
            for key, value in list(record.items()):
                if value is None or key not in typeMap:
                    continue
                valueType = self.getType(typeMap[key])
                if valueType == bool:
                    record[key] = Types._toBool(value)
                elif valueType == datetime.date:
                    record[key] = Types._toDate(value)
                elif valueType == datetime.datetime:
                    record[key] = Types._toDatetime(value)

    @staticmethod
    def _toBool(value):
        """
        convert the strings "True", "TRUE" and "true" to True, any other
        string to False and return non string values unchanged
        """
        if isinstance(value, str):
            return value in ["True", "TRUE", "true"]
        return value

    @staticmethod
    def _toDate(value):
        """
        convert a "YYYY-MM-DD" string to a date - values that already are
        dates are kept (datetimes are reduced to their date)
        """
        # check datetime first since it is a subclass of date
        if isinstance(value, datetime.datetime):
            return value.date()
        if isinstance(value, datetime.date):
            return value
        return datetime.datetime.strptime(value, "%Y-%m-%d").date()

    @staticmethod
    def _toDatetime(value):
        """
        convert an ISO 8601 string to a datetime - values that already are
        datetimes are kept, other non string values result in None
        """
        if isinstance(value, datetime.datetime):
            return value
        if isinstance(value, str):
            # see https://stackoverflow.com/questions/127803/how-do-i-parse-an-iso-8601-formatted-date
            if sys.version_info >= (3, 7):
                return datetime.datetime.fromisoformat(value)
            return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        # TODO: error handling
        return None