Coverage for /Users/Dave/git_repos/_packages_/python/fundamentals/fundamentals/mysql/convert_dictionary_to_mysql_table.py : 7%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Convert a python dictionary into rows of a mysql table*
6:Author:
7 David Young
9:Date Created:
10 June 21, 2016
11"""
12################# GLOBAL IMPORTS ####################
13from builtins import zip
14from builtins import str
15from builtins import range
16import sys
17import os
18os.environ['TERM'] = 'vt100'
19import re
20import yaml
21import time
22import datetime
23import collections as c
24import pymysql as mdb
25from fundamentals import tools, times
26from fundamentals.mysql import writequery, table_exists, readquery
27import six
# REPLACEMENTS USED TO STRIP YAML/FEEDPARSER CONVERSION NOISE FROM THE JOINED
# VALUE STRING (APPLIED IN ORDER)
_VALUE_NOISE = (
    ('time.struct_time', ''),
    ('- !!python/object/new:feedparser.FeedParserDict', ''),
    ('!!python/object/new:feedparser.FeedParserDict', ''),
    ('dictitems:', ''),
    ('dictitems', ''),
    ('!!python/unicode:', ''),
    ('!!python/unicode', ''),
    ('"None"', 'null'),
    ('"null"', 'null'),
)

# REPLACEMENTS USED TO TIDY A FINISHED INSERT STATEMENT (APPLIED IN ORDER)
_STATEMENT_NOISE = (
    ('\\""', '\\" "'),
    ('""', "null"),
    ('!!python/unicode:', ''),
    ('!!python/unicode', ''),
    ('"None"', 'null'),
    ('"null"', 'null'),
)


def _apply_replacements(text, replacements):
    """Return *text* after applying each ``(old, new)`` pair of *replacements* in order."""
    for old, new in replacements:
        text = text.replace(old, new)
    return text


def _format_mysql_column_name(key):
    """Return *key* with spaces and hyphens replaced by underscores and ``dec``/``DEC`` renamed."""
    formattedKey = key.replace(" ", "_").replace("-", "_")
    # DEC IS A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
    if formattedKey == u"dec":
        return u"decl"
    if formattedKey == u"DEC":
        return u"DECL"
    return formattedKey


def _mysql_column_type(formattedKey, value, reDatetime):
    """Return the MySQL column type to create for *value*, or ``None`` if the type is unsupported."""
    textTypes = ("".__class__, u"".__class__)
    if reDatetime.search(str(value)):
        return 'datetime'
    if formattedKey in ('updated_parsed', 'published_parsed', 'feedName', 'title'):
        return 'varchar(100)'
    if isinstance(value, textTypes):
        if len(value) < 80:
            return 'varchar(100)'
        # LONG STRINGS GET HEADROOM FOR LONGER VALUES IN LATER ROWS
        return 'varchar(' + str(450 + len(value) * 2) + ')'
    if isinstance(value, int):
        # bool IS A SUBCLASS OF int SO True/False ALSO LAND HERE (AS tinyint)
        return 'tinyint' if abs(value) <= 9 else 'int'
    if isinstance(value, float):
        return 'double'
    if isinstance(value, list):
        return 'varchar(1024)'
    return None


def _validate_arguments(log, dictionary, dbConn, uniqueKeyList, createHelperTables):
    """Raise ``TypeError``/``ValueError`` (after logging) if the arguments cannot be ingested."""
    if str(type(dbConn).__name__) != "Connection":
        message = 'Please use a valid MySQL DB connection.'
        log.critical(message)
        raise TypeError(message)

    if not isinstance(dictionary, dict):
        message = 'Please make sure "dictionary" argument is a dict type.'
        log.critical(message)
        raise TypeError(message)

    if not isinstance(uniqueKeyList, list):
        message = 'Please make sure "uniqueKeyList" is a list'
        log.critical(message)
        raise TypeError(message)

    for uniqueKey in uniqueKeyList:
        if uniqueKey not in dictionary:
            message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert'
            log.critical(message)
            raise ValueError(message)

    allowedTypes = tuple(six.string_types) + \
        (int, bool, float, datetime.date)
    for k, v in dictionary.items():
        if isinstance(v, list):
            if len(v) != 2:
                message = 'Please make sure the list values in "dictionary" 2 items in length'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
            if not (isinstance(v[0], allowedTypes) or v[0] is None):
                message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
        # THE "int" NAME TEST LETS THROUGH INTEGER-LIKE TYPES THAT ARE NOT
        # SUBCLASSES OF int
        elif not (isinstance(v, allowedTypes) or v is None or "int" in str(type(v))):
            message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % {
                "k": k, "this": type(v)}
            log.critical("%s: in %s we have a %s (%s)" %
                         (message, k, v, type(v)))
            raise ValueError(message)

    if not isinstance(createHelperTables, bool):
        message = 'Please make sure "createHelperTables" is a True or False'
        log.critical(message)
        raise TypeError(message)


def _create_column_if_missing(log, dbConn, dbTableName, key, formattedKey, value, reDatetime):
    """Add the ``formattedKey`` column to the table if absent; return ``False`` if *value* has no usable column type."""
    # NOTE(review): the table and column names are concatenated straight into
    # the SQL, so they must come from a trusted source
    colExists = \
        "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
        formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
    try:
        rows = readquery(
            log=log,
            sqlQuery=colExists,
            dbConn=dbConn,
        )
    except Exception as e:
        log.error('something went wrong' + str(e) + '\n')
        # WITHOUT AN ANSWER WE CANNOT TELL IF THE COLUMN EXISTS - LEAVE THE
        # SCHEMA ALONE RATHER THAN ACT ON AN UNDEFINED/STALE RESULT
        return True

    if len(rows) != 0:
        return True

    # COLUMN DOESN'T EXIST - GENERATE IT
    columnType = _mysql_column_type(formattedKey, value, reDatetime)
    if columnType is None:
        return False

    qCreateColumn = """ALTER TABLE `%s` ADD `%s` %s DEFAULT NULL""" % (
        dbTableName, formattedKey, columnType)
    # ADD COMMENT TO GIVE THE ORIGINAL KEYWORD IF IT WAS REFORMATTED FOR MYSQL
    if key != formattedKey:
        qCreateColumn += " COMMENT 'original keyword: " + key + """'"""
    try:
        log.info('creating the ' +
                 formattedKey + ' column in the ' + dbTableName + ' table')
        writequery(
            log=log,
            sqlQuery=qCreateColumn,
            dbConn=dbConn
        )
    except Exception as e:
        log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName
                  + ' table -- ' + str(e) + '\n')
    return True


def _create_unique_index_if_missing(log, dbConn, dbTableName, uniqueKeyList):
    """Create a unique index over the (formatted) ``uniqueKeyList`` columns if it does not yet exist."""
    # WORK ON FRESH NAMES SO THE CALLER'S LIST IS NEVER REWRITTEN
    columnNames = [_format_mysql_column_name(k) for k in uniqueKeyList]
    indexName = "_".join(columnNames).lower()

    sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
        dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )

    if rows[0]['COUNT(*)'] == 0:
        addUniqueKey = 'ALTER TABLE `' + dbTableName + \
            '` ADD unique ' + indexName + \
            """ (""" + ','.join(columnNames) + ')'
        writequery(
            log=log,
            sqlQuery=addUniqueKey,
            dbConn=dbConn
        )


def convert_dictionary_to_mysql_table(
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=None,
        dbConn=False,
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False,
        batchInserts=True,
        reDatetime=False,
        skipChecks=False,
        dateCreated=True):
    """convert dictionary to mysql table

    The ``dictionary`` and ``uniqueKeyList`` passed in are never modified; the function works on private copies.

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary. Values are either scalars or 2-item lists of ``[value, comment]``. List entries whose value is ``None`` are skipped, as are empty-string keys.
        - ``dbConn`` -- the db connection (only required if ``returnInsertOnly`` is False)
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` - a list of column names that need combined to create the unique key
        - ``createHelperTables`` -- must be a boolean; it is validated but not otherwise used by this function
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it or touch the database)
        - ``dateModified`` -- add a modification date and updated flag to the mysql table
        - ``replace`` -- append an ``ON DUPLICATE KEY UPDATE`` clause to the insert (useful when updates are required)
        - ``batchInserts`` -- if returning insert statements, return a parameterised insert command and a separate value tuple
        - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``)
        - ``skipChecks`` -- skip the table-exists check (``CREATE TABLE IF NOT EXISTS`` is always issued instead). Less robust but a little faster.
        - ``dateCreated`` -- add a timestamp for dateCreated?

    **Return:**
        - ``(insertCommand, valueTuple)`` if ``returnInsertOnly`` and ``batchInserts`` are both True
        - a single insert statement string if ``returnInsertOnly`` is True and ``batchInserts`` is False
        - ``(None, None)`` once the insert has been executed on the database

    **Raises:**
        - ``TypeError`` / ``ValueError`` if the arguments fail validation (only checked when ``returnInsertOnly`` is False)

    **Usage:**

        To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist:

        .. code-block:: python

            from fundamentals.mysql import convert_dictionary_to_mysql_table
            dictionary = {"a newKey": "cool", "and another": "super cool",
                    "uniquekey1": "cheese", "uniqueKey2": "burgers"}

            convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=False,
                replace=True
            )

        Or just return the insert statement with a tuple of values, i.e. do not execute the command on the database:

            insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=False,
                batchInserts=True
            )

            print(insertCommand, valueTuple)

            # OUT: INSERT INTO `testing_table`
            # (`a_newKey`,`and_another`,`uniqueKey2`,`uniquekey1`, dateCreated)
            # VALUES (%s, %s, %s, %s, NOW())
            # ('cool', 'super cool', 'burgers', 'cheese')

        You can also return a single, fully written-out insert statement using ``batchInserts = False``. Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found:

            insert = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=True,
                batchInserts=False
            )

            print(insert)

            # OUT: INSERT IGNORE INTO `testing_table`
            # (`a_newKey`,`and_another`,`uniqueKey2`,`uniquekey1`, dateCreated)
            # VALUES ("cool" ,"super cool" ,"burgers" ,"cheese", NOW())
            # ON DUPLICATE KEY UPDATE `a_newKey`="cool", `and_another`="super cool",
            # `uniqueKey2`="burgers", `uniquekey1`="cheese"
    """
    log.debug('starting the ``convert_dictionary_to_mysql_table`` function')

    if uniqueKeyList is None:
        uniqueKeyList = []

    if not reDatetime:
        reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # NOTE(review): plain INSERT is used when *not* replacing and INSERT IGNORE
    # when replacing, which looks inverted relative to the "ignore duplicate
    # entries" intent - left unchanged as callers may rely on it; confirm
    insertVerb = "INSERT IGNORE" if replace else "INSERT"

    executeOnDb = not returnInsertOnly

    if executeOnDb:
        # TEST THE ARGUMENTS
        _validate_arguments(
            log, dictionary, dbConn, uniqueKeyList, createHelperTables)

        # TEST IF TABLE EXISTS
        if not skipChecks:
            tableExists = table_exists.table_exists(
                dbConn=dbConn,
                log=log,
                dbTableName=dbTableName
            )
        else:
            tableExists = False

        # CREATE THE TABLE IF IT DOES NOT EXIST
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
                (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
                `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `updated` tinyint(4) DEFAULT '0',
                PRIMARY KEY (`primaryId`))
                ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % {"dbTableName": dbTableName}
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )

    # COPY THE DICTIONARY (AND ITS [value, comment] LISTS) SO THE CALLER'S DATA
    # IS LEFT UNTOUCHED; ENTRIES WITH A None VALUE ARE DROPPED HERE
    workingDict = {}
    for key, value in dictionary.items():
        if isinstance(value, list):
            if value[0] is None:
                continue
            value = list(value)
        workingDict[key] = value

    # ADD EXTRA COLUMNS TO THE DICTIONARY
    if dateModified:
        workingDict['dateLastModified'] = [
            str(times.get_now_sql_datetime()), "date row was modified"]
        workingDict['updated'] = [
            1 if replace else 0, "this row has been updated"]

    formattedKeyList = []
    myValues = []

    # ITERATE THROUGH THE DICTIONARY (SORTED BY KEY) AND GENERATE THE TABLE
    # COLUMN WITH THE NAME OF THE KEY, IF IT DOES NOT EXIST
    for key, value in sorted(workingDict.items()):
        if len(key) == 0:
            # AN EMPTY KEY CANNOT NAME A COLUMN - SKIP IT SO THE COLUMN AND
            # VALUE LISTS STAY ALIGNED
            log.warning(
                'skipping an empty dictionary key when building the insert for the ' + dbTableName + ' table')
            continue

        formattedKey = _format_mysql_column_name(key)
        formattedKeyList.append(formattedKey)

        # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
        # STRING TO MYSQL)
        if isinstance(value, list) and isinstance(value[0], list):
            value[0] = str(yaml.dump(value[0]))

        # ESCAPE CHARACTERS THAT COLLIDE WITH MYSQL
        if isinstance(value, str):
            value = value.replace('\\', '\\\\')
            value = value.replace('"', '\\"')
            try:
                # ONLY BYTE STRINGS HAVE .decode; TEXT STRINGS FALL THROUGH
                value = value.decode(
                    "utf-8", "ignore").encode("ascii", "ignore")
            except (AttributeError, UnicodeError):
                pass

        # COLLECT THE VALUES IN A LIST - EASIER TO GENERATE THE MYSQL COMMAND
        # LATER
        if isinstance(value, list) and isinstance(value[0], str):
            myValues.append('%s' % value[0].strip())
        elif isinstance(value, list):
            myValues.append('%s' % (value[0], ))
        else:
            myValues.append('%s' % (value, ))

        if executeOnDb:
            rawValue = value[0] if isinstance(value, list) else value
            columnUsable = _create_column_if_missing(
                log, dbConn, dbTableName, key, formattedKey, rawValue, reDatetime)
            if not columnUsable:
                # DO NOT KNOW WHAT FORMAT TO ADD THIS KEY IN MYSQL - REMOVE IT
                # FROM THE INSERT
                formattedKeyList.pop()
                myValues.pop()

    # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
    if executeOnDb and len(uniqueKeyList):
        _create_unique_index_if_missing(
            log, dbConn, dbTableName, uniqueKeyList)

    myKeys = '`,`'.join(formattedKeyList)

    if returnInsertOnly and batchInserts:
        # PARAMETERISED INSERT + SEPARATE TUPLE OF VALUES
        valueString = ", ".join(["%s"] * len(myValues))
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        valueTuple = tuple(
            None if m == u"None" else m for m in myValues)

        dup = ""
        if replace:
            # NOTE(review): the clause keeps its trailing comma, exactly as
            # before - presumably the caller finishes it off; confirm
            dup = " ON DUPLICATE KEY UPDATE " + "".join(
                " %s=values(%s)," % (k, k) for k in formattedKeyList)

        insertCommand = _apply_replacements(
            insertCommand + dup, _STATEMENT_NOISE)

        if not dateCreated:
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        return insertCommand, valueTuple

    # GENERATE THE FULLY WRITTEN-OUT INSERT COMMAND
    myValues = '" ,"'.join(myValues)
    # REMOVE SOME CONVERSION NOISE
    myValues = _apply_replacements(myValues, _VALUE_NOISE)

    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        # NOTE(review): values are recovered by splitting on ' ,' so a value
        # that itself contains ' ,' will misalign the keys and values
        dupPairs = list(
            zip(formattedKeyList, ('"' + myValues).split(" ,")))
        dup = " ON DUPLICATE KEY UPDATE " + "".join(
            " `%s`=%s," % (k, v) for k, v in dupPairs)

        if dateModified:
            # ONLY FLAG THE ROW AS UPDATED (AND BUMP dateLastModified) IF AT
            # LEAST ONE COLUMN VALUE ACTUALLY CHANGES
            unchanged = "".join(
                (" `%s` is %s AND " if v == "null" else " `%s`=%s AND ") % (k, v)
                for k, v in dupPairs)
            # [:-5] STRIPS THE TRAILING " AND "
            dup = (dup + " updated=IF(" + unchanged)[:-5] + \
                ", 0, 1), dateLastModified=IF("
            dup = (dup + unchanged)[:-5] + ", dateLastModified, NOW())"
        else:
            # STRIP THE TRAILING COMMA
            dup = dup[:-1]

    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
        myValues + """, NOW()) """ + dup + """ """

    if not dateCreated:
        # ONLY THE FIRST NOW() BELONGS TO dateCreated - ANY LATER ONE IS PART
        # OF THE dateLastModified UPDATE CLAUSE
        addValue = addValue.replace(
            ", dateCreated)", ")").replace(", NOW())", ")", 1)

    addValue = _apply_replacements(addValue, _STATEMENT_NOISE)

    if returnInsertOnly:
        return addValue

    try:
        writequery(
            log=log,
            sqlQuery=addValue,
            dbConn=dbConn
        )
    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
    return None, None