/search.css" rel="stylesheet" type="text/css"/> /search.js">
| Classes | Job Modules | Data Objects | Services | Algorithms | Tools | Packages | Directories | Tracs |

In This Package:

dbcf.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 """
00003 Compares schema between common tables from multiple Databases. 
00004 DB are specified via dbconf section names in ~/.my.cnf
00005 
00006 See also :dybgaudi:`Database/DybDbi/tests/test_schema.py`
00007 
00008 Usage::
00009 
00010    ./dbcf.py --help
00011    ./dbcf.py offline_db tmp_nodata tmp_offline_db 
00012    ./dbcf.py offline_db tmp_nodata -t CalibFeeSpecVld 
00013    ./dbcf.py offline_db tmp_nodata -t CalibFeeSpecVld -f SUBSITE 
00014    ./dbcf.py offline_db tmp_nodata -t CalibFeeSpecVld -f SUBSITE -a Type
00015    ./dbcf.py offline_db tmp_nodata -t CalibFeeSpecVld -f SUBSITE -a Type
00016    ./dbcf.py offline_db tmp_nodata -f SUBSITE :
00017 
00018 """
00019 import logging, argparse
00020 from pprint import pformat
00021 from DybPython import DB
00022 log = logging.getLogger(__name__)
00023 
00024 class DBCF(list):
00025     """
00026     DB Schema comparison between tables in Databases identified
00027     by a list of dbconf sectnames.
00028     Table schema comparisons are made for tables that appear 
00029     in all specified Databases
00030 
00031     The basis for comparison is the tuple of fieldatt dicts returned by
00032     MySQL-python in response to "describe TableName", the fieldattr for each 
00033     Field has the form::
00034 
00035         {'Default': None,
00036          'Extra': '',
00037          'Field': 'SEQNO',
00038          'Key': 'PRI',
00039          'Null': 'NO',
00040          'Type': 'int(11)'}
00041 
00042 
00043     Suspect a MySQL version difference in 'Default' reporting 
00044     in offline_db getting '' but in belle7 getting None, resulting 
00045     in many  mismatches (despite same creation commands): 
00046 
00047     PhysAdVld                      TIMEEND              Default    {'tmp_nodata': None, 'offline_db': ''}  
00048 
00049     """
00050     def __init__(self, *dbconf, **kwa ):
00051          """
00052          Establish names of common set of tables that exist in all Databases
00053          """
00054          list.__init__(self, dbconf )
00055 
00056          dbs = map( DB, self )
00057 
00058          tc = set()    
00059          for db in dbs: 
00060              t = set(db.showtables)
00061              if len(tc) == 0:
00062                  tc = set(t)
00063              else:
00064                  tc.intersection_update(t)    ## collect intersected tables
00065 
00066          self.opts = kwa.get('opts', {})
00067          self.dbs = dbs
00068          self.tc = tc
00069 
00070 
00071     def table(self, t ):
00072         """
00073         :param t: table name
00074 
00075         Obtain the descriptions from each Database and rekey  
00076         to return a dict-of-dict-of-dict keyed by fieldname/fieldattr/dbconf, 
00077         The rearrangement puts the dbconf at the base of the structure 
00078         giving access to all values in all db in one place
00079 
00080         ddd is an intermediate dict-of-dict-of-dict keyed on dbconf/fieldname/fieldattr
00081         """
00082         assert t in self.tc, "table %s is not in common " % t     
00083 
00084         # convert per field tuple-of-dicts into dict-of-dicts keyed on fieldname/fieldattr
00085         rejig = lambda x:dict(map(lambda _:(_['Field'],_) , x ))
00086  
00087         dd = []      
00088         ddd = {}     
00089         for n,db in zip(self,self.dbs):
00090             d = db("describe " + t )
00091             dd.append(d)
00092             ddd[n] = rejig(d)
00093         assert len(set(map(len,dd))) == 1 , "mismatch in field length %s " % t  
00094 
00095         kk = None
00096         for n in ddd:
00097            ks = sorted(ddd[n].keys())
00098            if not kk:
00099               kk = ks
00100            else:
00101               assert ks == kk,  "field key mismatch %s \n%r \n%r " % ( t , kk, ks )  
00102 
00103         # rekey to put the sectname at base giving access 
00104         # to all values in all db in one place
00105         msm = {}  
00106         for n in ddd.keys():
00107             for fn in ddd[n]:
00108                 if not fn in msm:msm[fn] = {}
00109                 for fa in ddd[n][fn]:
00110                     if not fa in msm[fn]:msm[fn][fa] = {}
00111 
00112                     v = ddd[n][fn][fa] 
00113                     if fa == 'Default' and not self.opts.get('nofix'): 
00114                          if v == None:v = ''                        
00115 
00116                     msm[fn][fa][n] = v
00117                   
00118         # prune when have only one value in leaf dict  
00119         # NB must use longform .keys() as are mutating the dict 
00120         for fn in msm.keys():
00121             for fa in msm[fn].keys():
00122                  vs = set(msm[fn][fa].values())
00123                  if len(vs) == 1:         ## only one value ... so prune
00124                      del msm[fn][fa]
00125             if len(msm[fn]) == 0:         ## prune up here too
00126                 del msm[fn] 
00127 
00128         return msm 
00129 
00130 
00131     def mismatch(self):
00132         """
00133         Returns dict-of-dict-of-dict-of-dict keyed by tablename/fieldname/fieldattr/dbconf, 
00134         for example from tablename level::
00135        
00136               'SimPmtSpecVld': {'INSERTDATE': {'Default': {'offline_db': '', 'tmp_nodata': None}},
00137                                 'TIMEEND':    {'Default': {'offline_db': '', 'tmp_nodata': None},
00138                                                    'Key': {'offline_db': 'MUL', 'tmp_nodata': ''}},
00139                                 'TIMESTART': {'Default': {'offline_db': '', 'tmp_nodata': None},
00140                                                   'Key': {'offline_db': 'MUL', 'tmp_nodata': ''}},
00141                               'VERSIONDATE': {'Default': {'offline_db': '', 'tmp_nodata': None}}}}
00142 
00143         The dddd is pruned to only contain "paths" with mismatches
00144         """
00145         msm = {}
00146 
00147         ot = self.opts.get('table',None)
00148         op = self.opts.get('skip',None)
00149         skip = op.split(",") if op else [] 
00150 
00151         for t in self.tc:
00152             if ot and t != ot:
00153                 continue  
00154             if t in skip:
00155                 log.warn("skipping %s as has fieldname mismatch needing correction before can apply other checks " % t )
00156             else:
00157                 msm[t] =  self.table( t )
00158         return msm
00159     
00160 
00161     def present(self, msm):
00162 
00163         of = self.opts.get('field',None)
00164         oa = self.opts.get('attr', None)
00165 
00166         for tn in msm:
00167             for fn in msm[tn]:
00168                 if of and fn != of:continue 
00169                 for fa in msm[tn][fn]:
00170                     if oa and fa != oa:continue 
00171                     print "%-30s %-20s %-10s %r " % ( tn,fn,fa, msm[tn][fn][fa] )
00172 
00173     def __call__(self):
00174         msm = self.mismatch()
00175         #print pformat(msm)
00176         self.present(msm)
00177         return msm
00178 
00179     def args_(cls):
00180         ap = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
00181         ap.add_argument('dbconf',   nargs="+", help='Two or more dbconf section names from ~/.my.cnf file')
00182         ap.add_argument('-l','--loglevel',     help='logging level INFO,WARN,DEBUG... Default %(default)s ')
00183         ap.add_argument('-t','--table',        help='Restrict comparison to just this table. Default %(default)s ')
00184         ap.add_argument('-f','--field',        help='Restrict comparison to just this field. Default %(default)s ')
00185         ap.add_argument('-a','--attr',         help='Restrict comparison to just this attribute. Default %(default)s ')
00186         ap.add_argument('-s','--skip',         help='Comma delimited list of tables to skip. Default %(default)s ')
00187         ap.add_argument('-N','--nofix', action="store_true"  ,  help='Dont fix "Default" reporting.  Default %(default)s ')
00188         ap.set_defaults(
00189              loglevel="INFO",
00190                table=None,
00191                field=None,
00192                attr=None,
00193                skip=None,
00194                nofix=False,
00195          )
00196         args = ap.parse_args()
00197         loglevel = getattr(logging,args.loglevel.upper())
00198         logging.basicConfig( level=loglevel )
00199         return args
00200     args_ = classmethod( args_ )
00201 
00202 
00203 def main():
00204     args = DBCF.args_()
00205     log.debug("main %r" %  args )
00206     cf = DBCF(*args.dbconf, opts=vars(args))
00207     msm = cf()
00208     return msm
00209 
00210 
00211 if __name__ == '__main__':
00212     main()
00213 
| Classes | Job Modules | Data Objects | Services | Algorithms | Tools | Packages | Directories | Tracs |

Generated on Fri May 16 2014 09:55:40 for DybPython by doxygen 1.7.4