dbicnf.py
#!/usr/bin/env python
"""
An example using commandline parsing and pattern matching against filenames, allowing smart
DBI writer scripts to be created that minimize code duplication.

However, make sure that the arguments used are still captured into the
repository, either by creating one-line scripts that invoke the
flexible scripts or by arranging for the flexible scripts to read driver files.

"""
import os, sys, re, argparse, logging, shlex
from pprint import pformat
from datetime import datetime
log = logging.getLogger(__name__)

class TimeAction(argparse.Action):
    """Converts string date representations into datetimes """
    def __call__(self, parser, ns, values, option_string=None):
        log.debug('%r %r %r' % (ns, values, option_string))
        values = datetime.strptime( values, ns.timeformat )
        setattr(ns, self.dest, values)

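# Illustrative sketch (not part of the original module): TimeAction simply applies
# datetime.strptime with the parser's timeformat, so an argument string in the default
# format becomes a datetime instance. The timestamp shown is an arbitrary example.
#
#    from datetime import datetime
#    datetime.strptime("2011-08-04 05:55:47", "%Y-%m-%d %H:%M:%S")
#    # -> datetime.datetime(2011, 8, 4, 5, 55, 47)
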
class DbiCnf(dict):
    """
    DbiCnf is a dict holding parameters that are inputs to defining 
    the DBI writer and ingredients like the contextrange etc.

    All outputs of this class, such as ``timestart``, ``cr`` etc.,
    are implemented as dynamically invoked properties, meaning 
    that the only important state held is in this dict 
    in the form of raw python types : str, int, datetime.

    This dict is composed from class defaults, ctor arguments, commandline 
    parse results, tokens parsed from the path parameter by regular expression, 
    and interactive updates.

    Precedence in decreasing order:

    #. commandline arguments
    #. updates made after the ctor
    #. ctor keyword arguments
    #. basis defaults in ``DbiCnf.defaults``  

    Usage in writer scripts::

       from DybPython import DbiCnf
       cnf = DbiCnf()
       cnf()              ## performs the parse  

       from DybDbi import GCalibPmtSpec, CSV
       wrt = cnf.writer( GCalibPmtSpec ) 
 
       src = CSV( cnf.path )
       for r in src:
           instance = GCalibPmtSpec.Create( **r )
           wrt.Write( instance )

       if not cnf.dummy:
           assert wrt.close()

    Debugging/checking usage in ipython::

       from DybPython import DbiCnf
       cnf = DbiCnf(key=val,key2=val2)
       cnf['key3'] = 'val3'

       cnf()    ## performs command line parse
       cnf("All_AD1_Data.csv --task 20 --runtimestart 10 --dbconf tmp_offline_db:offline_db ")   ## test parsing gives desired params
       print cnf
       cnf['runtimestart'] = 10
       cnf.timestart            
       cnf['runtimestart'] = 1000    
       cnf.timestart                 ## will do timestart lookup for the changed run


    The simplest and recommended usage is to define a standard `.csv` file naming convention.
    For example, when using the default context pattern::
   
       "^(?P<site>All|DayaBay|Far|LingAo|Mid|SAB)_(?P<subsite>AD1|AD2|AD3|AD4|All|IWS|OWS|RPC|Unknown)_(?P<simflag>MC|Data)\.csv"

    The tokens `site`, `subsite` and `simflag` are extracted by the pattern match from basenames such as:

    #. SAB_AD1_Data.csv
    #. SAB_AD2_Data.csv

    """
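    # Illustrative sketch (not part of the original source): applying the default context
    # pattern to one of the example basenames above yields the context tokens that later
    # update this dict, e.g.
    #
    #    import re
    #    ptn = re.compile(r"^(?P<site>All|DayaBay|Far|LingAo|Mid|SAB)_(?P<subsite>AD1|AD2|AD3|AD4|All|IWS|OWS|RPC|Unknown)_(?P<simflag>MC|Data)\.csv")
    #    ptn.match("SAB_AD1_Data.csv").groupdict()
    #    # -> {'site': 'SAB', 'subsite': 'AD1', 'simflag': 'Data'}
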
    ## corresponds to an enum
    sites = "All|DayaBay|Far|LingAo|Mid|SAB"
    subsites = "AD1|AD2|AD3|AD4|All|IWS|OWS|RPC|Unknown"
    simflags = "Data|MC"

    def _defaults(self):
        return dict(
            loglevel="INFO",
            logpath=None,
            logformat='%(asctime)s %(name)s %(levelname)-8s %(message)s',
            task=0,
            site=None,
            subsite=None,
            simflag=None,
            timeformat="%Y-%m-%d %H:%M:%S",
            timestart=None,
            timeend=None,
            runtimestart=None,
            runtimeend=None,
            ctxptn="^(?P<site>%s)_(?P<subsite>%s)_(?P<simflag>%s)\.csv" % ( self.sites, self.subsites, self.simflags ),
            nomatch=False,
            dbconf="tmp_offline_db",
            dbno=0,
            dummy=False,
        )
    defaults = property( _defaults )
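    # Illustrative sketch (not part of the original source): the ctor updates this dict
    # from the basis defaults first and then from its keyword arguments, so ctor kwargs
    # override the defaults while untouched keys keep their basis values, e.g.
    #
    #    cnf = DbiCnf(task=1)
    #    cnf['task']      # -> 1 (ctor kwarg overrides the basis default of 0)
    #    cnf['dbconf']    # -> 'tmp_offline_db' (basis default retained)
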
    def argparser_(self):
        ap = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars='@', formatter_class=argparse.RawDescriptionHelpFormatter )

        po = ap.add_argument_group("positional")
        po.add_argument('path',                help='path to csv file, mandatory argument')

        op = ap.add_argument_group("operational")
        op.add_argument('-l','--loglevel',     help='logging level INFO,WARN,DEBUG... Default %(default)s ')
        op.add_argument(     "--logpath",      help="Path to write log file to. Default %(default)s ")
        op.add_argument(     "--logformat",    help="Used by logger. Default %(default)s ")
        op.add_argument('-n','--dummy',        help='Dummy run. Default %(default)s ', action="store_true" )

        ap.add_argument(     '--dbconf',       help='Section of ~/.my.cnf file. Default %(default)s. When using runtimestart/runtimeend this must be a cascade such as tmp_offline_db:offline_db ')

        nu = ap.add_argument_group("not usually changed")
        nu.add_argument(     '--timeformat',   help='format for times. Default %(default)s ')
        nu.add_argument(     '--task',         help='set non zero for testing non-default algorithms. Default %(default)s ', type=int )
        nu.add_argument(     '--dbno',         help='DB number in cascade. Default %(default)s ', type=int )

        cx = ap.add_argument_group("context")
        cx.add_argument(     '--site',         help='string to be converted into enum integer with Site.FromString. Default %(default)s ', choices=self.sites.split("|"))
        cx.add_argument(     '--subsite',      help='string to be converted into enum integer with DetectorId.FromString. Default %(default)s ', choices=self.subsites.split("|"))
        cx.add_argument(     '--simflag',      help='string to be converted into enum integer with SimFlag.FromString. Default %(default)s ', choices=self.simflags.split("|"))
        cx.add_argument(     '--ctxptn',       help='Regular expression string to be matched against csv basenames. Often includes `site`, `subsite` and `simflag`. Default %(default)s ')
        cx.add_argument('-M','--nomatch',      help='Regular expression string is not forced to match csv basenames. Default %(default)s ', action="store_true" )

        ts = ap.add_mutually_exclusive_group()
        ts.add_argument(     '--timestart',    help='contextrange start time in UTC. Default %(default)s corresponds to TimeStamp.GetBOT ', action=TimeAction )
        ts.add_argument(     '--runtimestart', help='contextrange start time in UTC corresponding to the timestart of the run number passed. Default %(default)s corresponds to using timestart ', type=int )

        te = ap.add_mutually_exclusive_group()
        te.add_argument(     '--timeend',      help='contextrange end time in UTC. Default %(default)s corresponds to TimeStamp.GetEOT ', action=TimeAction )
        te.add_argument(     '--runtimeend',   help='contextrange end time in UTC corresponding to the timeend of the run number passed. Default %(default)s corresponds to using timeend ', type=int )

        return ap

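    # Illustrative sketch (not part of the original source): because the parser is built
    # with fromfile_prefix_chars='@', the "driver files" mentioned in the module docstring
    # can be plain text files holding one argument per line, referenced as @filename.
    # The file name used here is hypothetical.
    #
    #    # contents of write_sab_ad1.args (one argument per line):
    #    #   SAB_AD1_Data.csv
    #    #   --dbconf
    #    #   tmp_offline_db
    #
    #    cnf = DbiCnf()
    #    cnf("@write_sab_ad1.args")     ## arguments are read from the driver file
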
    def __init__(self, *args, **kwa ):
        """
        :param kwa: ctor keyword arguments override class defaults ``DbiCnf.defaults`` updating into `self` 
        """
        dict.__init__(self)
        self.update( self.defaults )
        self.update( kwa )

    def __call__(self, args_=None ):
        """
        :param args_: default of None parses system arguments; define for interactive testing

        Performs parse steps:

        #. populates argparser defaults from `self`
        #. parses `args_` or sys.argv 
        #. parses args.path obtaining ``pathdict``
        #. updates `self` from args.path parsed tokens
        #. updates `self` with the raw parsed argument values 

        The first positional argument is interpreted as a path and is pattern matched against 
        a regular expression. 
        """
        ap = self.argparser_()
        ap.set_defaults( **self )

        if args_ and type(args_) == str:
            args_ = shlex.split(args_)     ## for interactive testing

        args = ap.parse_args(args=args_)

        logging.basicConfig( level=getattr(logging,args.loglevel.upper()) )
        log.info( "initial args %r " %  args )

        pathdict = self.parse_path( args.path , args.ctxptn, args.nomatch )

        self.update( pathdict )

        va = vars(args)
        for k,v in va.items():
            if not v:continue   ## None (or empty) parsed values do not override pre-existing entries
            self[k] = v

        if self['dbconf']:
            os.environ['DBCONF'] = self['dbconf']
            log.warn("DBCONF set to %s " % self['dbconf'] )

        self.irl = self._irunlookup()

        ## state kept for debugging only
        self['pathdict'] = pathdict
        self.args = args

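    # Illustrative sketch (not part of the original source) of how the documented precedence
    # arises: the dict contents seed the argparser defaults, and only non-empty parsed values
    # are written back, so commandline arguments win over ctor/interactive settings while
    # untouched options leave them intact, e.g.
    #
    #    cnf = DbiCnf(task=3)
    #    cnf("SAB_AD1_Data.csv")              ## no --task given: cnf['task'] stays 3
    #    cnf("SAB_AD1_Data.csv --task 20")    ## commandline wins: cnf['task'] becomes 20
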
    def __repr__(self):
        return self.__class__.__name__ + "\n" + pformat(dict(self))

    def _irunlookup(self):
        """
        **CAUTION** this takes the runtimestart/runtimeend directly from GDaqRunInfo lookup ...
        so they must be UTC ... if that is not the case, that needs to be fixed 
        """
        rl = filter(None, (self['runtimestart'], self['runtimeend']))
        if rl:
            from DybDbi import IRunLookup
            log.debug("_irunlookup %r " % rl )
            irl = IRunLookup(*map(int,rl))
            log.debug("IRunLookup %r gives ... \n %s " % ( rl, pformat(irl) ))
        else:
            irl = {}
        return irl

    def parse_path(self, path_ , ptn , nomatch ):
        """
        Extract context metadata from the path using the regular expression string 
        supplied. 

        :param path_: path to .csv source file
        :param ptn: regular expression string that can contain tokens for any config parameters
        :param nomatch: when True a failure to match is only warned about rather than being fatal

        :return: dict of strings extracted from the path 
        """
        path = os.path.expandvars(os.path.expanduser(path_))  ## expand envvars and tildes
        name = os.path.basename( path )
        ptn_ = re.compile(ptn)
        match = ptn_.match( name )
        pathdict = {}
        if match:
            pathdict = match.groupdict()
        else:
            if nomatch:
                log.warning("did not match name %s with pattern %s " % ( name , ptn ) )
            else:
                log.fatal("failed to match name %s with pattern %s " % ( name , ptn ) )
                raise Exception
        log.debug( "pathdict %r " %  pathdict )
        return pathdict


    # for consistent structure
    path  = property( lambda self:self['path'] )
    dbno  = property( lambda self:self['dbno'] )
    dummy = property( lambda self:self['dummy'] )

    runtimestart = property(lambda self:int(self['runtimestart']) if self['runtimestart'] else None)
    runtimeend   = property(lambda self:int(self['runtimeend']) if self['runtimeend'] else None)

    def _timestart(self):
        """
        Resolve the context range start time as a TimeStamp: from the run lookup when 
        `runtimestart` is set, otherwise from `timestart`, falling back to TimeStamp.GetBOT 
        """
        from DybDbi import TimeStamp
        rts = self.runtimestart
        if rts:
            if rts not in self.irl:
                self.irl = self._irunlookup()    ## refresh lookup for an interactively changed run number
            timestart = self.irl[rts].vrec.contextrange.timestart
        else:
            timestart = self['timestart']
            timestart = TimeStamp.fromAssumedUTCDatetime( timestart ) if timestart else TimeStamp.GetBOT()
        return timestart
    timestart = property( _timestart, doc=_timestart.__doc__ )

    def _timeend(self):
        """
        Resolve the context range end time as a TimeStamp: from the run lookup when 
        `runtimeend` is set, otherwise from `timeend`, falling back to TimeStamp.GetEOT 
        """
        from DybDbi import TimeStamp
        rte = self.runtimeend
        if rte:
            if rte not in self.irl:
                self.irl = self._irunlookup()    ## refresh lookup for an interactively changed run number
            timeend = self.irl[rte].vrec.contextrange.timeend
        else:
            timeend = self['timeend']
            timeend = TimeStamp.fromAssumedUTCDatetime( timeend ) if timeend else TimeStamp.GetEOT()
        return timeend
    timeend = property( _timeend, doc=_timeend.__doc__ )

    # done longhand to avoid ROOT hijacking --help
    def _site(self):
        """Convert string site into enum integer"""
        from DybDbi import Site
        return Site.FromString( self['site'] )
    site = property( _site )
    def _sitemask(self):
        """Convert string site into enum integer 
           if multi-site masks are needed this will have to be revisited
        """
        from DybDbi import Site
        return Site.FromString( self['site'] )
    sitemask = property( _sitemask, doc=_sitemask.__doc__ )  ## bit icky 

    def _subsite(self):
        """Convert string subsite/DetectorId into enum integer"""
        from DybDbi import DetectorId
        return DetectorId.FromString( self['subsite'] )
    subsite = property( _subsite , doc=_subsite.__doc__)

    def _simflag(self):
        """Convert string simflag into enum integer"""
        from DybDbi import SimFlag
        return SimFlag.FromString( self['simflag'] )
    simflag = property( _simflag, doc=_simflag.__doc__ )
    def _simmask(self):
        """Convert string simflag into enum integer
           (note the simflag is interpreted as the mask)
        """
        from DybDbi import SimFlag
        return SimFlag.FromString( self['simflag'] )
    simmask = property( _simmask, doc=_simmask.__doc__ )

    def _cr(self):
        """
        Convert the strings into enum values, and datetimes into TimeStamps, in
        order to create the ContextRange instance 

        :return: context range instance 
        """
        from DybDbi import ContextRange
        return ContextRange( self.sitemask, self.simmask, self.timestart, self.timeend )
    cr = property( _cr , doc=_cr.__doc__ )

    def writer(self, kls ):
        """
        Create a pre-configured DybDbi writer based on the parsed 
        arguments and source csv filename, creating the corresponding 
        DB table if it does not exist.

        :param kls: DybDbi class, eg GCalibPmtHighGain

        """
        from DybDbi import TimeStamp

        assert kls.__name__[0] == 'G', "DybDbi classes must start with 'G' unlike %s " % kls.__name__
        name = kls.__name__[1:]
        assert self.dbno == 0 , "unexpected dbno %s " % self.dbno

        log.info("creating DB tables for %s in dbno %s  " % ( name , self.dbno ))
        kls().CreateDatabaseTables( self.dbno , name )

        wrt = kls.Wrt()
        wrt.ctx(
             contextrange=self.cr,
             dbno=self.dbno ,
             versiondate=TimeStamp(0,0),
             subsite=self.subsite,
        )
        return wrt

    def logging_(self, args):
        """
        Hmm need some work ...
        """
        loglevel = getattr(logging,args.loglevel.upper())

        sh = logging.StreamHandler()
        sh.setLevel(loglevel)
        fmtr = logging.Formatter(args.logformat)
        sh.setFormatter(fmtr)
        log.addHandler(sh)

        if args.logpath:
            fh = logging.FileHandler(args.logpath,mode="w")
            fh.setFormatter(fmtr)
            fh.setLevel(loglevel)
            log.addHandler(fh)

if __name__ == '__main__':
    cnf = DbiCnf()
    cnf()
    log.info( cnf )

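# Illustrative sketch (not part of the original source): run as a script the module simply
# parses the commandline and logs the resulting configuration, which is handy for checking
# a csv naming convention and arguments before wiring up a real writer. The csv file name
# used here is hypothetical.
#
#    ./dbicnf.py SAB_AD1_Data.csv --dbconf tmp_offline_db --loglevel DEBUG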