#!/usr/bin/env python
"""
An example using commandline parsing and pattern match against filenames, allowing smart
DBI writer scripts to be created that minimize code duplication.

However make sure that arguments used are still captured into the
repository either by creating one line scripts that invoke the
flexible scripts. Or arranging for flexible scripts to read driver files.

"""
import os, sys, re, argparse, logging, shlex
from pprint import pformat
from datetime import datetime
log = logging.getLogger(__name__)


class TimeAction(argparse.Action):
    """Converts string date representations into datetimes

    The format string is read from ``ns.timeformat``, ie the value the
    namespace holds at the moment this option is processed.
    """
    def __call__(self, parser, ns, values, option_string=None):
        log.debug('%r %r %r', ns, values, option_string)
        values = datetime.strptime(values, ns.timeformat)
        setattr(ns, self.dest, values)


class DbiCnf(dict):
    r"""
    DbiCnf is a dict holding parameters that are inputs to defining
    the DBI writer and ingredients like contextrange etc..

    All outputs of this class such as ``timestart``, ``cr`` etc..
    are implemented as dynamically invoked properties, meaning
    that the only important state held is in this dict
    in the form of raw python types : str, int, datetime.

    This dict is composed with class defaults, ctor arguments, commandline
    parsed results, path parameter regular expression parsed tokens,
    interactive updating.

    Precedence in decreasing order:

    #. commandline arguments
    #. after ctor updates
    #. ctor keyword arguments
    #. basis defaults in ``DbiCnf.defaults``

    Usage in writer scripts::

         from DybPython import DbiCnf
         cnf = DbiCnf()
         cnf()    ## performs the parse

         from DybDbi import GCalibPmtSpec, CSV
         wrt = cnf.writer( GCalibPmtSpec )

         src = CSV( cnf.path )
         for r in src:
             instance = GCalibPmtSpec.Create( **r )
             wrt.Write( instance )

         if not cnf.dummy:
             assert wrt.close()

    Debugging/checking usage in ipython::

         from DybPython import DbiCnf
         cnf = DbiCnf(key=val,key2=val2)
         cnf['key3'] = 'val3'

         cnf()     ## performs command line parse
         cnf("All_AD1_Data.csv --task 20 --runtimestart 10 --dbconf tmp_offline_db:offline_db ")   ## test parsing gives desired params
         print cnf
         cnf['runtimestart'] = 10
         cnf.timestart
         cnf['runtimestart'] = 1000
         cnf.timestart       ## will do timestart lookup for the changed run


    The simplest and recommended usage is to define a standard `.csv` file naming convention.
    For example when using the default context pattern::

         "^(?P<site>All|DayaBay|Far|LingAo|Mid|SAB)_(?P<subsite>AD1|AD2|AD3|AD4|All|IWS|OWS|RPC|Unknown)_(?P<simflag>MC|Data)\.csv"

    The tokens `site`, `subsite` and `simflag` are extracted from basenames such as the below by the pattern matching.

    #. SAB_AD1_Data.csv
    #. SAB_AD2_Data.csv

    """
    ## corresponds to an enum
    sites = "All|DayaBay|Far|LingAo|Mid|SAB"
    subsites = "AD1|AD2|AD3|AD4|All|IWS|OWS|RPC|Unknown"
    simflags = "Data|MC"

    def _defaults(self):
        """
        :return: fresh dict of the basis default parameters, the ``ctxptn`` regular
                 expression is assembled from the class level ``sites``, ``subsites``
                 and ``simflags`` alternations
        """
        return dict(
            loglevel="INFO",
            logpath=None,
            logformat='%(asctime)s %(name)s %(levelname)-8s %(message)s',
            task=0,
            site=None,
            subsite=None,
            simflag=None,
            timeformat="%Y-%m-%d %H:%M:%S",
            timestart=None,
            timeend=None,
            runtimestart=None,
            runtimeend=None,
            ## raw string keeps the regex escape ``\.`` without an invalid escape warning
            ctxptn=r"^(?P<site>%s)_(?P<subsite>%s)_(?P<simflag>%s)\.csv" % (self.sites, self.subsites, self.simflags),
            nomatch=False,
            dbconf="tmp_offline_db",
            dbno=0,
            dummy=False,
        )
    defaults = property(_defaults)

    def argparser_(self):
        """
        :return: argparse.ArgumentParser instance with all options declared,
                 no defaults are set here : they are supplied from `self` by ``__call__``
        """
        ap = argparse.ArgumentParser(description=__doc__, fromfile_prefix_chars='@', formatter_class=argparse.RawDescriptionHelpFormatter)

        po = ap.add_argument_group("positional")
        po.add_argument('path', help='path to csv file, mandatory argument')

        op = ap.add_argument_group("operational")
        op.add_argument('-l', '--loglevel', help='logging level INFO,WARN,DEBUG... Default %(default)s ')
        op.add_argument("--logpath", help="Path to write log file to. Default %(default)s ")
        op.add_argument("--logformat", help="Used by logger. Default %(default)s ")
        op.add_argument('-n', '--dummy', help='Dummy run. Default %(default)s ', action="store_true")

        ap.add_argument('--dbconf', help='Section of ~/.my.cnf file. Default %(default)s when using runtimestart/runtimeend must be cascade such as tmp_offline_db:offline_db ')

        nu = ap.add_argument_group("not usually changed")
        nu.add_argument('--timeformat', help='format for times. Default %(default)s ')
        nu.add_argument('--task', help='set non zero for testing non-default algorithms. Default %(default)s ', type=int)
        nu.add_argument('--dbno', help='DB number in cascade. Default %(default)s ', type=int)

        cx = ap.add_argument_group("context")
        cx.add_argument('--site', help='string to be converted into enum integer with Site.FromString. Default %(default)s ', choices=self.sites.split("|"))
        cx.add_argument('--subsite', help='string to be converted into enum integer with DetectorId.FromString. Default %(default)s ', choices=self.subsites.split("|"))
        cx.add_argument('--simflag', help='string to be converted into enum integer with SimFlag.FromString. Default %(default)s ', choices=self.simflags.split("|"))
        cx.add_argument('--ctxptn', help='Regular expression string to be matched against csv basenames. Often includes `site`, `subsite` and `simflag`. Default %(default)s ')
        cx.add_argument('-M', '--nomatch', help='Regular expression string is not forced to match csv basenames. Default %(default)s ', action="store_true")

        ## start can be given as a time or as a run number, not both
        ts = ap.add_mutually_exclusive_group()
        ts.add_argument('--timestart', help='contextrange start time in UTC. Default %(default)s corresponds to TimeStamp.GetBOT ', action=TimeAction)
        ts.add_argument('--runtimestart', help='contextrange start time in UTC corresponding to timestart for run number passed. Default %(default)s corresponds to use timestart ', type=int)

        ## likewise for the end
        te = ap.add_mutually_exclusive_group()
        te.add_argument('--timeend', help='contextrange end time in UTC. Default %(default)s corresponds to TimeStamp.GetEOT ', action=TimeAction)
        te.add_argument('--runtimeend', help='contextrange end time in UTC corresponding to timeend of run number passed. Default %(default)s corresponds to use timeend ', type=int)

        return ap

    def __init__(self, *args, **kwa):
        """
        :param args: positional arguments are accepted but not used
        :param kwa: ctor keyword arguments override class defaults ``DbiCnf.defaults`` updating into `self`
        """
        dict.__init__(self)
        self.update(self.defaults)
        self.update(kwa)
        ## run lookup cache, filled by ``__call__`` and refreshed on demand by
        ## the ``timestart``/``timeend`` properties; defined here so those
        ## properties are usable before ``__call__`` has been invoked
        self.irl = {}

    def __call__(self, args_=None):
        """
        :param args_: default of None parses system arguments, define for interactive testing

        Performs parse steps:

        #. populates argparser defaults from `self`
        #. parses `args_` or sys.argv
        #. parses args.path obtaining ``pathdict``
        #. updates `self` from args.path parsed tokens
        #. updates `self` raw initial parsed values

        First positional argument is interpreted as a path and is pattern matched against
        a regular expression.

        Side effects: configures logging with ``logging.basicConfig``, sets the
        ``DBCONF`` environment variable when ``dbconf`` is set, and performs the
        run lookup when ``runtimestart``/``runtimeend`` are set.
        """
        ap = self.argparser_()
        ap.set_defaults(**self)

        if args_ and isinstance(args_, str):
            args_ = shlex.split(args_)    ## for interactive testing

        args = ap.parse_args(args=args_)

        ## NOTE(review): only the level is applied here, ``--logformat`` and
        ## ``--logpath`` are parsed but not used by this call
        logging.basicConfig(level=getattr(logging, args.loglevel.upper()))
        log.info("initial args %r ", args)

        pathdict = self.parse_path(args.path, args.ctxptn, args.nomatch)

        self.update(pathdict)

        ## NOTE(review): the falsy test skips 0, False and "" as well as None,
        ## so eg ``--task 0`` cannot override a non-zero pre-existing value;
        ## left as is because path parsed tokens rely on not being overridden
        ## by falsy argument defaults
        for k, v in vars(args).items():
            if not v:
                continue    ## None does not trump pre-existers
            self[k] = v

        if self['dbconf']:
            os.environ['DBCONF'] = self['dbconf']
            log.warning("DBCONF set to %s ", self['dbconf'])

        self.irl = self._irunlookup()

        ## state for debug only
        self['pathdict'] = pathdict
        self.args = args

    def __repr__(self):
        return self.__class__.__name__ + "\n" + pformat(dict(self))

    def _irunlookup(self):
        """
        **CAUTION** this takes the runtimestart/runtimeend directly from GDaqRunInfo lookup ...
        so they must be UTC ... if that is not the case, that needs to be fixed

        :return: ``IRunLookup`` instance for the run numbers that are set,
                 or an empty dict when neither ``runtimestart`` nor ``runtimeend`` is set
        """
        ## a real list, so the emptiness test below works on python 2 and 3
        rl = [run for run in (self['runtimestart'], self['runtimeend']) if run]
        if rl:
            from DybDbi import IRunLookup
            log.debug("_irunlookup %r ", rl)
            irl = IRunLookup(*[int(run) for run in rl])
            log.debug("IRunLookup %r gives ... \n %s ", rl, pformat(irl))
        else:
            irl = {}
        return irl

    def parse_path(self, path_, ptn, nomatch):
        """
        Extract context metadata from the path using the regular expression string
        supplied.

        :param path_: path to .csv source file, envvars and twiddles are expanded
        :param ptn: regular expression string that can contain tokens for any config parameters,
                    it is matched against the start of the basename of the path
        :param nomatch: when True a failure to match only warns and an empty dict is returned

        :rtype dict: dict of strings extracted from the path
        :raises ValueError: when the basename does not match and `nomatch` is not set
        """
        path = os.path.expandvars(os.path.expanduser(path_))    ## expand envvars or twiddles
        name = os.path.basename(path)
        match = re.compile(ptn).match(name)
        pathdict = {}
        if match:
            pathdict = match.groupdict()
        elif nomatch:
            log.warning("did not match name %s with pattern %s ", name, ptn)
        else:
            log.critical("failed to match name %s with pattern %s ", name, ptn)
            raise ValueError("failed to match name %s with pattern %s " % (name, ptn))
        log.debug("pathdict %r ", pathdict)
        return pathdict

    # for consistent structure
    path = property(lambda self: self['path'])
    dbno = property(lambda self: self['dbno'])
    dummy = property(lambda self: self['dummy'])

    runtimestart = property(lambda self: int(self['runtimestart']) if self['runtimestart'] else None)
    runtimeend = property(lambda self: int(self['runtimeend']) if self['runtimeend'] else None)

    def _timestart(self):
        """
        Start of the contextrange : taken from the run lookup when ``runtimestart``
        is set, otherwise from the ``timestart`` datetime converted into a TimeStamp,
        falling back to ``TimeStamp.GetBOT()`` when that is not set either.
        """
        from DybDbi import TimeStamp
        rts = self.runtimestart
        if rts:
            if rts not in self.irl:
                ## run changed since the last lookup : keep the refreshed result
                self.irl = self._irunlookup()
            timestart = self.irl[rts].vrec.contextrange.timestart
        else:
            timestart = self['timestart']
            timestart = TimeStamp.fromAssumedUTCDatetime(timestart) if timestart else TimeStamp.GetBOT()
        return timestart
    timestart = property(_timestart, doc=_timestart.__doc__)

    def _timeend(self):
        """
        End of the contextrange : taken from the run lookup when ``runtimeend``
        is set, otherwise from the ``timeend`` datetime converted into a TimeStamp,
        falling back to ``TimeStamp.GetEOT()`` when that is not set either.
        """
        from DybDbi import TimeStamp
        rte = self.runtimeend
        if rte:
            if rte not in self.irl:
                ## run changed since the last lookup : keep the refreshed result
                self.irl = self._irunlookup()
            timeend = self.irl[rte].vrec.contextrange.timeend
        else:
            timeend = self['timeend']
            timeend = TimeStamp.fromAssumedUTCDatetime(timeend) if timeend else TimeStamp.GetEOT()
        return timeend
    timeend = property(_timeend, doc=_timeend.__doc__)

    # done longhand to avoid root hijacking --help
    def _site(self):
        """Convert string site into enum integer"""
        from DybDbi import Site
        return Site.FromString(self['site'])
    site = property(_site)

    def _sitemask(self):
        """Convert string site into enum integer
        if multi-site masks are needed will have to revisit this
        """
        from DybDbi import Site
        return Site.FromString(self['site'])
    sitemask = property(_sitemask, doc=_sitemask.__doc__)    ## bit icky

    def _subsite(self):
        """Convert string subsite/DetectorId into enum integer"""
        from DybDbi import DetectorId
        return DetectorId.FromString(self['subsite'])
    subsite = property(_subsite, doc=_subsite.__doc__)

    def _simflag(self):
        """Convert string simflag into enum integer"""
        from DybDbi import SimFlag
        return SimFlag.FromString(self['simflag'])
    simflag = property(_simflag, doc=_simflag.__doc__)

    def _simmask(self):
        """Convert string simflag into enum integer
        (note the simflag is interpreted as the mask)
        """
        from DybDbi import SimFlag
        return SimFlag.FromString(self['simflag'])
    simmask = property(_simmask, doc=_simmask.__doc__)

    def _cr(self):
        """
        Convert the strings into enum value, and datetimes into TimeStamps in
        order to create the ContextRange instance

        :return: context range instance
        """
        from DybDbi import ContextRange
        return ContextRange(self.sitemask, self.simmask, self.timestart, self.timeend)
    cr = property(_cr, doc=_cr.__doc__)

    def writer(self, kls):
        """
        Create a pre-configured DybDbi writer based on
        arguments and source csv filename parsing and
        creates the corresponding DB table if it does not exist.

        :param kls: DybDbi class, eg GCalibPmtHighGain
        :return: writer obtained from ``kls.Wrt()`` with its context configured

        """
        from DybDbi import TimeStamp

        assert kls.__name__[0] == 'G', "DybDbi classes must start with 'G' unlike %s " % kls.__name__
        name = kls.__name__[1:]    ## table name is the class name without the leading 'G'
        assert self.dbno == 0, "unexpected dbno %s " % self.dbno

        log.info("creating DB tables for %s in dbno %s ", name, self.dbno)
        kls().CreateDatabaseTables(self.dbno, name)

        wrt = kls.Wrt()
        wrt.ctx(
            contextrange=self.cr,
            dbno=self.dbno,
            versiondate=TimeStamp(0, 0),
            subsite=self.subsite,
        )
        return wrt

    def logging_(self, args):
        """
        Hmm need some work ...

        Attaches a stream handler, and a file handler when ``args.logpath`` is set,
        to the module logger using ``args.loglevel`` and ``args.logformat``.

        NOTE(review): not invoked anywhere in this module, ``__call__`` uses
        ``logging.basicConfig`` instead.

        :param args: parsed arguments namespace
        """
        loglevel = getattr(logging, args.loglevel.upper())

        sh = logging.StreamHandler()
        sh.setLevel(loglevel)
        fmtr = logging.Formatter(args.logformat)
        sh.setFormatter(fmtr)
        log.addHandler(sh)

        if args.logpath:
            fh = logging.FileHandler(args.logpath, mode="w")
            fh.setFormatter(fmtr)
            fh.setLevel(loglevel)
            log.addHandler(fh)


if __name__ == '__main__':
    cnf = DbiCnf()
    cnf()
    log.info(cnf)