# Author: Noel Dawe <Noel.Dawe@cern.ch>
"""
This module provides the main GRL class and utility functions
"""
from . import info
import sys
import os
import copy
import urllib2
from pprint import pprint
from operator import sub, or_, and_, xor, itemgetter
from .sorteddict import SortedDict
import bisect
import datetime
import cStringIO
import re
import xml.etree.ElementTree as _ET
try:
import yaml
USE_YAML = True
except ImportError:
USE_YAML = False
# Names exported by ``from <module> import *``.
__all__ = [
    "clipped",
    "diffed",
    "ored",
    "anded",
    "xored",
    "LumiblockRange",
    "GRL",
]
# Matches a text node that was placed on its own indented line between an
# opening and a closing tag; group 1 captures the text itself.
# Raw strings keep the regex escapes (\s, \d) away from the Python
# string-literal parser.
XML_WHITESPACE = re.compile(
    r'>\n\s+([^<>\s].*?)\n\s+</', re.DOTALL)
# Matches an LBRange element whose attributes were serialized End-first;
# the named groups ``end`` and ``start`` capture the two numbers.
LBRANGE_ORDER = re.compile(
    r'<LBRange End="(?P<end>\d+)" Start="(?P<start>\d+)"/>')
def fix_attr_order(match):
    """
    Rebuild an LBRange element with Start written before End

    *match*: regex match object
        must provide the named groups ``start`` and ``end``
    """
    start, end = match.group('start', 'end')
    return '<LBRange Start="{0}" End="{1}"/>'.format(start, end)
def pretty_xml(doc):
    """
    Fix-up minidom's toprettyxml() output
    http://stackoverflow.com/questions/749796/pretty-printing-xml-in-python
    http://stackoverflow.com/a/3367423/1002176

    *doc*: object providing toprettyxml(indent=...)
        (called with a minidom document by GRL.write)

    Returns the pretty-printed XML string with the leading
    '<?xml version="1.0" ?>' declaration line removed.
    """
    declaration = '<?xml version="1.0" ?>'
    # remove extra whitespace created by minidom
    # (raw string: \g<1> is a regex group reference, not a string escape)
    xml = XML_WHITESPACE.sub(r'>\g<1></', doc.toprettyxml(indent=' '))
    # preserve order of Start and End attributes (although the order of XML
    # node attributes is not important and should not be depended on)
    xml = LBRANGE_ORDER.sub(fix_attr_order, xml)
    if xml.startswith(declaration):
        # the + 1 also drops the newline that follows the declaration
        return xml[len(declaration) + 1:]
    # no declaration present: do not chop off real content
    return xml
def get_tree_builder():
    """
    Return a new TreeBuilder instance whose doctype() hook does nothing.

    The TreeBuilder base class is taken from lxml.etree when
    info.USE_LXML is true and from xml.etree.ElementTree otherwise.
    """
    if not info.USE_LXML:
        import xml.etree.ElementTree as ET
    else:
        import lxml.etree as ET

    class _MetadataTreeBuilder(ET.TreeBuilder):
        """
        Implements doctype() as required to avoid deprecation warnings with
        >=python-2.7.
        """
        def doctype(self, name, pubid, system):
            # the DOCTYPE declaration is deliberately ignored
            return None

    return _MetadataTreeBuilder()
def clipped(grl, startrun=None, startlb=None, endrun=None, endlb=None):
    """
    Build a clipped copy of a GRL, leaving the original untouched.
    The copy is restricted to the span from (startrun, startlb) to
    (endrun, endlb), inclusive.

    *grl*: GRL

    *startrun*: [ int | None ]

    *startlb*: [ int | None ]

    *endrun*: [ int | None ]

    *endlb*: [ int | None ]
    """
    bounds = dict(startrun=startrun, startlb=startlb,
                  endrun=endrun, endlb=endlb)
    # deep copy first so that clip() only mutates the duplicate
    duplicate = copy.deepcopy(grl)
    duplicate.clip(**bounds)
    return duplicate
def diffed(*args):
    """
    Subtract GRLs from left to right: (((A-B)-C)-D) ...)

    *args*: tuple of GRLs
    """
    return reduce(lambda remaining, removed: remaining - removed, args)
def ored(*args):
    """
    Combine GRLs with OR from left to right: A | B | C ...

    *args*: tuple of GRLs
    """
    return reduce(lambda combined, extra: combined | extra, args)
def anded(*args):
    """
    Combine GRLs with AND from left to right: A & B & C ...

    *args*: tuple of GRLs
    """
    return reduce(lambda common, extra: common & extra, args)
def xored(*args):
    """
    Combine GRLs with XOR from left to right: A ^ B ^ C ...

    *args*: tuple of GRLs
    """
    return reduce(lambda mixed, extra: mixed ^ extra, args)
try:
    _INTEGER_TYPES = (int, long)
except NameError:
    # interpreters without a separate ``long`` type
    _INTEGER_TYPES = (int,)


class LumiblockRange(tuple):
    """
    A 2-tuple consisting of the lower and upper
    bounds (both inclusive) of a lumiblock range.
    """
    def __new__(cls, *args):
        """
        *args*: [ tuple | list | int, int ]
            either a single 2-tuple/list of lumiblock numbers in
            ascending order, or the two numbers themselves

        Raises ValueError unless exactly two ascending numbers are
        given and TypeError if either of them is not an integer.
        """
        # a single sequence argument is unpacked; lists are accepted as
        # well as tuples, as the argument documentation promises
        if len(args) == 1 and isinstance(args[0], (tuple, list)):
            args = tuple(args[0])
        if len(args) != 2:
            raise ValueError(
                "lbrange must contain exactly 2 elements: {0}".format((args,)))
        for lumiblock in args:
            if not isinstance(lumiblock, _INTEGER_TYPES):
                raise TypeError("lbrange must contain integers or longs only")
        if args[0] > args[1]:
            raise ValueError("lbrange in wrong order: {0}".format((args,)))
        return super(LumiblockRange, cls).__new__(cls, args)

    def __contains__(self, lbn):
        """
        Determine whether a lumiblock is contained by this
        lumiblock range

        *lbn*: int
        """
        return self[0] <= lbn <= self[1]

    def __cmp__(self, other):
        """
        Determine whether this lumiblock range should be placed
        to the right or left of another lumiblock number or range

        *other*: [ int | LumiblockRange | tuple ]

        Returns 0 for a lumiblock number inside this range, otherwise
        -1, 0 or 1 in the manner of the Python 2 builtin cmp().
        """
        if isinstance(other, _INTEGER_TYPES):
            if other in self:
                return 0
            return (self[0] > other) - (self[0] < other)
        # tuple offers rich comparisons only, so there is no inherited
        # __cmp__ to delegate to; compare as plain tuples instead
        mine, theirs = tuple(self), tuple(other)
        return (mine > theirs) - (mine < theirs)

    def intersects(self, lbrange):
        """
        Determine if self intersects with another lumiblock range

        *lbrange*: [ LumiblockRange | tuple ]
            any (start, end) pair with start <= end
        """
        # two inclusive intervals overlap when each one starts no later
        # than the other one ends; this needs only indexing, so plain
        # tuples are handled exactly like LumiblockRange instances
        return self[0] <= lbrange[1] and lbrange[0] <= self[1]

    def as_set(self):
        """
        Convert self to set of lumiblocks
        """
        return set(range(self[0], self[1] + 1))
class GRL(object):
    """
    The main GRL class holds a python dictionary
    mapping runs to a list of lumiblock ranges (2-tuples)
    """
    formats = [
        'xml',
        'yml',
        'txt',
        'py',
        'cut'
    ]
    # matches the "<file>.root...:/" prefix of a path inside a ROOT file
    ROOT_PATTERN = re.compile(r'\.root[^ \t\n\r\f\v:/]*:/')

    def __init__(self, grl=None, from_string=False, format=None):
        """
        *grl*: [ dict | str | file | None ]
            dict mapping runs to lumiblock ranges, XML string (together
            with from_string=True), file name, http(s) URL, path of the
            form "<file>.root:/<path>" or an open file object

        *from_string*: bool
            If True, interpret grl as xml string and not filename

        *format*: [ 'xml' | 'yml' | None ]
            format to assume when the file extension does not tell

        Raises TypeError for unsupported input types, ValueError for an
        unrecognized file extension and ImportError when a needed
        optional module (ROOT, PyYAML) is missing.
        """
        self.name = 'GRL'
        self.version = '1.0'
        self.metadata = []
        self.__grl = SortedDict()
        if not grl:
            return
        if isinstance(grl, dict):
            self.from_dict(grl)
            # sort and merge exactly like input read from a file
            self.__normalize()
            return
        if from_string:
            if not isinstance(grl, basestring):
                raise TypeError("grl is non-string type '{0}' while "
                                "using from_string=True".format(type(grl)))
            self.from_string(grl)
            return
        if not isinstance(grl, (basestring, file)):
            raise TypeError(
                "Unable to initialize GRL from a '{0}'".format(type(grl)))
        filename = grl
        response = None
        if isinstance(grl, basestring):
            # is grl a URL?
            if re.match('^http(s)?://', grl) is not None:
                grl = response = urllib2.urlopen(grl)
            # is grl a ROOT file path?
            elif re.search(self.ROOT_PATTERN, grl):
                self.__load_from_root(grl)
                self.__normalize()
                return
        else:
            filename = grl.name
        try:
            name, ext = os.path.splitext(filename)
            if filename == '<stdin>' or ext == '.xml' or format == 'xml':
                if info.USE_LXML:
                    import lxml.etree as ET
                else:
                    import xml.etree.ElementTree as ET
                if sys.version_info >= (2, 7):
                    tree = ET.parse(
                        grl, parser=ET.XMLParser(
                            target=get_tree_builder()))
                else:
                    tree = ET.parse(grl)
                self.from_xml(tree)
            elif ext == '.yml' or format == 'yml':
                if not USE_YAML:
                    raise ImportError("PyYAML module not found")
                # NOTE(review): yaml.load() can construct arbitrary Python
                # objects; only read YAML GRLs from trusted sources.
                # safe_load() cannot be swapped in blindly because write()
                # dumps the ranges as Python tuples.
                if isinstance(grl, basestring):
                    with open(grl) as grl_file:
                        self.from_dict(yaml.load(grl_file))
                else:
                    # open file object or URL response: read it directly
                    self.from_dict(yaml.load(grl))
            else:
                raise ValueError(
                    "{0} does not have valid GRL extension: {1}".format(
                        filename, ext))
        finally:
            # only close what was opened here, never the caller's file
            if response is not None:
                response.close()
        self.__normalize()

    def __load_from_root(self, grl):
        """
        Insert runs and lumiblocks from an XML string stored as a
        TObjString inside a ROOT file

        *grl*: str
            path of the form "<file>.root:/<path>"
        """
        # one place where goodruns requires ROOT
        try:
            import ROOT
        except ImportError:
            raise ImportError(
                "Specified a GRL in a ROOT file "
                "but cannot import ROOT. Is ROOT installed "
                "with PyROOT enabled?")
        cwd = ROOT.gDirectory
        filename, _, path = grl.rpartition(':/')
        root_file = ROOT.TFile.Open(filename)
        if not root_file:
            raise IOError(
                "Could not open ROOT file: {0}".format(filename))
        try:
            stored = root_file.Get(path)
            if not stored:
                raise ValueError(
                    "Path {0} does not exist in ROOT file {1}".format(
                        path, filename))
            if not isinstance(stored, ROOT.TObjString):
                raise TypeError(
                    "Object at {0} is not a ROOT.TObjString".format(
                        path))
            self.from_string(str(stored.GetString()))
        finally:
            # close the file on the error paths as well
            root_file.Close()
            # return to previous directory
            cwd.cd()

    def __normalize(self):
        """
        Sort and merge the lumiblock ranges of every run
        """
        # iterate over a snapshot: __optimize() deletes empty runs
        for run in list(self.runs()):
            self.__grl[run].sort()
            self.__optimize(run)

    def from_string(self, string):
        """
        Insert runs and lumiblocks from XML string

        *string*: str
        """
        if info.USE_LXML:
            import lxml.etree as ET
        else:
            import xml.etree.ElementTree as ET
        if sys.version_info >= (2, 7):
            tree = ET.XML(string, parser=ET.XMLParser(
                target=get_tree_builder()))
        else:
            tree = ET.fromstring(string)
        self.from_xml(tree)

    def from_xml(self, tree):
        """
        Insert runs and lumiblocks from XML

        *tree*: ElementTree

        Name, Version and Metadata found below NamedLumiRange replace
        the current name, version and metadata.
        """
        name = tree.find('NamedLumiRange/Name')
        if name is not None:
            self.name = name.text
        version = tree.find('NamedLumiRange/Version')
        if version is not None:
            self.version = version.text
        metadata = tree.findall('NamedLumiRange/Metadata')
        if metadata is not None:
            self.metadata = metadata
        lbcols = tree.findall(
            'NamedLumiRange/LumiBlockCollection')
        if lbcols is None:
            return
        for lbcol in lbcols:
            run = int(lbcol.find('Run').text)
            for lumiblock in lbcol.findall('LBRange'):
                self.insert(run,
                            LumiblockRange(int(lumiblock.attrib['Start']),
                                           int(lumiblock.attrib['End'])))

    def from_dict(self, d):
        """
        Convert dict to GRL

        *d*: dict
            maps each run to an iterable of (start, end) pairs; the
            ranges of a run that is already present are replaced
        """
        converted = {}
        for run, lbranges in d.items():
            converted[run] = [LumiblockRange(*a) for a in lbranges]
        self.__grl.update(converted)

    def to_dict(self):
        """
        Convert self to dict mapping each run to a list of
        plain (start, end) tuples
        """
        converted = {}
        for run, lbranges in self.items():
            converted[run] = [(a[0], a[1]) for a in lbranges]
        return converted

    def __merge_metadata(self, other=None):
        # drop metadata for now
        self.metadata = []

    def __copy__(self):
        return copy.deepcopy(self)

    def __nonzero__(self):
        return bool(self.__grl)

    # same truth test under the Python 3 protocol name
    __bool__ = __nonzero__

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        lines = []
        for run in self.iterruns():
            lbranges = self.__grl[run]
            lines.append('-' * 15)
            lines.append('RUN: %i' % run)
            lines.append('LUMIBLOCKS:')
            if not lbranges:
                # nothing to align; max() below would fail on no items
                continue
            # left-align the lower bounds of a run in a common column
            width = max(len(str(lbrange[0])) for lbrange in lbranges)
            single = ' %%-%ds' % width
            pair = single + ' - %i'
            for lbrange in lbranges:
                if lbrange[0] == lbrange[1]:
                    lines.append(single % lbrange[0])
                else:
                    lines.append(pair % lbrange)
        return '\n'.join(lines)

    def __getitem__(self, run):
        """
        Return list of lumiblock ranges for a run

        *run*: int
        """
        return self.__grl[run]

    def __delitem__(self, run):
        """
        Remove run and associated lumiblock ranges from GRL

        *run*: int
        """
        del self.__grl[run]

    def __contains__(self, runlb):
        """
        Returns True if this GRL contains a run and lumiblock

        *runlb*: tuple
            2-tuple of ints containing run number and lumiblock number
        """
        run, lbn = runlb
        if run in self.__grl:
            lbranges = self.__grl[run]
            # Locate the LumiblockRange containing lbn
            # (relies on LumiblockRange comparing equal to the lumiblock
            # numbers it contains)
            i = bisect.bisect_left(lbranges, lbn)
            if (i != len(lbranges)) and (lbn in lbranges[i]):
                return True
        return False

    def __iter__(self):
        """
        Iterate over runs in GRL
        """
        return self.iterruns()

    def items(self):
        """
        Iterate over (run, lbranges) in GRL
        """
        return self.__grl.items()

    def iterlbranges(self):
        """
        Iterate over (run, lbrange) in GRL
        """
        for run, lbranges in self.__grl.items():
            for lbrange in lbranges:
                yield (run, lbrange)

    def iterruns(self):
        """
        Iterate over runs in GRL
        """
        for run in self.__grl.iterkeys():
            yield run

    def runs(self):
        """
        Return list of runs in GRL
        """
        return self.__grl.keys()

    def has_run(self, run):
        """
        Returns True if run is in GRL, else False

        *run*: int
        """
        return run in self.__grl

    def insert(self, run, lbrange):
        """
        Insert a lumiblock range into a run

        *run*: int

        *lbrange*: [ LumiblockRange | tuple ]

        Raises TypeError if run is not an int.
        """
        if not isinstance(run, int):
            raise TypeError('run must be an integer')
        if not isinstance(lbrange, LumiblockRange):
            lbrange = LumiblockRange(*lbrange)
        # keep the try body down to the lookup that signals a new run
        try:
            lbranges = self.__grl[run]
        except KeyError:
            self.__grl[run] = [lbrange]
            return
        lbranges.insert(bisect.bisect(lbranges, lbrange), lbrange)
        self.__optimize(run)

    def remove(self, run, lbrange):
        """
        Remove a lumiblock range from a run

        *run*: int

        *lbrange*: [ LumiblockRange | tuple ]

        The run itself is dropped when no lumiblocks remain. Nothing
        happens if the run is not in the GRL.
        """
        if run not in self.__grl:
            return
        if not isinstance(lbrange, LumiblockRange):
            lbrange = LumiblockRange(*lbrange)
        start, end = lbrange
        lbranges = self.__grl[run]
        kept = []
        for current in lbranges:
            if current[1] < start or current[0] > end:
                # no overlap: keep untouched
                kept.append(current)
                continue
            # keep whatever sticks out on either side of the removed span
            if current[0] < start:
                kept.append(LumiblockRange(current[0], start - 1))
            if current[1] > end:
                kept.append(LumiblockRange(end + 1, current[1]))
        # update in place so that the stored list object stays the same
        lbranges[:] = kept
        if not lbranges:
            del self.__grl[run]

    def clip(self, startrun=None, startlb=None, endrun=None, endlb=None):
        """
        Clip the GRL between startrun, startlb and endrun, endlb (inclusive)

        *startrun*: [ int | None ]

        *startlb*: [ int | None ]

        *endrun*: [ int | None ]

        *endlb*: [ int | None ]

        startlb only applies to startrun and endlb only to endrun.
        """
        # iterate over a snapshot because runs are deleted on the way
        for run in list(self.runs()):
            outside = ((startrun is not None and run < startrun) or
                       (endrun is not None and run > endrun))
            if outside:
                # delete once and move on; the run must not be touched again
                del self.__grl[run]
                continue
            lbranges = self.__grl[run]
            at_start = startrun is not None and run == startrun
            at_end = endrun is not None and run == endrun
            if at_start and startlb is not None:
                # drop ranges ending before startlb and trim the one
                # (if any) that straddles it
                lbranges[:] = [
                    lbrange if lbrange[0] >= startlb
                    else LumiblockRange(startlb, lbrange[1])
                    for lbrange in lbranges if lbrange[1] >= startlb]
            if at_end and endlb is not None:
                # drop ranges starting after endlb and trim the one
                # (if any) that straddles it
                lbranges[:] = [
                    lbrange if lbrange[1] <= endlb
                    else LumiblockRange(lbrange[0], endlb)
                    for lbrange in lbranges if lbrange[0] <= endlb]
            if (at_start or at_end) and not lbranges:
                del self.__grl[run]

    def __optimize(self, run):
        """
        Merge lumiblock ranges

        *run*: int

        Overlapping and directly adjacent ranges of the run are fused.
        The list must already be sorted. A run without ranges is
        removed from the GRL.
        """
        lbranges = self.__grl[run]
        if not lbranges:
            del self.__grl[run]
            return
        merged = [lbranges[0]]
        for lbrange in lbranges[1:]:
            previous = merged[-1]
            if previous[1] + 1 < lbrange[0]:
                # a gap of at least one lumiblock: start a new range
                merged.append(lbrange)
            elif lbrange[1] > previous[1]:
                # overlapping or adjacent: extend the previous range
                merged[-1] = LumiblockRange(previous[0], lbrange[1])
            # otherwise lbrange is fully covered by previous: drop it
        # update in place so that the stored list object stays the same
        lbranges[:] = merged

    def __eq__(self, other):
        if not isinstance(other, GRL):
            # let Python try the other operand, not raise AttributeError
            return NotImplemented
        return self.__grl == other.__grl

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __add__(self, other):
        grlcopy = copy.deepcopy(self)
        grlcopy += other
        return grlcopy

    def __iadd__(self, other):
        # a string operand is interpreted as an XML GRL
        if isinstance(other, basestring):
            other = GRL(other, from_string=True)
        for run, lbrange in other.iterlbranges():
            self.insert(run, LumiblockRange(lbrange))
        self.__merge_metadata(other)
        return self

    def __sub__(self, other):
        grlcopy = copy.deepcopy(self)
        grlcopy -= other
        return grlcopy

    def __isub__(self, other):
        # a string operand is interpreted as an XML GRL
        if isinstance(other, basestring):
            other = GRL(other, from_string=True)
        for run, lbrange in other.iterlbranges():
            self.remove(run, lbrange)
        self.__merge_metadata(other)
        return self

    def __and__(self, other):
        """ Create a new GRL that is the overlap between two GRLs
        """
        return self - (self - other)

    def __iand__(self, other):
        """ Update this GRL by only including the overlap with another GRL
        """
        self -= (self - other)
        return self

    def __or__(self, other):
        """ Merge two GRLs
        """
        return self + other

    def __ior__(self, other):
        """ Update this GRL by adding the logical OR with another GRL
        """
        self += other
        return self

    def __xor__(self, other):
        """ Exclusive OR (XOR) between two GRLs
        """
        return (self | other) - (self & other)

    def __ixor__(self, other):
        """ Update this GRL by removing overlap with another GRL
        """
        # the overlap has to be computed from the state before the merge
        grlcopy = copy.deepcopy(self)
        self |= other
        self -= (grlcopy & other)
        return self

    def cut(self, runname='RunNumber', lbname='lbn'):
        """
        Convert this GRL into a TCut expression.

        *runname*: str
            name of the run number variable in the expression

        *lbname*: str
            name of the lumiblock number variable in the expression
        """
        cut = ''
        for run in self.iterruns():
            lbcut = ''
            for lbrange in self[run]:
                newcut = (lbname + '>={0:d}&&' +
                          lbname + '<={1:d}').format(*lbrange)
                if lbcut:
                    lbcut = '({0})|({1})'.format(lbcut, newcut)
                else:
                    lbcut = newcut
            newcut = '({0}=={1:d})&&({2})'.format(runname, run, lbcut)
            if cut:
                cut = '({0})|({1})'.format(cut, newcut)
            else:
                cut = newcut
        return cut

    def str(self, format='xml'):
        """
        Return string repr of self in the specified format

        *format*: str
            any format accepted by write()
        """
        str_io = cStringIO.StringIO()
        self.write(filehandle=str_io, format=format)
        return str_io.getvalue()

    def save(self, name):
        """
        Save GRL to file by name. Determine format from
        extension.

        *name*: str
            file name ending in one of GRL.formats, or a path of the
            form "<file>.root:/<path>" to store the XML in a ROOT file

        Raises ValueError for an unknown extension or a missing
        directory inside the ROOT file, IOError if the ROOT file cannot
        be opened and ImportError if ROOT is needed but unavailable.
        """
        # is name a ROOT file path?
        if re.search(self.ROOT_PATTERN, name):
            # one place where goodruns requires ROOT
            try:
                import ROOT
                ROOT.PyConfig.IgnoreCommandLineOptions = True
            except ImportError:
                raise ImportError(
                    "Attempting to save GRL in ROOT file but cannot import "
                    "ROOT. Are ROOT and PyROOT installed?")
            cwd = ROOT.gDirectory
            filename, _, path = name.rpartition(':/')
            root_file = ROOT.TFile.Open(filename, 'UPDATE')
            if not root_file:
                raise IOError(
                    'Could not open ROOT file: {0}'.format(filename))
            try:
                head, tail = os.path.split(os.path.normpath(path))
                if head and not root_file.cd(head):
                    raise ValueError(
                        'Path {0} does not exist in file {1}'.format(
                            head, filename))
                xml_string = ROOT.TObjString(self.str())
                xml_string.Write(tail)
            finally:
                # close the file on the error paths as well
                root_file.Close()
                # return to previous directory
                cwd.cd()
        else:
            _, ext = os.path.splitext(name)
            # ignore period
            ext = ext[1:]
            if ext not in GRL.formats:
                raise ValueError(
                    "{0} does not have a valid GRL extension".format(name))
            with open(name, 'w') as filehandle:
                self.write(filehandle, format=ext)

    def write(self, filehandle, format='xml'):
        """
        Write the GRL in the specified format to the file object.

        *filehandle*: file

        *format*: str
            'xml', 'yml' / 'yaml', 'txt', 'py' / 'python' or 'cut'

        Raises ValueError for any other format and RuntimeError if YAML
        output is requested while PyYAML is not available.
        """
        if format == 'xml':
            if info.USE_LXML:
                import lxml.etree as ET
            else:
                import xml.etree.ElementTree as ET
                import xml.dom.minidom as minidom
            root = ET.Element('LumiRangeCollection')
            subroot = ET.SubElement(root, 'NamedLumiRange')
            name = ET.SubElement(subroot, 'Name')
            name.text = self.name
            version = ET.SubElement(subroot, 'Version')
            version.text = self.version
            for meta in self.metadata:
                subroot.append(meta)
            for run in self.iterruns():
                lumiblocks = self.__grl[run]
                lbcol = ET.SubElement(subroot, 'LumiBlockCollection')
                runelement = ET.SubElement(lbcol, 'Run')
                runelement.text = str(run)
                for lumiblock in lumiblocks:
                    lbrange = ET.SubElement(lbcol, 'LBRange')
                    lbrange.set('Start', str(lumiblock[0]))
                    lbrange.set('End', str(lumiblock[1]))
            date = datetime.datetime.now().strftime("%Y-%m-%d at %H:%M:%S")
            meta = (
                '''<!DOCTYPE LumiRangeCollection SYSTEM '''
                '''"http://atlas-runquery.cern.ch/LumiRangeCollection.dtd">\n'''
                '''<!-- This document was created by goodruns: '''
                '''http://pypi.python.org/pypi/goodruns/ on {0} -->\n'''.format(date))
            # declaration, doctype and comment are written by hand; the
            # element tree follows
            filehandle.write('<?xml version="1.0"?>\n')
            filehandle.write(meta)
            tree = ET.ElementTree(root)
            if info.USE_LXML:
                tree.write(filehandle, pretty_print=True)
            else:
                xml = minidom.parseString(ET.tostring(tree.getroot(), 'utf-8'))
                filehandle.write(pretty_xml(xml))
        elif format in ('yml', 'yaml'):
            if not USE_YAML:
                raise RuntimeError(
                    "YAML is not installed (pip install pyyaml)")
            filehandle.write(yaml.dump(self.to_dict()))
        elif format == 'txt':
            filehandle.write(str(self) + '\n')
        elif format in ('py', 'python'):
            filehandle.write("grl = ")
            pprint(self.__grl, stream=filehandle)
        elif format == 'cut':
            filehandle.write(self.cut() + '\n')
        else:
            raise ValueError("Unrecognized grl format")