Package madgraph :: Package various :: Module histograms
[hide private]
[frames] | no frames]

Source Code for Module madgraph.various.histograms

   1  #! /usr/bin/env python 
   2  ################################################################################ 
   3  # 
   4  # Copyright (c) 2010 The MadGraph5_aMC@NLO Development team and Contributors 
   5  # 
   6  # This file is a part of the MadGraph5_aMC@NLO project, an application which  
   7  # automatically generates Feynman diagrams and matrix elements for arbitrary 
   8  # high-energy processes in the Standard Model and beyond. 
   9  # 
  10  # It is subject to the MadGraph5_aMC@NLO license which should accompany this  
  11  # distribution. 
  12  # 
  13  # For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch 
  14  # 
  15  ################################################################################ 
  16  """Module for the handling of histograms, including Monte-Carlo error per bin 
  17  and scale/PDF uncertainties.""" 
  18   
  19  from __future__ import division 
  20   
  21  import array 
  22  import copy 
  23  import fractions 
  24  import itertools 
  25  import logging 
  26  import math 
  27  import os 
  28  import re 
  29  import sys 
  30  import StringIO 
  31  import subprocess 
  32  import xml.dom.minidom as minidom 
  33  from xml.parsers.expat import ExpatError as XMLParsingError 
  34   
  35  root_path = os.path.split(os.path.dirname(os.path.realpath( __file__ )))[0] 
  36  sys.path.append(os.path.join(root_path))  
  37  sys.path.append(os.path.join(root_path,os.pardir)) 
  38  try: 
  39      # import from madgraph directory 
  40      import madgraph.various.misc as misc 
  41      from madgraph import MadGraph5Error 
  42      logger = logging.getLogger("madgraph.various.histograms") 
  43   
  44  except ImportError, error: 
  45      # import from madevent directory 
  46      import internal.misc as misc     
  47      from internal import MadGraph5Error 
  48      logger = logging.getLogger("internal.histograms") 
# I copy the Physics object list here so as not to add a whole dependency to
# base_objects which is annoying when using this histograms module from the
# bin/internal location of a process output (i.e. outside an MG5_aMC env.)

#===============================================================================
# PhysicsObjectList
#===============================================================================
class histograms_PhysicsObjectList(list):
    """A list whose elements are validated before being appended.

    Subclasses restrict the accepted elements by overriding
    is_valid_element(); this base class accepts everything."""

    class PhysicsObjectListError(Exception):
        """Exception raised if an error occurs in the definition
        or execution of a physics object list."""
        pass

    def __init__(self, init_list=None):
        """Create a new list; if init_list is given, append each of its
        elements (so that each one goes through the validity check)."""

        list.__init__(self)

        if init_list is not None:
            for element in init_list:
                self.append(element)

    def append(self, object):
        """Append an element after checking it with is_valid_element().

        The check is an assert, so it is skipped when Python runs with -O."""

        assert self.is_valid_element(object), \
            "Object %s is not a valid object for the current list" % repr(object)

        list.append(self, object)

    def is_valid_element(self, obj):
        """Test if object obj is a valid element for the list."""
        return True

    def __str__(self):
        """Return the elements' str() joined by ',\\n' and wrapped in
        square brackets."""

        # join() only inserts separators *between* elements, so a trailing
        # ',' or newline that belongs to the last element is left untouched.
        return '[' + ',\n'.join(str(obj) for obj in self) + ']'
100 #===============================================================================
class Bin(object):
    """A single histogram bin: its boundaries, its number of entries and a
    dictionary of named weights (always holding at least 'central' when
    built with the default arguments).
    """

    def __init__(self, boundaries=(0.0,0.0), wgts=None, n_entries = 0):
        """ Initializes an empty bin, necessarily with boundaries.

        boundaries -- tuple of floats (or tuple of tuples of floats).
        wgts       -- dictionary label -> float; an empty/None value is
                      replaced by {'central': 0.0}.
        n_entries  -- initial number of entries."""

        self.boundaries = boundaries
        self.n_entries = n_entries
        if not wgts:
            self.wgts = {'central':0.0}
        else:
            self.wgts = wgts

    def __setattr__(self, name, value):
        """Validate 'boundaries' and 'wgts' before storing them.

        Raises MadGraph5Error when the value has the wrong structure."""

        # Each message is formatted as a whole: '%' binds tighter than '+',
        # so the format operator must not be applied to a trailing fragment.
        if name=='boundaries':
            if not isinstance(value, tuple):
                raise MadGraph5Error(("Argument '%s' for bin property "+
                               "'boundaries' must be a tuple.")%str(value))
            else:
                for coordinate in value:
                    if isinstance(coordinate, tuple):
                        for dim in coordinate:
                            if not isinstance(dim, float):
                                raise MadGraph5Error(
                                  ("Coordinate '%s' of the bin"+
                                   " boundary '%s' must be a float.")
                                                  %(str(dim),str(value)))
                    elif not isinstance(coordinate, float):
                        raise MadGraph5Error(
                            ("Element '%s' of the bin boundaries"+
                             " specified must be a float.")%str(coordinate))
        elif name=='wgts':
            if not isinstance(value, dict):
                raise MadGraph5Error(("Argument '%s' for bin uncertainty "+
                               "'wgts' must be a dictionary.")%str(value))
            for val in value.values():
                if not isinstance(val,float):
                    raise MadGraph5Error(("The bin weight value '%s' is not a "+
                                                        "float.")%str(val))

        super(Bin, self).__setattr__(name,value)

    def get_weight(self, key='central'):
        """ Accesses a specific weight from this bin.

        Raises MadGraph5Error if no weight is registered under 'key'."""
        try:
            return self.wgts[key]
        except KeyError:
            raise MadGraph5Error(("Weight with ID '%s' is not defined for"+
                                                   " this bin")%str(key))

    def set_weight(self, wgt, key='central'):
        """ Sets a specific weight of this bin to the float 'wgt'."""

        # an assert is used here in this intensive function, so as to avoid
        # slow-down when not in debug mode.
        assert(isinstance(wgt, float))

        # NOTE(review): assigning into a plain dict never raises KeyError, so
        # this handler only matters if 'wgts' is a mapping with a custom
        # __setitem__; as written, unknown keys are simply created.
        try:
            self.wgts[key] = wgt
        except KeyError:
            raise MadGraph5Error(("Weight with ID '%s' is not defined for"+
                                                   " this bin")%str(key))

    def addEvent(self, weights = 1.0):
        """ Add an event to this bin.

        'weights' is either a float (taken as the 'central' weight) or a
        dictionary label -> float. Every label except 'stat_error' must
        already exist in this bin, otherwise MadGraph5Error is raised."""

        if isinstance(weights, float):
            weights = {'central': weights}

        for key in weights:
            if key == 'stat_error':
                continue
            try:
                self.wgts[key] += weights[key]
            except KeyError:
                raise MadGraph5Error('The event added defines the weight '+
                  '%s which was not '%key+'registered in this histogram.')

        # One entry per event, independently of the number of weights.
        self.n_entries += 1

        #if 'stat_error' not in weights and 'central' in w:
        #    self.wgts['stat_error'] = self.wgts['central']/math.sqrt(float(self.n_entries))
        #else:
        #    self.wgts['stat_error'] = math.sqrt( self.wgts['stat_error']**2 +
        #                                               weights['stat_error']**2 )

    def nice_string(self, order=None, short=True):
        """ Nice representation of this Bin.
        One can order the weight according to the argument if provided.
        Labels listed in 'order' but absent from the bin are skipped."""

        res = ["Bin boundaries : %s"%str(self.boundaries)]
        if not short:
            res.append("Bin weights :")
            if order is None:
                label_list = self.wgts.keys()
            else:
                label_list = order

            for label in label_list:
                try:
                    res.append(" -> '%s' : %4.3e"%(str(label),self.wgts[label]))
                except KeyError:
                    pass
        else:
            res.append("Central weight : %4.3e"%self.get_weight())

        return '\n'.join(res)

    def alter_weights(self, func):
        """ Apply a given function to all bin weights: 'func' receives the
        weight dictionary and must return the new one."""
        self.wgts = func(self.wgts)

    @classmethod
    def combine(cls, binA, binB, func):
        """ Function to combine two bins. The 'func' is such that it takes
        two weight dictionaries and merge them into one.

        Raises MadGraph5Error if the boundaries differ or if 'func' fails."""

        res_bin = cls()
        if binA.boundaries != binB.boundaries:
            raise MadGraph5Error('The two bins to combine have'+
              ' different boundaries, %s!=%s.'%(str(binA.boundaries),
                                                str(binB.boundaries)))
        res_bin.boundaries = binA.boundaries

        try:
            res_bin.wgts = func(binA.wgts, binB.wgts)
        except Exception as e:
            raise MadGraph5Error("When combining two bins, the provided"+
              " function '%s' triggered the following error:\n\"%s\"\n"%
              (func.__name__,str(e))+" when combining the following two bins:\n"+
              binA.nice_string(short=False)+"\n and \n"+
              binB.nice_string(short=False))

        return res_bin
233
class BinList(histograms_PhysicsObjectList):
    """ A class implementing features related to a list of Bins. """

    def __init__(self, list = None, bin_range = None,
                                                     weight_labels = None):
        """ Initialize a list of Bins. It is possible to define the range
        as a list of three floats: [min_x, max_x, bin_width]

        list          -- iterable of Bin objects (ignored when bin_range is
                         given); None means an empty list.
        bin_range     -- [min_x, max_x, bin_width]; the bins are generated
                         with all their weights set to 0.0.
        weight_labels -- labels of the weights; defaults to
                         ['central', 'stat_error'] when bin_range is used.

        Raises MadGraph5Error for an ill-formed bin_range."""

        self.weight_labels = weight_labels
        if bin_range:
            # Set the default weight_labels to something meaningful
            if not self.weight_labels:
                self.weight_labels = ['central', 'stat_error']
            if len(bin_range)!=3 or any(not isinstance(f, float) for f in bin_range):
                raise MadGraph5Error("The range argument to build a BinList"+
                              " must be a list of exactly three floats.")
            # A null or negative width would never reach max_x below.
            if bin_range[2] <= 0.0:
                raise MadGraph5Error("The bin width '%s' given in the range"
                   %str(bin_range[2])+" argument of a BinList must be positive.")
            current = bin_range[0]
            while current < bin_range[1]:
                # The last bin is clipped so that it never exceeds max_x.
                self.append(Bin(boundaries =
                            (current, min(current+bin_range[2],bin_range[1])),
                            wgts = dict((wgt,0.0) for wgt in self.weight_labels)))
                current += bin_range[2]
        else:
            super(BinList, self).__init__(list)

    def is_valid_element(self, obj):
        """Test whether specified object is of the right type for this list."""

        return isinstance(obj, Bin)

    def __setattr__(self, name, value):
        """Validate 'weight_labels' (None or a list of str/int/float/tuple
        labels) before storing it; other attributes are stored as is.

        Raises MadGraph5Error for an invalid value."""

        if name=='weight_labels':
            if not value is None and not isinstance(value, list):
                raise MadGraph5Error("Argument '%s' for BinList property '%s'"
                                   %(str(value),name)+' must be a list.')
            elif not value is None:
                for label in value:
                    if all((not isinstance(label,cls)) for cls in
                                                   [str, int, float, tuple]):
                        # Report the offending label, not the whole list.
                        raise MadGraph5Error(
                            "Element '%s' of the BinList property '%s'"
                            %(str(label),name)+' must be a string, an '+
                            'integer, a float or a tuple of float.')
                    if isinstance(label, tuple):
                        if len(label)>=1:
                            if not isinstance(label[0], (float, str)):
                                raise MadGraph5Error("Argument "+
                                  "'%s' for BinList property '%s'"%(str(value),name)+
                                  ' can be a tuple, but its first element must be a float or string.')
                        for elem in label[1:]:
                            if not isinstance(elem, (float,int,str)):
                                raise MadGraph5Error("Argument "+
                                  "'%s' for BinList property '%s'"%(str(value),name)+
                                  ' can be a tuple, but its elements past the first one must be either floats, integers or strings')

        super(BinList, self).__setattr__(name, value)

    def append(self, object):
        """Appends an element, but test if valid before."""

        super(BinList,self).append(object)
        # Assign the weight labels to those of the first bin added
        if len(self)==1 and self.weight_labels is None:
            # Materialise the keys: the setter above only accepts a list.
            self.weight_labels = [label for label in object.wgts.keys()]

    def nice_string(self, short=True):
        """ Nice representation of this BinList."""

        res = ["Number of bin in the list : %d"%len(self)]
        # weight_labels is None for a list which never received a bin.
        res.append("Registered weight labels : [%s]"%(', '.join([
                  str(label) for label in (self.weight_labels or [])])))
        if not short:
            for i, bin in enumerate(self):
                res.append('Bin number %d :'%i)
                res.append(bin.nice_string(order=self.weight_labels, short=short))

        return '\n'.join(res)
311
class Histogram(object):
    """A mother class for all specific implementations of Histogram conventions
    """

    # None means that any number of dimensions is accepted.
    allowed_dimensions = None
    # An empty list means that any type is accepted.
    allowed_types = []
    allowed_axis_modes = ['LOG','LIN']

    def __init__(self, title = "NoName", n_dimensions = 2, type=None,
                 x_axis_mode = 'LIN', y_axis_mode = 'LOG', bins=None):
        """ Initializes an empty histogram, possibly specifying
              > a title
              > a number of dimensions
              > a bin content
        """

        self.title = title
        self.dimension = n_dimensions
        if not bins:
            self.bins = BinList([])
        else:
            self.bins = bins
        self.type = type
        self.x_axis_mode = x_axis_mode
        self.y_axis_mode = y_axis_mode

    def __setattr__(self, name, value):
        """Validate the histogram properties before storing them.

        Raises MadGraph5Error when the value is not acceptable."""

        # Each message is formatted as a whole: '%' binds tighter than '+'.
        if name=='title':
            if not isinstance(value, str):
                raise MadGraph5Error(("Argument '%s' for the histogram property "+
                                 "'title' must be a string.")%str(value))
        elif name=='dimension':
            if not isinstance(value, int):
                raise MadGraph5Error(("Argument '%s' for histogram property "+
                            "'dimension' must be an integer.")%str(value))
            if self.allowed_dimensions and value not in self.allowed_dimensions:
                raise MadGraph5Error("%i-Dimensional histograms not supported "
                        %value+"by class '%s'. Supported dimensions are '%s'."
                        %(self.__class__.__name__,self.allowed_dimensions))
        elif name=='bins':
            if not isinstance(value, BinList):
                raise MadGraph5Error(("Argument '%s' for histogram property "+
                                    "'bins' must be a BinList.")%str(value))
            else:
                for bin in value:
                    if not isinstance(bin, Bin):
                        raise MadGraph5Error("Element '%s' of the "%str(bin)+
                                 " histogram bin list specified must be a bin.")
        elif name=='type':
            if not (value is None or value in self.allowed_types or
                                                    self.allowed_types==[]):
                raise MadGraph5Error("Argument '%s' for histogram"%str(value)+
                          " property 'type' must be a string in %s or None."
                                       %([str(t) for t in self.allowed_types]))
        elif name in ['x_axis_mode','y_axis_mode']:
            if not value in self.allowed_axis_modes:
                raise MadGraph5Error("Attribute '%s' of the histogram"%str(name)+
                  " must be in [%s], ('%s' given)"%(str(self.allowed_axis_modes),
                                                                    str(value)))

        super(Histogram, self).__setattr__(name,value)

    def nice_string(self, short=True):
        """ Nice representation of this histogram. With short=False the
        content of every bin is printed as well."""

        res = ['<%s> histogram:'%self.__class__.__name__]
        res.append(' -> title : "%s"'%self.title)
        res.append(' -> dimensions : %d'%self.dimension)
        if not self.type is None:
            res.append(' -> type : %s'%self.type)
        else:
            res.append(' -> type : None')
        res.append(' -> (x, y)_axis : ( %s, %s)'%\
              (tuple([('Linear' if mode=='LIN' else 'Logarithmic') for mode in \
                                        [self.x_axis_mode, self.y_axis_mode]])))
        if short:
            res.append(' -> n_bins : %s'%len(self.bins))
            res.append(' -> weight types : [ %s ]'%
                (', '.join([str(label) for label in self.bins.weight_labels]) \
                      if (not self.bins.weight_labels is None) else 'None'))

        else:
            res.append(' -> Bins content :')
            res.append(self.bins.nice_string(short))

        return '\n'.join(res)

    def alter_weights(self, func):
        """ Apply a given function to all bin weights."""

        for bin in self.bins:
            bin.alter_weights(func)

    @classmethod
    def combine(cls, histoA, histoB, func):
        """ Function to combine two Histograms. The 'func' is such that it takes
        two weight dictionaries and merge them into one.

        The result is a shallow copy of histoA with freshly built bins.
        Raises MadGraph5Error if the number of bins or the dimensions differ."""

        res_histogram = copy.copy(histoA)
        if histoA.title != histoB.title:
            res_histogram.title = "[%s]__%s__[%s]"%(histoA.title,func.__name__,
                                                                   histoB.title)
        else:
            res_histogram.title = histoA.title

        res_histogram.bins = BinList([])
        if len(histoA.bins)!=len(histoB.bins):
            raise MadGraph5Error('The two histograms to combine have a '+
             'different number of bins, %d!=%d.'%(len(histoA.bins),len(histoB.bins)))

        if histoA.dimension!=histoB.dimension:
            raise MadGraph5Error('The two histograms to combine have a '+
             'different dimensions, %d!=%d.'%(histoA.dimension,histoB.dimension))
        res_histogram.dimension = histoA.dimension

        for i, bin in enumerate(histoA.bins):
            res_histogram.bins.append(Bin.combine(bin, histoB.bins[i],func))

        # Reorder the weight labels as in the original histogram and add at the
        # end the new ones which resulted from the combination, in a sorted order
        new_labels = res_histogram.bins.weight_labels
        # weight_labels stays None when the histograms have no bin at all.
        if new_labels is not None:
            reference_labels = histoA.bins.weight_labels or []
            res_histogram.bins.weight_labels = \
                [label for label in reference_labels if label in new_labels] + \
                sorted([label for label in new_labels
                                          if label not in reference_labels])

        return res_histogram

    # ==================================================
    #  Some handy function for Histogram combination
    # ==================================================
    @staticmethod
    def MULTIPLY(wgtsA, wgtsB):
        """ Apply the multiplication to the weights of two bins. Both
        dictionaries must define 'central' and 'stat_error'."""

        new_wgts = {}

        new_wgts['stat_error'] = math.sqrt(
          (wgtsA['stat_error']*wgtsB['central'])**2+
          (wgtsA['central']*wgtsB['stat_error'])**2)

        for label, wgt in wgtsA.items():
            if label=='stat_error':
                continue
            new_wgts[label] = wgt*wgtsB[label]

        return new_wgts

    @staticmethod
    def DIVIDE(wgtsA, wgtsB):
        """ Apply the division to the weights of two bins. A division by a
        zero weight yields 0.0 for that weight."""

        new_wgts = {}
        if wgtsB['central'] == 0.0:
            new_wgts['stat_error'] = 0.0
        else:
            # d(x/y) = ( (dx/y)**2 + ((x*dy)/(y**2))**2 )**0.5
            # The error is a magnitude: divide by |y| so that it does not
            # turn negative when the denominator is negative.
            new_wgts['stat_error'] = math.sqrt(wgtsA['stat_error']**2+
            ((wgtsA['central']*wgtsB['stat_error'])/
                             wgtsB['central'])**2)/abs(wgtsB['central'])

        for label, wgt in wgtsA.items():
            if label=='stat_error':
                continue
            if wgtsB[label]==0.0 and wgt==0.0:
                new_wgts[label] = 0.0
            elif wgtsB[label]==0.0:
#               This situation is most often harmless and just happens in regions
#               with low statistics, so I'll bypass the warning here.
#                logger.debug('Warning:: A bin with finite weight was divided '+\
#                                                  'by a bin with zero weight.')
                new_wgts[label] = 0.0
            else:
                new_wgts[label] = wgt/wgtsB[label]

        return new_wgts

    @staticmethod
    def OPERATION(wgtsA, wgtsB, wgt_operation, stat_error_operation):
        """ Apply the operation to the weights of two bins. Notice that we
        assume here the two dict operands to have the same weight labels.
        The operation is a function that takes two floats as input."""

        new_wgts = {}
        for label, wgt in wgtsA.items():
            if label!='stat_error':
                new_wgts[label] = wgt_operation(wgt, wgtsB[label])
            else:
                new_wgts[label] = stat_error_operation(wgt, wgtsB[label])
#                if new_wgts[label]>1.0e+10:
#                    print "stat_error_operation is ",stat_error_operation.__name__
#                    print " inputs were ",wgt, wgtsB[label]
#                    print "for label", label

        return new_wgts

    @staticmethod
    def SINGLEHISTO_OPERATION(wgts, wgt_operation, stat_error_operation):
        """ Apply the operation to the weights of a *single* bins.
        The operation is a function that takes a single float as input."""

        new_wgts = {}
        for label, wgt in wgts.items():
            if label!='stat_error':
                new_wgts[label] = wgt_operation(wgt)
            else:
                new_wgts[label] = stat_error_operation(wgt)

        return new_wgts

    @staticmethod
    def ADD(wgtsA, wgtsB):
        """ Implements the addition using OPERATION above; the statistical
        errors are summed in quadrature."""
        return Histogram.OPERATION(wgtsA, wgtsB,
                                  (lambda a,b: a+b),
                                  (lambda a,b: math.sqrt(a**2+b**2)))

    @staticmethod
    def SUBTRACT(wgtsA, wgtsB):
        """ Implements the subtraction using OPERATION above; the
        statistical errors are summed in quadrature."""

        return Histogram.OPERATION(wgtsA, wgtsB,
                                  (lambda a,b: a-b),
                                  (lambda a,b: math.sqrt(a**2+b**2)))

    @staticmethod
    def RESCALE(factor):
        """ Implements the rescaling using SINGLEHISTO_OPERATION above.
        Returns a function acting on a weight dictionary."""

        def rescaler(wgts):
            # The statistical error scales with |factor| so as to stay
            # positive when rescaling by a negative number.
            return Histogram.SINGLEHISTO_OPERATION(wgts,(lambda a: a*factor),
                                                   (lambda a: a*abs(factor)))

        return rescaler

    @staticmethod
    def OFFSET(offset):
        """ Implements the offset using SINGLEHISTO_OPERATION above.
        Returns a function acting on a weight dictionary; the statistical
        error is left unchanged."""
        def offsetter(wgts):
            return Histogram.SINGLEHISTO_OPERATION(
                                  wgts,(lambda a: a+offset),(lambda a: a))

        return offsetter

    def __add__(self, other):
        """ Overload the plus function.

        Histogram + Histogram returns a new combined histogram, whereas
        adding a scalar modifies this histogram *in place* and returns it."""
        if isinstance(other, Histogram):
            return self.__class__.combine(self,other,Histogram.ADD)
        elif isinstance(other, (int, float)):
            self.alter_weights(Histogram.OFFSET(float(other)))
            return self
        else:
            # Histograms can only be added to other histograms or scalars.
            # The bare NotImplemented singleton (not a tuple) is what lets
            # Python try the reflected operation or raise a TypeError.
            return NotImplemented

    def __sub__(self, other):
        """ Overload the subtraction function.

        Same conventions as __add__ (scalars act in place)."""
        if isinstance(other, Histogram):
            return self.__class__.combine(self,other,Histogram.SUBTRACT)
        elif isinstance(other, (int, float)):
            self.alter_weights(Histogram.OFFSET(-float(other)))
            return self
        else:
            # Histograms can only be subtracted to other histograms or scalars.
            return NotImplemented

    def __mul__(self, other):
        """ Overload the multiplication function.

        Same conventions as __add__ (scalars act in place)."""
        if isinstance(other, Histogram):
            return self.__class__.combine(self,other,Histogram.MULTIPLY)
        elif isinstance(other, (int, float)):
            self.alter_weights(Histogram.RESCALE(float(other)))
            return self
        else:
            # Histograms can only be multiplied to other histograms or scalars.
            return NotImplemented

    def __div__(self, other):
        """ Overload the division function.

        Same conventions as __add__ (scalars act in place)."""
        if isinstance(other, Histogram):
            return self.__class__.combine(self,other,Histogram.DIVIDE)
        elif isinstance(other, (int, float)):
            self.alter_weights(Histogram.RESCALE(1.0/float(other)))
            return self
        else:
            # Histograms can only be divided with other histograms or scalars.
            return NotImplemented

    # 'from __future__ import division' routes '/' to __truediv__.
    __truediv__ = __div__
603 -class HwU(Histogram):
604 """A concrete implementation of an histogram plots using the HwU format for 605 reading/writing histogram content.""" 606 607 allowed_dimensions = [2] 608 allowed_types = [] 609 610 # For now only HwU output format is implemented. 611 output_formats_implemented = ['HwU','gnuplot'] 612 # Lists the mandatory named weights that must be specified for each bin and 613 # what corresponding label we assign them to in the Bin weight dictionary, 614 # (if any). 615 mandatory_weights = {'xmin':'boundary_xmin', 'xmax':'boundary_xmax', 616 'central value':'central', 'dy':'stat_error'} 617 618 # ======================== 619 # Weight name parser RE's 620 # ======================== 621 # This marks the start of the line that defines the name of the weights 622 weight_header_start_re = re.compile('^##.*') 623 # This is the format of a weight name specifier. It is much more complicated 624 # than necessary because the HwU standard allows for spaces from within 625 # the name of a weight 626 weight_header_re = re.compile( 627 '&\s*(?P<wgt_name>(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') 628 629 # ================================ 630 # Histo weight specification RE's 631 # ================================ 632 # The start of a plot 633 histo_start_re = re.compile('^\s*<histogram>\s*(?P<n_bins>\d+)\s*"\s*'+ 634 '(?P<histo_name>(\S|(\s(?!\s*")))+)\s*"\s*$') 635 # A given weight specifier 636 a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' 
637 histo_bin_weight_re = re.compile('(?P<weight>%s|NaN)'%a_float_re,re.IGNORECASE) 638 a_int_re = '[\+|-]?\d+' 639 640 # The end of a plot 641 histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') 642 # A scale type of weight 643 weight_label_scale = re.compile('^\s*mur\s*=\s*(?P<mur_fact>%s)'%a_float_re+\ 644 '\s*muf\s*=\s*(?P<muf_fact>%s)\s*$'%a_float_re,re.IGNORECASE) 645 weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P<PDF_set>\d+)\s*$') 646 weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P<PDF_set>\d+)\s*$') 647 weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P<Merging_scale>%s)\s*$'%a_float_re) 648 weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P<alpsfact>%s)\s*$'%a_float_re, 649 re.IGNORECASE) 650 651 weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P<dyn_choice>%s)'%a_int_re+\ 652 '\s*mur\s*=\s*(?P<mur_fact>%s)'%a_float_re+\ 653 '\s*muf\s*=\s*(?P<muf_fact>%s)\s*$'%a_float_re,re.IGNORECASE) 654 weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P<PDF_set>\d+)\s+(?P<PDF_set_cen>\S+)\s*$') 655 656
class ParseError(MadGraph5Error):
    """Error type dedicated to histogram data parsing failures."""
@classmethod
def get_HwU_wgt_label_type(cls, wgt_label):
    """ Classify a weight label from its Python type.

    Returns:
      > the first element itself for a tuple starting with a string,
      > 'murmuf_scales' for a tuple starting with a float,
      > 'merging_scale' for a float,
      > 'pdfset' for an integer,
      > 'UNKNOWN_TYPE' for anything else (plain strings included)."""

    unknown = 'UNKNOWN_TYPE'

    if isinstance(wgt_label, tuple):
        if len(wgt_label) == 0:
            return unknown
        head = wgt_label[0]
        if isinstance(head, float):
            return 'murmuf_scales'
        if isinstance(head, str):
            return head
        # A tuple starting with anything else is not a known label.
        return unknown

    if isinstance(wgt_label, str):
        return unknown
    if isinstance(wgt_label, float):
        return 'merging_scale'
    if isinstance(wgt_label, int):
        return 'pdfset'

    # No clue otherwise
    return unknown
680 681
def __init__(self, file_path=None, weight_header=None,
             raw_labels=False, consider_reweights='ALL',
             selected_central_weight=None, **opts):
    """ Read one plot from a file_path or a stream. Notice that this
    constructor only reads one, and the first one, of the plots specified.
    If file_path was a path in argument, it would then close the opened stream.
    If file_path was a stream in argument, it would leave it open.
    The option weight_header specifies an ordered list of weight names
    to appear in the file specified.
    The option 'raw_labels' specifies that one wants to import the
    histogram data with no treatment of the weight labels at all
    (this is used for the matplotlib output).
    The remaining keyword options are forwarded to Histogram.__init__.

    Raises MadGraph5Error if file_path is neither a path nor a stream."""

    super(HwU, self).__init__(**opts)

    self.dimension = 2

    if file_path is None:
        return

    opened_here = isinstance(file_path, str)
    if opened_here:
        stream = open(file_path,'r')
    elif hasattr(file_path, 'read'):
        # Any readable file-like object is accepted as a stream, not only
        # instances of the Python 2 'file' builtin.
        stream = file_path
    else:
        raise MadGraph5Error("Argument file_path '%s' for HwU init"
         %str(file_path)+"ialization must be either a file path or a stream.")

    try:
        # Attempt to find the weight headers if not specified
        if not weight_header:
            weight_header = HwU.parse_weight_header(stream, raw_labels=raw_labels)

        if not self.parse_one_histo_from_stream(stream, weight_header,
                              consider_reweights=consider_reweights,
                              selected_central_weight=selected_central_weight,
                              raw_labels=raw_labels):
            # Indicate that the initialization of the histogram was unsuccessful
            # by setting the BinList property to None (the validating
            # Histogram.__setattr__ is bypassed on purpose).
            super(Histogram,self).__setattr__('bins',None)
    finally:
        # Only close what was opened here, even when the parsing raised.
        if opened_here:
            stream.close()
723
def addEvent(self, x_value, weights = 1.0):
    """ Forward the event to every bin whose range [lower, upper) contains
    x_value. Nothing happens if no bin matches."""

    for candidate in self.bins:
        bounds = candidate.boundaries
        if not (bounds[0] <= x_value < bounds[1]):
            continue
        candidate.addEvent(weights = weights)
730
def get(self, name):
    """ Return one value per bin: the lower boundary when name is 'bins',
    the weight registered under 'name' otherwise."""

    if name != 'bins':
        return [entry.wgts[name] for entry in self.bins]

    return [entry.boundaries[0] for entry in self.bins]
737
def add_line(self, names):
    """add a column to the HwU. name can be a list

    Names already defined in the first bin are skipped with a warning; the
    other ones are registered as weight labels and set to zero in all bins."""

    if isinstance(names, str):
        names = [names]
    else:
        names = list(names)
    #check if all the entry are new
    for name in names[:]:
        if name in self.bins[0].wgts:
            # The name is passed as logging argument to fill the '%s'.
            logger.warning("name: %s is already defines in HwU.", name)
            names.remove(name)
    #
    for name in names:
        self.bins.weight_labels.append(name)
        for bin in self.bins:
            # Bin weights are floats everywhere else in this module.
            bin.wgts[name] = 0.0
755
def get_uncertainty_band(self, selector, mode=0):
    """return two list of entry one with the minimum and one with the maximum value.
    selector can be:
        - a regular expression on the label name
        - a function returning T/F (applying on the label name)
        - a list of labels
        - a keyword ('QCUT', 'SCALE', 'ALPSFACT' or 'PDF')
    mode can be 'min/max', 'gaussian' or 'hessian'. When left to 0 it is
    'min/max', except for the 'PDF' keyword where it is guessed from the
    identifiers of the PDF sets found.

    Raises MadGraph5Error for an unsupported selector or mode.
    """

    # find the set of weights to consider
    if isinstance(selector, str):
        if selector == 'QCUT':
            selector = r'^Weight_MERGING=[\d]*[.]?\d*$'
        elif selector == 'SCALE':
            selector = r'(MUF=\d*[.]?\d*_MUR=([^1]\d*|1\d+)_PDF=\d*)[.]?\d*|(MUF=([^1]\d*|1\d+)[.]?\d*_MUR=\d*[.]?\d*_PDF=\d*)'
        elif selector == 'ALPSFACT':
            selector = r'ALPSFACT'
        elif selector == 'PDF':
            selector = r'(?:MUF=1_MUR=1_PDF=|MU(?:F|R)="1.0" MU(?:R|F)="1.0" PDF=")(\d*)'
            if not mode:
                # Same flags for findall and search, so that every label
                # accepted by search also yields an identifier.
                pdfs = [int(re.findall(selector, n, re.IGNORECASE)[0])
                        for n in self.bins[0].wgts
                        if re.search(selector, n, re.IGNORECASE)]
                # Without any PDF weight the mode is left untouched and the
                # sanity check below returns the null band.
                if pdfs:
                    min_pdf, max_pdf = min(pdfs), max(pdfs)
                    if max_pdf - min_pdf > 100:
                        mode = 'min/max'
                    elif max_pdf <= 90000:
                        mode = 'hessian'
                    else:
                        mode = 'gaussian'
        selections = [n for n in self.bins[0].wgts
                      if re.search(selector, n, re.IGNORECASE)]
    elif hasattr(selector, '__call__'):
        selections = [n for n in self.bins[0].wgts if selector(n)]
    elif isinstance(selector, (list, tuple)):
        selections = selector
    else:
        raise MadGraph5Error("The selector '%s' is not supported for the "
                   %str(selector)+"computation of an uncertainty band.")

    # find the way to find the minimal/maximal curve
    if not mode:
        mode = 'min/max'

    # build the collection of values
    values = [self.get(s) for s in selections]

    #sanity check
    if not len(values):
        return [0] * len(self.bins), [0]* len(self.bins)
    elif len(values) ==1:
        return values[0], values[0]

    # Start the real work
    if mode == 'min/max':
        min_value, max_value = [], []
        # zip(*values) yields, for each bin, the value of every selection.
        for data in zip(*values):
            min_value.append(min(data))
            max_value.append(max(data))
    elif mode == 'gaussian':
        # use Gaussian method (NNPDF)
        min_value, max_value = [], []
        for data in zip(*values):
            sdata = sum(data)/len(data)
            sdata2 = sum(x**2 for x in data)/len(data)
            # max() protects against a tiny negative variance (rounding).
            pdf_stdev = math.sqrt(max(sdata2 -sdata**2,0.0))
            min_value.append(sdata - pdf_stdev)
            max_value.append(sdata + pdf_stdev)

    elif mode == 'hessian':
        # For old PDF this is based on the set ordering ->
        #need to order the pdf sets:
        # NOTE(review): this branch reuses 'selector' as a regular
        # expression, so it presumably only works with string selectors
        # capturing the PDF identifier -- confirm with callers.
        pdfs = [(int(re.findall(selector, n, re.IGNORECASE)[0]),n)
                for n in self.bins[0].wgts
                if re.search(selector, n, re.IGNORECASE)]
        pdfs.sort()

        # check if the central was put or not in this sets:
        if len(pdfs) % 2:
            # adding the central automatically
            pdf1 = pdfs[0][0]
            central = pdf1 -1
            name = pdfs[0][1].replace(str(pdf1), str(central))
            central = self.get(name)
        else:
            central = self.get(pdfs.pop(0)[1])

        #rebuilt the collection of values but this time ordered correctly
        values = [self.get(name) for _, name in pdfs]

        #Do the computation
        min_value, max_value = [], []
        for i in range(len(values[0])):
            pdf_up = 0
            pdf_down = 0
            cntrl_val = central[i]
            # The sets are consumed by pairs (2*s, 2*s+1).
            for s in range(len(pdfs)//2):
                pdf_up += max(0.0,values[2*s][i] - cntrl_val,
                                  values[2*s+1][i] - cntrl_val)**2
                pdf_down += max(0.0,cntrl_val - values[2*s][i],
                                    cntrl_val - values[2*s+1][i])**2

            min_value.append(cntrl_val - math.sqrt(pdf_down))
            max_value.append(cntrl_val + math.sqrt(pdf_up))
    else:
        raise MadGraph5Error("The mode '%s' is not supported for the "
                       %str(mode)+"computation of an uncertainty band.")

    return min_value, max_value
870
def get_formatted_header(self):
    """ Return a HwU formatted header for the weight label definition.

    One column name is produced for each weight label other than 'central'
    and 'stat_error', in the order of self.bins.weight_labels."""

    res = '##& xmin & xmax & '

    if 'central' in self.bins.weight_labels:
        res += 'central value & dy & '

    others = []
    for label in self.bins.weight_labels:
        if label in ['central', 'stat_error']:
            continue
        label_type = HwU.get_HwU_wgt_label_type(label)
        if label_type == 'UNKNOWN_TYPE':
            others.append(label)
        elif label_type == 'scale':
            others.append('muR=%6.3f muF=%6.3f'%(label[1],label[2]))
        elif label_type == 'scale_adv':
            others.append('dyn=%i muR=%6.3f muF=%6.3f'%(label[1],label[2],label[3]))
        elif label_type == 'merging_scale':
            # get_HwU_wgt_label_type also returns 'merging_scale' for a bare
            # float label, which cannot be indexed.
            tms = label[1] if isinstance(label, tuple) else label
            others.append('TMS=%4.2f'%tms)
        elif label_type == 'pdf':
            others.append('PDF=%i'%(label[1]))
        elif label_type == 'pdf_adv':
            others.append('PDF=%i %s'%(label[1],label[2]))
        elif label_type == 'alpsfact':
            others.append('alpsfact=%d'%label[1])
        else:
            # Never drop a label: the bin lines written by get_HwU_source
            # contain one column per weight label, so a missing name would
            # shift all the following columns.
            others.append(str(label))

    return res+' & '.join(others)
900
def get_HwU_source(self, print_header=True):
    """ Returns the representation of this histogram in the HwU standard,
    as a list of lines (without trailing newlines).

    If print_header is True, the weight label header and an empty line are
    placed before the '<histogram>' opening line.
    """

    lines = []
    if print_header:
        lines.append(self.get_formatted_header())
        lines.append('')
    lines.append('<histogram> %s "%s"' % (len(self.bins),
                             self.get_HwU_histogram_name(format='HwU')))

    # Weights other than the central value and its error, in label order.
    extra_labels = [key for key in self.bins.weight_labels
                    if key not in ['central', 'stat_error']]

    for hbin in self.bins:
        leading = list(hbin.boundaries)
        if 'central' in hbin.wgts:
            leading.extend([hbin.wgts['central'], hbin.wgts['stat_error']])
        line = ' '.join('%+16.7e' % value for value in leading)
        # The extra weights are appended without an explicit separator: the
        # left padding of the 16-character field keeps the columns apart.
        line += ' '.join('%+16.7e' % hbin.wgts[key] for key in extra_labels)
        lines.append(line)

    # The backslash is escaped explicitly: '\h' is not a valid escape
    # sequence and only worked by accident.
    lines.append('<\\histogram>')
    return lines
921
def output(self, path=None, format='HwU', print_header=True):
    """ Output this histogram to a file, a stream, or a string.

    path         : None to get the text back as a string, a file name (str)
                   to write to that file, or any object with a 'write'
                   method to write to that stream.
    format       : output format; it must be listed in
                   HwU.output_formats_implemented, and only 'HwU' can be
                   produced by this function.
    print_header : whether to include the weight label header.

    Returns the text when path is None and True after a successful write.
    Raises MadGraph5Error for an unsupported format or path type.
    """

    if format not in HwU.output_formats_implemented:
        raise MadGraph5Error("The specified output format '%s'" % format +
                  " is not yet supported. Supported formats are %s."
                                    % (HwU.output_formats_implemented,))

    if format != 'HwU':
        # Without this guard the text below would never be built and the
        # function would fail on an unbound local variable.
        raise MadGraph5Error("The output format '%s'" % format +
                  " cannot be produced for a single histogram.")

    text = '\n'.join(self.get_HwU_source(print_header=print_header))

    if path is None:
        return text

    if isinstance(path, str):
        # The context manager closes the file even if the write fails.
        with open(path, 'w') as stream:
            stream.write(text)
    elif hasattr(path, 'write'):
        # Any writable stream is accepted, not only genuine file objects.
        path.write(text)
    else:
        # Previously nothing was written and True was still returned.
        raise MadGraph5Error("Cannot output the histogram to an object " +
                  "of type '%s'." % path.__class__.__name__)

    # Successful writeout
    return True
946
def test_plot_compability(self, other, consider_type=True,
                          consider_unknown_weight_labels=True):
    """ Test whether the defining attributes of self are identical to those
    of 'other', typically to make sure that they are the same plots but from
    different runs, and that they can be summed safely. We however don't
    want to overload the __eq__ because it is still a more superficial check.

    consider_type                  : also require identical histogram types.
    consider_unknown_weight_labels : also require identical sets of weight
                                     labels of unrecognised type.

    Returns True if the two histograms are compatible, False otherwise.
    """

    def split_labels(histo):
        """ Return the sets of (known, unknown) weight labels of histo."""
        known, unknown = set(), set()
        for label in histo.bins.weight_labels:
            if HwU.get_HwU_wgt_label_type(label) == 'UNKNOWN_TYPE':
                unknown.add(label)
            else:
                known.add(label)
        return known, unknown

    this_known, this_unknown = split_labels(self)
    other_known, other_unknown = split_labels(other)

    if self.title != other.title:
        return False
    if this_known != other_known:
        return False
    if consider_unknown_weight_labels and this_unknown != other_unknown:
        return False
    if consider_type and self.type != other.type:
        return False
    # The axis modes must be compared with those of 'other'; comparing self
    # with itself could never detect a mismatch.
    if self.x_axis_mode != other.x_axis_mode or \
                                    self.y_axis_mode != other.y_axis_mode:
        return False
    # zip() stops at the shortest list, so differing bin counts must be
    # rejected explicitly.
    if len(self.bins) != len(other.bins):
        return False
    if any(b1.boundaries != b2.boundaries for (b1, b2) in
                                               zip(self.bins, other.bins)):
        return False

    return True
@classmethod
def parse_weight_header(cls, stream, raw_labels=False):
    """ Read a given stream until it finds a header specifying the weights
    and then returns them.

    With raw_labels set, only the 'xmin'/'xmax' names are replaced by their
    entry in cls.mandatory_weights and all other names are returned as read.
    Otherwise every mandatory name is replaced, and names matching one of
    the scale / PDF / merging scale / alpsfact patterns are turned into
    tuples whose first entry is the label type.

    Raises HwU.ParseError if a header misses a mandatory weight name or if
    no header is found in the stream.
    """

    # Patterns are tried in this order and the first match wins; the
    # 'advanced' variants must come before their simple counterparts.
    converters = (
        (HwU.weight_label_scale_adv, lambda m: ('scale_adv',
                                        int(m.group('dyn_choice')),
                                        float(m.group('mur_fact')),
                                        float(m.group('muf_fact')))),
        (HwU.weight_label_scale, lambda m: ('scale',
                                        float(m.group('mur_fact')),
                                        float(m.group('muf_fact')))),
        (HwU.weight_label_PDF_adv, lambda m: ('pdf_adv',
                                        int(m.group('PDF_set')),
                                        m.group('PDF_set_cen'))),
        (HwU.weight_label_PDF, lambda m: ('pdf', int(m.group('PDF_set')))),
        (HwU.weight_label_TMS, lambda m: ('merging_scale',
                                        float(m.group('Merging_scale')))),
        (HwU.weight_label_alpsfact, lambda m: ('alpsfact',
                                        float(m.group('alpsfact')))),
    )

    for line in stream:
        if not cls.weight_header_start_re.match(line):
            continue

        header = [h.group('wgt_name') for h in
                                      cls.weight_header_re.finditer(line)]
        if any((name not in header) for name in cls.mandatory_weights):
            raise HwU.ParseError("The mandatory weight names %s"
                % str(cls.mandatory_weights.keys()) + " are not all present" +
                " in the following HwU header definition:\n %s" % line)

        if raw_labels:
            # Only rename the labels corresponding to the bin edges and
            # return the header with no further modification.
            return [(h if h not in ['xmin', 'xmax'] else
                     cls.mandatory_weights[h]) for h in header]

        # Apply the replacement rules specified in mandatory_weights
        header = [(h if h not in cls.mandatory_weights else
                   cls.mandatory_weights[h]) for h in header]

        # Weights labelled e.g. 'muR=2.0 muF=1.0' are stored as tuples
        for i, name in enumerate(header):
            for pattern, convert in converters:
                match = pattern.match(name)
                if match:
                    header[i] = convert(match)
                    break

        return header

    raise HwU.ParseError("The weight headers could not be found.")
1037 1038
def process_histogram_name(self, histogram_name):
    """ Parse the histogram name for tags which would set its various
    attributes.

    The name is split on '|'. The first field is the title; each following
    field must read <attribute_name>@<attribute_value>, unless it has no
    '@' and starts with '#', in which case it is ignored. Attribute names
    and values are stripped and upper-cased.

    Raises MadGraph5Error for a malformed or unknown specifier.
    """

    fields = histogram_name.split('|')
    self.title = fields[0].strip()

    for tag in fields[1:]:
        stag = tag.split('@')
        if len(stag) == 1 and stag[0].startswith('#'):
            continue
        if len(stag) != 2:
            # The message documents the syntax actually parsed here.
            raise MadGraph5Error('Specifier in title must have the' +
                " syntax <attribute_name>@<attribute_value>, not '%s'."
                                                            % tag.strip())
        # Now list all supported modifiers here
        attribute, value = [t.strip().upper() for t in stag]
        if attribute in ['T', 'TYPE']:
            self.type = value
        elif attribute in ['X_AXIS', 'X']:
            self.x_axis_mode = value
        elif attribute in ['Y_AXIS', 'Y']:
            self.y_axis_mode = value
        elif attribute in ['JETSAMPLE', 'JS']:
            self.jetsample = int(value)
        else:
            raise MadGraph5Error("Specifier '%s' not recognized."
                                                              % attribute)
1064
def get_HwU_histogram_name(self, format='human'):
    """ Returns the histogram name in the HwU syntax or human readable.

    format='human'         : title, followed by the type and the jet sample
                             when they are defined.
    format='human-no_type' : title only.
    format='HwU'           : title followed by the '|<NAME>@<value>' tags.

    Any other format value yields None.
    """

    type_map = {'NLO': 'NLO', 'LO': 'LO', 'AUX': 'auxiliary histogram'}

    if format == 'human':
        name = self.title
        # An empty type is treated like a missing one, as in the 'HwU'
        # branch; indexing the split of an empty string would fail.
        if self.type:
            if self.type in type_map:
                name += ', %s' % type_map[self.type]
            elif self.type.split()[:1] == ['NLO']:
                # Types such as 'NLO <option>' are displayed as plain NLO
                name += ', %s' % 'NLO'
            else:
                name += ', %s' % str(self.type)
        if hasattr(self, 'jetsample'):
            if self.jetsample == -1:
                name += ', all jet samples'
            else:
                name += ', Jet sample %d' % self.jetsample
        return name

    if format == 'human-no_type':
        return self.title

    if format == 'HwU':
        parts = [self.title,
                 '|X_AXIS@%s' % self.x_axis_mode,
                 '|Y_AXIS@%s' % self.y_axis_mode]
        if hasattr(self, 'jetsample'):
            parts.append('|JETSAMPLE@%d' % self.jetsample)
        if self.type:
            parts.append('|TYPE@%s' % self.type)
        return ' '.join(parts)

    return None
1099
def parse_one_histo_from_stream(self, stream, all_weight_header,
        consider_reweights='ALL', raw_labels=False,
        selected_central_weight=None):
    """ Reads *one* histogram from a stream, with the mandatory specification
    of the ordered list of weight names. Return True or False depending
    on whether the starting definition of a new plot could be found in this
    stream.

    all_weight_header       : ordered labels of all columns of a bin line,
                              including 'boundary_xmin' and 'boundary_xmax'.
    consider_reweights      : 'ALL', or the label types (as returned by
                              HwU.get_HwU_wgt_label_type) to keep in
                              addition to the central value, its error and
                              the bin boundaries.
    raw_labels              : keep all weights and do not trim the
                              auxiliary ones after parsing.
    selected_central_weight : if not None, label of the column stored as
                              'central' instead of the original one.

    Raises HwU.ParseError when a bin line holds too many or too few weights
    or when fewer bins than announced are found.
    """
    n_bins = 0

    if consider_reweights == 'ALL' or raw_labels:
        weight_header = all_weight_header
    else:
        # Filter the weights to consider based on the user selection
        always_kept = ['central', 'stat_error',
                       'boundary_xmin', 'boundary_xmax']
        weight_header = [wgt_label for wgt_label in all_weight_header
            if wgt_label in always_kept or
            HwU.get_HwU_wgt_label_type(wgt_label) in consider_reweights]

    # Find the starting point of the next histogram in the stream
    for line in stream:
        start = HwU.histo_start_re.match(line)
        if start is None:
            continue
        self.process_histogram_name(start.group('histo_name'))
        # We do not want to include auxiliary diagrams which would be
        # recreated anyway.
        if self.type == 'AUX':
            continue
        n_bins = int(start.group('n_bins'))
        # Make sure to exclude the boundaries from the weight specification
        self.bins = BinList(weight_labels=[wgt_label for wgt_label in
            weight_header if wgt_label not in
                                      ['boundary_xmin', 'boundary_xmax']])
        break

    # Now look for the bin weights definition
    for line_bin in stream:
        bin_weights = {}
        boundaries = [0.0, 0.0]
        for j, weight in \
                  enumerate(HwU.histo_bin_weight_re.finditer(line_bin)):
            if j == len(all_weight_header):
                # Report the limit which is actually tested above.
                raise HwU.ParseError("There are more bin weights" +
                    " specified than expected (%i)" % len(all_weight_header))
            column = all_weight_header[j]
            if selected_central_weight == column:
                bin_weights['central'] = float(weight.group('weight'))
            if column == 'boundary_xmin':
                boundaries[0] = float(weight.group('weight'))
            elif column == 'boundary_xmax':
                boundaries[1] = float(weight.group('weight'))
            elif column == 'central' and \
                                     selected_central_weight is not None:
                # The original central value is superseded by the
                # selected weight.
                continue
            elif column in weight_header:
                bin_weights[column] = float(weight.group('weight'))

        # For the HwU format, we know that exactly two 'weights'
        # specified in the weight_header are in fact the boundary
        # coordinate, so we must subtract two.
        if len(bin_weights) < (len(weight_header) - 2):
            raise HwU.ParseError(" There are only %i weights"
                % len(bin_weights) + " specified and %i were expected."
                % (len(weight_header) - 2))
        self.bins.append(Bin(tuple(boundaries), bin_weights))
        if len(self.bins) == n_bins:
            break

    if len(self.bins) != n_bins:
        raise HwU.ParseError("%i bin specification " % len(self.bins) +
                             "were found and %i were expected." % n_bins)

    # Now jump to the next <\histo> tag.
    for line_end in stream:
        if HwU.histo_end_re.match(line_end):
            # Finally, remove all the auxiliary weights, but only if not
            # asking for raw labels
            if not raw_labels:
                self.trim_auxiliary_weights()
            # End of successful parsing this histogram, so return True.
            return True

    # Could not find a plot definition starter in this stream, return False
    return False
1184
def trim_auxiliary_weights(self):
    """ Drop every auxiliary weight, i.e. every weight whose label is a
    string ending with '@aux', from all bins and from the list of weight
    labels (they will be regenerated anyway)."""

    def is_auxiliary(label):
        return isinstance(label, str) and label.endswith('@aux')

    auxiliary_labels = [label for label in self.bins.weight_labels
                        if is_auxiliary(label)]
    for hbin in self.bins:
        for label in auxiliary_labels:
            # A bin that does not carry this weight is left as it is
            hbin.wgts.pop(label, None)

    self.bins.weight_labels = [label for label in self.bins.weight_labels
                               if not is_auxiliary(label)]
1199
def set_uncertainty(self, type='all_scale', lhapdfconfig='lhapdf-config'):
    """ Adds to the bins the auxiliary weights describing an uncertainty.

    type         : case-insensitive; one of 'mur', 'muf', 'all_scale',
                   'pdf', 'merging' or 'alpsfact'.
    lhapdfconfig : lhapdf-config executable used to locate the python
                   interface of LHAPDF (only used for 'pdf').

    The relevant weights are split into groups (one per dynamical scale
    choice or per PDF set, plus one labelled 'none' for the weights without
    such a tag). For each group, three weights '<name>_cen', '<name>_min'
    and '<name>_max' (suffixed with '@aux') are added to every bin. For all
    types but 'pdf' they hold the first weight of the group and the
    envelope of the group. For 'pdf' they come from LHAPDF when its python
    module is available and the group carries a PDF set name, and from a
    built-in Hessian / Gaussian estimate otherwise.

    Returns (positions, labels): for each group, the position in
    self.bins.weight_labels where its new labels were inserted and the
    group label. Returns (None, [None]) when there is no weight to use.
    Raises MadGraph5Error for an unknown type.
    """

    kind = type.upper()
    # type -> (stem of the new labels, position of the varied scale)
    settings = {'MUR': ('delta_mur', 1),
                'MUF': ('delta_muf', 2),
                'ALL_SCALE': ('delta_mu', -1),
                'PDF': ('delta_pdf', -2),
                'MERGING': ('delta_merging', None),
                'ALPSFACT': ('delta_alpsfact', None)}
    if kind not in settings:
        raise MadGraph5Error(' The function set_uncertainty can' +
            " only handle the scales 'mur', 'muf', 'all_scale', 'pdf'," +
            "'merging' or 'alpsfact'.")
    new_wgt_label, scale_position = settings[kind]

    def labels_of_type(label_type):
        """ Weight labels of the given type, in their current order."""
        return [label for label in self.bins.weight_labels
                if HwU.get_HwU_wgt_label_type(label) == label_type]

    def grouped(label_type, key_index):
        """ (key, labels) pairs of the labels of the given type sharing the
        same label[key_index], in order of first appearance of the key."""
        members = labels_of_type(label_type)
        keys = []
        for label in members:
            if label[key_index] not in keys:
                keys.append(label[key_index])
        return [(key, [label for label in members
                       if label[key_index] == key]) for key in keys]

    def find_lhapdf():
        """ Return the python module of LHAPDF, or None if unavailable."""
        try:
            lhapdf_libdir = subprocess.Popen([lhapdfconfig, '--libdir'],
                          stdout=subprocess.PIPE).stdout.read().strip()
        except Exception:
            # lhapdf-config cannot be run: only try the plain import below
            lhapdf_libdir = None

        libdirs = []
        if lhapdf_libdir is not None:
            libdirs = [lhapdf_libdir, lhapdf_libdir + '64']
        for libdir in libdirs:
            try:
                candidates = [dirname for dirname in os.listdir(libdir)
                        if os.path.isdir(os.path.join(libdir, dirname))]
            except OSError:
                candidates = []
            for candidate in candidates:
                site_packages = os.path.join(libdir, candidate,
                                                         'site-packages')
                if not os.path.isfile(os.path.join(site_packages,
                                                           'lhapdf.so')):
                    continue
                sys.path.insert(0, site_packages)
                try:
                    import lhapdf
                    return lhapdf
                except ImportError:
                    sys.path.pop(0)

        try:
            import lhapdf
            return lhapdf
        except ImportError:
            logger.warning("Failed to access python version of LHAPDF: "\
                "cannot compute PDF uncertainty from the "\
                "weights in the histograms. The weights in the HwU data files " \
                "still cover all PDF set members, "\
                "but the automatic computation of the uncertainties from "\
                "those weights might not be correct. \n "\
                "If the python interface to LHAPDF is available on your system, try "\
                "adding its location to the PYTHONPATH environment variable and the "\
                "LHAPDF library location to LD_LIBRARY_PATH (linux) or DYLD_LIBRARY_PATH (mac os x).")
            return None

    # [low, high) ranges of PDF set numbers treated as PDF4LHC15 Hessian sets
    pdf4lhc15_hessian_ranges = ((90200, 90303), (90400, 90433),
                                (90700, 90801), (90900, 90931),
                                (91200, 91303), (91400, 91433),
                                (91700, 91801), (91900, 91931))

    def fallback_pdf_bounds(set_number, cntrl_val, pdfs):
        """ (lower, upper) bounds computed without LHAPDF, from the sorted
        member values pdfs and the central value cntrl_val."""
        if set_number <= 90000:
            # use Hessian method (CTEQ & MSTW)
            if len(pdfs) <= 2:
                return pdfs[0], pdfs[0]
            pdf_up = 0.0
            pdf_down = 0.0
            for i in range((len(pdfs) - 1) // 2):
                pdf_up += max(0.0, pdfs[2*i+1] - cntrl_val,
                                   pdfs[2*i+2] - cntrl_val)**2
                pdf_down += max(0.0, cntrl_val - pdfs[2*i+1],
                                     cntrl_val - pdfs[2*i+2])**2
            return (cntrl_val - math.sqrt(pdf_down),
                    cntrl_val + math.sqrt(pdf_up))
        squares = 0.0
        for pdf in pdfs[1:]:
            squares += (pdf - cntrl_val)**2
        if any(low <= set_number < high for (low, high) in
                                              pdf4lhc15_hessian_ranges):
            # PDF4LHC15 Hessian sets
            pdf_stdev = math.sqrt(squares)
        else:
            # use Gaussian method (NNPDF)
            pdf_stdev = math.sqrt(squares / float(len(pdfs) - 2))
        return cntrl_val - pdf_stdev, cntrl_val + pdf_stdev

    # Collect the groups of weights as (group label, weight labels) pairs
    groups = []
    if kind == 'MERGING':
        groups.append(('none', labels_of_type('merging_scale')))
    elif kind == 'ALPSFACT':
        groups.append(('none', labels_of_type('alpsfact')))
    elif kind == 'PDF':
        groups.extend(grouped('pdf_adv', 2))
        plain = labels_of_type('pdf')
        if plain:
            groups.append(('none', plain))
    else:
        groups.extend(grouped('scale_adv', 1))
        plain = labels_of_type('scale')
        if plain:
            groups.append(('none', plain))
        if scale_position > -1:
            # Remove the dependence on the other scale by keeping only the
            # weights where it sits at its central value. The groups are
            # rebuilt in a new list: editing the list while looping over it
            # would skip groups and break their pairing with their labels.
            groups = [(key, [label for label in wgts
                             if label[-scale_position] == 1.0])
                      for (key, wgts) in groups]

    # Empty groups cannot provide any envelope
    groups = [(key, wgts) for (key, wgts) in groups if wgts]
    if not groups:
        # No envelope can be constructed, it is not worth adding the weights
        return (None, [None])

    # find and import python version of lhapdf if doing PDF uncertainties
    lhapdf = None
    if kind == 'PDF':
        lhapdf = find_lhapdf()
        if lhapdf is not None:
            lhapdf.setVerbosity(0)

    position = []
    labels = []
    for group_label, wgts in groups:
        if group_label != 'none':
            new_wgt_labels = ['%s_cen %s @aux' % (new_wgt_label, group_label),
                              '%s_min %s @aux' % (new_wgt_label, group_label),
                              '%s_max %s @aux' % (new_wgt_label, group_label)]
        else:
            new_wgt_labels = ['%s_cen @aux' % new_wgt_label,
                              '%s_min @aux' % new_wgt_label,
                              '%s_max @aux' % new_wgt_label]

        # Place the new weight labels right before the first non-string
        # (tuple) label, or last if there is none.
        try:
            pos = [(not isinstance(lab, str)) for lab in
                                  self.bins.weight_labels].index(True)
            self.bins.weight_labels = self.bins.weight_labels[:pos] + \
                        new_wgt_labels + self.bins.weight_labels[pos:]
        except ValueError:
            pos = len(self.bins.weight_labels)
            self.bins.weight_labels.extend(new_wgt_labels)
        position.append(pos)
        labels.append(group_label)

        with_lhapdf = (kind == 'PDF' and lhapdf is not None and
                       group_label != 'none')
        if with_lhapdf:
            pdf_set = lhapdf.getPDFSet(group_label)

        # Now add the corresponding weights to all bins
        for hbin in self.bins:
            if kind != 'PDF':
                central = hbin.wgts[wgts[0]]
                lower = min(hbin.wgts[label] for label in wgts)
                upper = max(hbin.wgts[label] for label in wgts)
            elif with_lhapdf and len(wgts) > 1:
                pdfs = [hbin.wgts[pdf] for pdf in sorted(wgts)]
                ep = pdf_set.uncertainty(pdfs, -1)
                central = ep.central
                lower = ep.central - ep.errminus
                upper = ep.central + ep.errplus
            elif with_lhapdf:
                # Single member: no spread. This must test the size of the
                # group, not the number of weights stored in the bin.
                central = lower = upper = hbin.wgts[wgts[0]]
            else:
                pdfs = [hbin.wgts[pdf] for pdf in sorted(wgts)]
                # The method is selected from the PDF set number, stored as
                # the second entry of the label tuple; the label itself
                # cannot be meaningfully compared with an integer.
                lower, upper = fallback_pdf_bounds(wgts[0][1],
                                             hbin.wgts['central'], pdfs)
                central = hbin.wgts[wgts[0]]
            hbin.wgts[new_wgt_labels[0]] = central
            hbin.wgts[new_wgt_labels[1]] = lower
            hbin.wgts[new_wgt_labels[2]] = upper

    # Return, for each group, the position in self.bins.weight_labels of
    # the first of its new weight labels, together with the group labels.
    return (position, labels)
def select_central_weight(self, selected_label):
    """ Use the weight with the specified label (e.g. that of a specific
    merging scale) as the central value of this histogram: its value is
    copied to the 'central' weight of every bin.

    Raises MadGraph5Error if the label is not among the weight labels.
    """
    if selected_label not in self.bins.weight_labels:
        # The label is wrapped in a tuple: labels can themselves be tuples,
        # which '%' would otherwise unpack as several arguments.
        raise MadGraph5Error("Selected weight label '%s' could not be "
                             "found in this HwU." % (selected_label,))

    for hbin in self.bins:
        hbin.wgts['central'] = hbin.wgts[selected_label]
1459
def rebin(self, n_rebin):
    """ Merge every n_rebin consecutive bins of the x-axis into a single
    one (the last merged bin may gather fewer bins). All weights are summed,
    except 'stat_error' which is summed in quadrature. Nothing is done when
    n_rebin is 1 or when the histogram type contains 'NOREBIN'.

    Raises MadGraph5Error if n_rebin is not an integer larger or equal
    to 1."""

    if n_rebin < 1 or not isinstance(n_rebin, int):
        raise MadGraph5Error("The argument 'n_rebin' of the HwU function"+\
          " 'rebin' must be larger or equal to 1, not '%s'."%str(n_rebin))
    if n_rebin == 1:
        return

    if self.type and 'NOREBIN' in self.type.upper():
        return

    # Start from an emptied shallow copy so that the new list keeps the
    # attributes of the current one.
    merged_bins = copy.copy(self.bins)
    del merged_bins[:]

    for first in range(0, len(self.bins), n_rebin):
        chunk = self.bins[first:first + n_rebin]
        merged_bins.append(Bin(boundaries=(chunk[0].boundaries[0],
                          chunk[-1].boundaries[1]), wgts={'central':0.0}))
        target = merged_bins[-1]
        for weight in self.bins.weight_labels:
            if weight == 'stat_error':
                target.wgts['stat_error'] = math.sqrt(
                           sum(b.wgts['stat_error']**2 for b in chunk))
            else:
                target.wgts[weight] = sum(b.wgts[weight] for b in chunk)

    self.bins = merged_bins
@classmethod
def get_x_optimal_range(cls, histo_list, weight_labels=None):
    """ Function to determine the optimal x-axis range when plotting
    together the histos in histo_list and considering the weights
    weight_labels.

    The range spans the boundaries of all bins where at least one of the
    considered weights is non-zero, or of all bins if there is no such bin.
    Returns the tuple (x_min, x_max).
    Raises MadGraph5Error if the histograms have no bins at all.
    """

    # If no list of weight labels to consider is given, use them all.
    if weight_labels is None:
        weight_labels = histo_list[0].bins.weight_labels

    # The boundaries are gathered in a single pass rather than by summing
    # lists, which copies the accumulated list for each bin.
    all_boundaries = [edge for histo in histo_list for hbin in histo.bins
              if sum(abs(hbin.wgts[label]) for label in weight_labels) > 0.0
              for edge in hbin.boundaries]

    if len(all_boundaries) == 0:
        all_boundaries = [edge for histo in histo_list
                          for hbin in histo.bins for edge in hbin.boundaries]
        if len(all_boundaries) == 0:
            raise MadGraph5Error("The histograms with title '%s'"
                          % histo_list[0].title + " seems to have no bins.")

    return (min(all_boundaries), max(all_boundaries))
@classmethod
def get_y_optimal_range(cls, histo_list, labels=None,
                        scale='LOG', Kratio=False):
    """ Function to determine the optimal y-axis range when plotting
    together the histos in histo_list and considering the weights
    labels. The option Kratio is present to allow for the couple of
    tweaks necessary for the K-factor ratio histogram y-range.

    For scale 'LOG' only strictly positive weights are considered; for any
    other scale all weights are, together with the opposite of the
    'stat_error' weights. Returns the tuple (y_min, y_max), which is
    (1.0, 10.0) for 'LOG' and (0.0, 1.0) otherwise when no weight is left.
    """

    # If no list of weight labels to consider is given, use them all.
    if labels is None:
        weight_labels = histo_list[0].bins.weight_labels
    else:
        weight_labels = labels

    all_weights = []
    for histo in histo_list:
        for hbin in histo.bins:
            for label in weight_labels:
                value = hbin.wgts[label]
                # Filter out bin weights at *exactly* zero because they
                # often come from pathological division by zero for empty
                # bins.
                if Kratio and value == 0.0:
                    continue
                if scale != 'LOG':
                    all_weights.append(value)
                    if label == 'stat_error':
                        all_weights.append(-value)
                elif value > 0.0:
                    all_weights.append(value)

    if len(all_weights) == 0:
        if scale != 'LOG':
            return (0.0, 1.0)
        return (1.0, 10.0)

    all_weights.sort()
    # Values at the 95% and 5% positions of the sorted weights
    partial_max = all_weights[int(len(all_weights)*0.95)]
    partial_min = all_weights[int(len(all_weights)*0.05)]
    largest = all_weights[-1]
    smallest = all_weights[0]
    partial_spread = partial_max - partial_min

    # If the maximum is too far from the partial max, take the partial max
    if (largest - partial_max) > 2.0*partial_spread:
        y_max = partial_max
    else:
        y_max = largest

    # If the minimum is too far from the partial min, take the partial min
    if (partial_min - smallest) > 2.0*partial_spread and smallest != 0.0:
        y_min = partial_min
    else:
        y_min = smallest

    if Kratio:
        median = all_weights[len(all_weights)//2]
        spread = (y_max - y_min)
        # Center the range on the median when it sits too close to an edge
        if abs(y_max - median) < spread*0.05 or \
                                      abs(median - y_min) < spread*0.05:
            y_max = median + spread/2.0
            y_min = median - spread/2.0
        if y_min != y_max:
            return (y_min, y_max)

    # Enforce the full range if there is 5 bins or less
    if len(histo_list[0].bins) <= 5:
        y_min = smallest
        y_max = largest

    # Finally make sure the range has finite length
    if y_min == y_max:
        if largest == smallest:
            y_min -= 1.0
            y_max += 1.0
        else:
            y_min = smallest
            y_max = largest

    return (y_min, y_max)
1607
1608 -class HwUList(histograms_PhysicsObjectList):
1609 """ A class implementing features related to a list of Hwu Histograms. """ 1610 1611 # Define here the number of line color schemes defined. If you need more, 1612 # simply define them in the gnuplot header and increase the number below. 1613 # It must be <= 9. 1614 number_line_colors_defined = 8 1615
def is_valid_element(self, obj):
    """ Tell whether obj can be stored in this list, i.e. whether it is a
    HwU histogram or a HwUList."""

    accepted_classes = (HwU, HwUList)
    return isinstance(obj, accepted_classes)
1620
def __init__(self, file_path, weight_header=None, run_id=None,
             merging_scale=None, accepted_types_order=[],
             consider_reweights='ALL', raw_labels=False, **opts):
    """ Read all the plots specified in a file or a stream.
    File_path can be a path or a stream; any other object is passed, with
    the extra options, to the constructor of the parent list class.
    The source is first read as XML and, if this fails with an XML parsing
    error, as a HwU source.
    The option weight_header specifies an ordered list of weight names
    to appear in the file or stream specified; it is read from the source
    if not given. The option merging_scale selects the merging scale weight
    to be used as central value. If accepted_types_order is
    empty, no filter is applied, otherwise only histograms of the specified
    types will be kept, and in this specified order for a given identical
    title. The option 'consider_reweights' selects whether one wants to
    include all the extra scale/pdf/merging variation weights. Possible values
    are 'ALL' or a list of the return types of the function get_HwU_wgt_label_type().
    The option 'raw_labels' specifies that one wants to import the
    histogram data with no treatment of the weight labels at all
    (this is used for the matplotlib output).
    The option run_id is only forwarded to the XML parser.
    """
    # NOTE: the mutable default of accepted_types_order is kept so as not to
    # change the interface; the list is only read, never modified.

    stream_opened_here = isinstance(file_path, str)
    if stream_opened_here:
        stream = open(file_path, 'r')
    elif isinstance(file_path, file):
        stream = file_path
    else:
        return super(HwUList, self).__init__(file_path, **opts)

    try:
        try:
            # Try to read it in XML format
            self.parse_histos_from_PY8_XML_stream(stream, run_id,
                    merging_scale, accepted_types_order,
                    consider_reweights=consider_reweights,
                    raw_labels=raw_labels)
        except XMLParsingError:
            # Rewinding the stream
            stream.seek(0)
            # Attempt to find the weight headers if not specified
            if not weight_header:
                weight_header = HwU.parse_weight_header(stream,
                                                 raw_labels=raw_labels)

            # Select a specific merging scale if asked for:
            selected_label = None
            if merging_scale is not None:
                for label in weight_header:
                    if HwU.get_HwU_wgt_label_type(label) == 'merging_scale' \
                                   and float(label[1]) == merging_scale:
                        selected_label = label
                        break
                if selected_label is None:
                    raise MadGraph5Error(
                        "No weight could be found in the input HwU " +
                        "for the selected merging scale '%4.2f'."
                                                         % merging_scale)

            # Histograms are read one after the other, until the creation
            # of one of them leaves its bins set to None.
            new_histo = HwU(stream, weight_header, raw_labels=raw_labels,
                            consider_reweights=consider_reweights,
                            selected_central_weight=selected_label)
            while new_histo.bins is not None:
                if accepted_types_order == [] or \
                                 new_histo.type in accepted_types_order:
                    self.append(new_histo)
                new_histo = HwU(stream, weight_header,
                                raw_labels=raw_labels,
                                consider_reweights=consider_reweights,
                                selected_central_weight=selected_label)
    finally:
        # Close the stream opened here, also when the parsing failed.
        if stream_opened_here:
            stream.close()

    # Order the histograms according to their type, while keeping the order
    # of first appearance of the titles in the source.
    first_position = {}
    for index, histo in enumerate(self):
        first_position.setdefault(histo.title, index)
    type_precedence = {'NLO': 1, 'LO': 2, None: 3, 'AUX': 5}

    def ordering_function(histo):
        title_position = first_position[histo.title]
        if accepted_types_order == []:
            # Types without a defined precedence go right before 'AUX'
            return (title_position, type_precedence.get(histo.type, 4))
        return (title_position, accepted_types_order.index(histo.type))

    self.sort(key=ordering_function)
1712
def get_hist_names(self):
    """ Return the list of the names of all the histograms of this list,
    as given by their get_HwU_histogram_name function."""

    return [hist.get_HwU_histogram_name() for hist in self]
1720
def get_wgt_names(self):
    """ Return the weight labels of the first histogram of this list."""

    first_histo = self[0]
    return first_histo.bins.weight_labels
1725 1726
def get(self, name):
    """ Return the first histogram of this list whose name, as given by
    get_HwU_histogram_name, is the one specified. A NameError is raised
    if there is none."""

    found = next((hist for hist in self
                  if hist.get_HwU_histogram_name() == name), None)
    if found is None:
        raise NameError("no histogram with name: %s" % name)
    return found
1734
def parse_histos_from_PY8_XML_stream(self, stream, run_id=None,
        merging_scale=None, accepted_types_order=[],
        consider_reweights='ALL', raw_labels=False):
    """Initialize the HwU histograms from an XML stream.

    Only one <run> node is used: the one with the smallest id if run_id is
    None, or the one whose id equals int(run_id) otherwise.

    Arguments:
      stream: anything accepted by xml.dom.minidom.parse.
      run_id: id of the run to load, or None for the smallest id available.
      merging_scale: merging scale to select. When None or negative, the
        merging scale of the third header weight is used as the central one.
      accepted_types_order: not referenced by this method's body.
      consider_reweights: 'ALL', or a container of the return types of
        HwU.get_HwU_wgt_label_type() listing the variation weights to keep.
      raw_labels: if True, use the weight labels exactly as written in the
        XML header instead of interpreting them as scale/PDF/merging
        variations.

    Returns the result of retrieve_plots_from_XML_source().

    Raises MadGraph5Error if the requested run is absent, if a weight label
    cannot be parsed, or if the first four header entries are not
    (xmin, xmax, Weight, WeightError).
    """

    run_nodes = minidom.parse(stream).getElementsByTagName("run")
    all_nodes = dict((int(node.getAttribute('id')), node) for
                                                        node in run_nodes)
    selected_run_node = None
    if run_id is None:
        if len(run_nodes) > 0:
            selected_run_node = all_nodes[min(all_nodes.keys())]
    else:
        try:
            selected_run_node = all_nodes[int(run_id)]
        except (KeyError, ValueError, TypeError):
            # Unknown id, or a run_id that cannot be cast to an integer.
            selected_run_node = None

    if selected_run_node is None:
        if run_id is None:
            raise MadGraph5Error(
                      'No histogram was found in the specified XML source.')
        else:
            # '%s' rather than '%d': a non-numeric run_id must still produce
            # this error and not a formatting TypeError.
            raise MadGraph5Error(
              "Histogram with run_id '%s' was not found in the "%str(run_id)+
              "specified XML source.")

    header_labels = str(selected_run_node.getAttribute('header')).split(';')

    # If raw weight label are asked for, then simply read the weight_labels
    # directly as specified in the XML header
    if raw_labels:
        # Filter empty weights coming from the split
        weight_label_list = [wgt.strip() for wgt in header_labels if
                                             not re.match(r'^\s*$', wgt)]
        ordered_weight_label_list = [w for w in weight_label_list if w not
                                                       in ['xmin','xmax']]
        # Remove potential repetition of identical weight labels
        filtered_ordered_weight_label_list = []
        for wgt_label in ordered_weight_label_list:
            if wgt_label not in filtered_ordered_weight_label_list:
                filtered_ordered_weight_label_list.append(wgt_label)

        selected_weights = dict([ (wgt_pos,
         [wgt if wgt not in ['xmin','xmax'] else HwU.mandatory_weights[wgt]])
                 for wgt_pos, wgt in enumerate(weight_label_list) if wgt in
                         filtered_ordered_weight_label_list+['xmin','xmax']])

        return self.retrieve_plots_from_XML_source(selected_run_node,
                    selected_weights, filtered_ordered_weight_label_list,
                                                          raw_labels=True)

    # Now retrieve the header and save all weight labels as dictionaries
    # with key being properties and their values as value. If the property
    # does not define a value, then put None as a value
    all_weights = []
    for wgt_position, wgt_label in enumerate(header_labels):
        if re.match(r'^\s*$', wgt_label) is not None:
            continue
        all_weights.append({'POSITION':wgt_position})
        for wgt_item in wgt_label.strip().split('_'):
            key_value = wgt_item.strip().split('=')
            if len(key_value) == 2:
                all_weights[-1][key_value[0].strip()] = key_value[1].strip()
            elif len(key_value) == 1:
                all_weights[-1][key_value[0].strip()] = None
            else:
                raise MadGraph5Error(
                 "The weight label property %s could not be parsed."%wgt_item)

    # Now make sure that for all weights, there is 'PDF', 'MUF' and 'MUR'
    # and 'MERGING' defined. If absent we specify '-1' which implies that
    # the 'default' value was used (whatever it was).
    # Also cast them in the proper type
    for wgt_label in all_weights:
        for mandatory_attribute in ['PDF','MUR','MUF','MERGING','ALPSFACT']:
            if mandatory_attribute not in wgt_label:
                wgt_label[mandatory_attribute] = '-1'
            if mandatory_attribute=='PDF':
                wgt_label[mandatory_attribute] = \
                                         int(wgt_label[mandatory_attribute])
            elif mandatory_attribute in ['MUR','MUF','MERGING','ALPSFACT']:
                wgt_label[mandatory_attribute] = \
                                       float(wgt_label[mandatory_attribute])

    # The first four weights are treated in a special way and everything
    # below indexes them directly, so validate them before any access.
    if len(all_weights) < 4 or \
       'xmin' not in all_weights[0] or \
       'xmax' not in all_weights[1] or \
       'Weight' not in all_weights[2] or \
       'WeightError' not in all_weights[3]:
        raise MadGraph5Error('The first weight entries in the XML HwU '+
          ' source are not the standard expected ones (xmin, xmax, sigmaCentral, errorCentral)')

    # If merging cut is negative, then pick only the one of the central scale
    # If not specified, then take them all but use the PDF and scale weight
    # of the central merging_scale for the variation.
    if merging_scale is None or merging_scale < 0.0:
        merging_scale_chosen = all_weights[2]['MERGING']
    else:
        merging_scale_chosen = merging_scale

    # Central weight parameters are enforced to be those of the third weight
    central_PDF = all_weights[2]['PDF']
    # Assume central scale is one, unless specified.
    central_MUR = all_weights[2]['MUR'] if all_weights[2]['MUR']!=-1.0 else 1.0
    central_MUF = all_weights[2]['MUF'] if all_weights[2]['MUF']!=-1.0 else 1.0
    central_alpsfact = all_weights[2]['ALPSFACT'] \
                           if all_weights[2]['ALPSFACT']!=-1.0 else 1.0

    # Dictionary of selected weights with their position as key and the
    # list of weight labels they correspond to.
    selected_weights = {}
    selected_weights[0] = ['xmin']
    selected_weights[1] = ['xmax']

    # =========== BEGIN HELPER FUNCTIONS ===========
    def get_difference_to_central(weight):
        """ Return the set of properties which differ from the central weight.
        This disregards the merging scale value for which any central value
        can be picked anyway."""

        differences = []
        # If the tag 'Weight' is in the weight label, then this is
        # automatically considered as the Event weight (central) for which
        # only the merging scale can be different
        if 'Weight' in weight:
            return set([])
        if weight['MUR'] not in [central_MUR, -1.0] or \
           weight['MUF'] not in [central_MUF, -1.0]:
            differences.append('mur_muf_scale')
        if weight['PDF'] not in [central_PDF,-1]:
            differences.append('pdf')
        if weight['ALPSFACT'] not in [central_alpsfact, -1]:
            differences.append('ALPSFACT')
        return set(differences)

    def format_weight_label(weight):
        """ Print the weight attributes in a nice order."""

        # Build a real list (not a dict view) so that it can be edited.
        all_properties = [key for key in weight if key != 'POSITION']
        # First add the attributes without value
        ordered_properties = sorted(key for key in all_properties
                                                  if weight[key] is None)
        all_properties = [key for key in all_properties
                                              if weight[key] is not None]

        # then add PDF, MUR, MUF and MERGING if present
        for key in ['PDF','MUR','MUF','ALPSFACT','MERGING']:
            all_properties.remove(key)
            if weight[key]!=-1:
                ordered_properties.append(key)

        ordered_properties.extend(sorted(all_properties))

        return '_'.join('%s%s'\
           %(key,'' if weight[key] is None else '=%s'%str(weight[key])) for
                                                key in ordered_properties)
    # =========== END HELPER FUNCTIONS ===========

    # The central value is not necessarily the 3rd one if a different merging
    # cut was selected.
    if float(all_weights[2]['MERGING']) == merging_scale_chosen:
        selected_weights[2]=['central value']
    else:
        for weight_position, weight in enumerate(all_weights):
            # Check if that weight corresponds to a central weight
            # (conventional label for central weight is 'Weight')
            if get_difference_to_central(weight)==set([]):
                # Check if the merging scale matches this time
                if weight['MERGING']==merging_scale_chosen:
                    selected_weights[weight_position] = ['central value']
                    break
        # Make sure a central value was found, throw a warning if not
        if 'central value' not in sum(selected_weights.values(),[]):
            central_merging_scale = all_weights[2]['MERGING']
            logger.warning('Could not find the central weight for the'+\
              ' chosen merging scale (%f).\n'%merging_scale_chosen+\
              'MG5aMC will chose the original central scale provided which '+\
              'correspond to a merging scale of %s'%("'inclusive'" if
              central_merging_scale in [0.0,-1.0] else '%f'%central_merging_scale))
            selected_weights[2]=['central value']

    # The error is always the fourth entry (position 3) for now.
    selected_weights[3]=['dy']

    # Labels registered so far, used to detect redundant weights.
    registered_labels = sum(selected_weights.values(),[])

    # Now process all other weights
    for weight_position, weight in enumerate(all_weights[4:]):
        # Apply special transformation for the weight label:
        # scale variation are stored as:
        #   ('scale', mu_r, mu_f) for scale variation
        #   ('pdf',PDF) for PDF variation
        #   ('merging_scale',float) for merging scale
        #   ('type',value) for all others (e.g. alpsfact)
        variations = get_difference_to_central(weight)
        # We now select the 'diagonal' variations where each parameter
        # is varied one at a time.

        # Reset the label at every iteration: a weight matching none of the
        # cases below must not inherit the label of a previous weight.
        wgt_label = None

        # Accept also if both pdf and mur_muf_scale differ because
        # the PDF used for the Event weight is often unknown but the
        # mu_r and mu_f variational weight specify it. Same story for
        # alpsfact.
        if variations in [set(['mur_muf_scale']),set(['pdf','mur_muf_scale'])]:
            wgt_label = ('scale',weight['MUR'],weight['MUF'])
        if variations in [set(['ALPSFACT']),set(['pdf','ALPSFACT'])]:
            wgt_label = ('alpsfact',weight['ALPSFACT'])
        if variations == set(['pdf']):
            wgt_label = ('pdf',weight['PDF'])
        if variations == set([]):
            # Unknown weight (might turn out to be taken as a merging
            # variation weight below)
            wgt_label = format_weight_label(weight)

        # Make sure the merging scale matches the chosen one
        if weight['MERGING'] != merging_scale_chosen:
            # If a merging_scale was specified, then ignore all other weights
            if merging_scale:
                continue
            # Otherwise consider them also, but for now only if it is for
            # the central value parameter (central PDF, central mu_R and mu_F)
            if variations == set([]):
                # We choose to store the merging variation weight labels as floats
                wgt_label = ('merging_scale', weight['MERGING'])

        # Non-diagonal variation (several parameters varied at once): there
        # is no label for it, so it is not selected.
        if wgt_label is None:
            continue

        # Make sure that the weight label does not already exist. If it does,
        # this means that the source has redundant information and that
        # there is no need to specify it again.
        if wgt_label in registered_labels:
            continue

        # Now register the selected weight
        selected_weights.setdefault(weight_position+4, []).append(wgt_label)
        registered_labels.append(wgt_label)

    if merging_scale and merging_scale > 0.0 and \
                               len(sum(selected_weights.values(),[]))==4:
        logger.warning('No additional variation weight was found for the '+\
                              'chosen merging scale %f.'%merging_scale)

    # Make sure to use the predefined keywords for the mandatory weight labels
    for wgt_pos in selected_weights:
        for i, weight_label in enumerate(selected_weights[wgt_pos]):
            try:
                selected_weights[wgt_pos][i] = HwU.mandatory_weights[weight_label]
            except KeyError:
                # Not a mandatory weight: keep its label untouched.
                pass

    # Keep only the weights asked for
    if consider_reweights!='ALL':
        new_selected_weights = {}
        for wgt_position, wgt_labels in selected_weights.items():
            for wgt_label in wgt_labels:
                if wgt_label in ['central','stat_error','boundary_xmin','boundary_xmax'] or\
                   HwU.get_HwU_wgt_label_type(wgt_label) in consider_reweights:
                    new_selected_weights.setdefault(
                                        wgt_position, []).append(wgt_label)
        selected_weights = new_selected_weights

    # Cache the list of selected weights to be defined at each line
    weight_label_list = sum(selected_weights.values(),[])

    # The weight_label list to set to self.bins: the two mandatory entries
    # first, then the non-string labels, then the remaining string labels.
    ordered_weight_label_list = ['central','stat_error']
    for weight_label in weight_label_list:
        if not isinstance(weight_label, str):
            ordered_weight_label_list.append(weight_label)
    for weight_label in weight_label_list:
        if weight_label in ['central','stat_error','boundary_xmin','boundary_xmax']:
            continue
        if isinstance(weight_label, str):
            ordered_weight_label_list.append(weight_label)

    # Now that we know the desired weights, retrieve all plots from the
    # XML source node.
    return self.retrieve_plots_from_XML_source(selected_run_node,
               selected_weights, ordered_weight_label_list, raw_labels=False)
def retrieve_plots_from_XML_source(self, xml_node,
          selected_weights, ordered_weight_label_list,raw_labels=False):
    """Given an XML node and the selected weights and their ordered list,
    import all histograms from the specified XML node.

    Arguments:
      xml_node: node whose <jethistograms>/<histogram> descendants are read.
      selected_weights: dictionary mapping the position of a weight on a
        data line to the list of weight labels it must be stored under.
      ordered_weight_label_list: weight labels given to the BinList of each
        new histogram; every data line must provide all of them.
      raw_labels: if True, the auxiliary weights are not trimmed.

    Each histogram built is appended to self.

    Raises MadGraph5Error if a selected weight is 'NAN' or if a data line
    does not provide every label of ordered_weight_label_list.
    """

    # We now start scanning all the plots
    for multiplicity_node in xml_node.getElementsByTagName("jethistograms"):
        multiplicity = int(multiplicity_node.getAttribute('njet'))
        for histogram in multiplicity_node.getElementsByTagName("histogram"):
            # We only consider the histograms with all the weight information
            if histogram.getAttribute("weight")!='all':
                continue
            new_histo = HwU()
            hist_name = '%s %s'%(str(histogram.getAttribute('name')),
                                      str(histogram.getAttribute('unit')))
            # append the jet multiplicity to the histogram name
            new_histo.process_histogram_name(
                          '%s |JETSAMPLE@%d'%(hist_name,multiplicity))
            # We do not want to include auxiliary diagrams which would be
            # recreated anyway.
            if new_histo.type == 'AUX':
                continue
            new_histo.bins = BinList(weight_labels = ordered_weight_label_list)
            hist_data = str(histogram.childNodes[0].data)
            for line in hist_data.split('\n'):
                if line.strip()=='':
                    continue
                bin_weights = {}
                boundaries = [0.0,0.0]
                for j, weight in \
                          enumerate(HwU.histo_bin_weight_re.finditer(line)):
                    # Columns which were not selected are simply ignored.
                    if j not in selected_weights:
                        continue
                    for wgt_label in selected_weights[j]:
                        if wgt_label == 'boundary_xmin':
                            boundaries[0] = float(weight.group('weight'))
                        elif wgt_label == 'boundary_xmax':
                            boundaries[1] = float(weight.group('weight'))
                        else:
                            if weight.group('weight').upper()=='NAN':
                                raise MadGraph5Error(
    "Some weights are found to be 'NAN' in histogram with name '%s'"%hist_name+
                      " and jet sample multiplicity %d."%multiplicity)
                            else:
                                bin_weights[wgt_label] = \
                                           float(weight.group('weight'))
                # The boundaries are stored apart from bin_weights, so the
                # two lengths can be compared directly.
                if len(bin_weights)!=len(ordered_weight_label_list):
                    raise MadGraph5Error(
                     'Not all defined weights were found in the XML source.\n'+\
                     '%d found / %d expected.'%(len(bin_weights),len(ordered_weight_label_list))+\
                     '\nThe missing ones are: %s.'%\
                     str(list(set(ordered_weight_label_list)-set(bin_weights.keys())))+\
                     "\nIn plot with title '%s' and jet sample multiplicity %d."%\
                                                 (hist_name, multiplicity))

                new_histo.bins.append(Bin(tuple(boundaries), bin_weights))

            # Finally remove auxiliary weights
            if not raw_labels:
                new_histo.trim_auxiliary_weights()

            # And add it to the list
            self.append(new_histo)
2104
def output(self, path, format='gnuplot',number_of_ratios = -1,
      uncertainties=['scale','pdf','statitistical','merging_scale','alpsfact'],
      use_band = None,
      ratio_correlations=True, arg_string='',
      jet_samples_to_keep=None,
      auto_open=True,
      lhapdfconfig='lhapdf-config'):
    """Write the histograms of this list to '<path>.HwU' and, unless the
    format is 'HwU', to a gnuplot command file '<path>.gnuplot'.

    Arguments:
      path: output file path *without* extension (must be a str).
      format: one of HwU.output_formats_implemented. With 'HwU' only the raw
        HwU source is written.
      number_of_ratios, uncertainties, use_band, ratio_correlations,
      jet_samples_to_keep, lhapdfconfig: forwarded unchanged to
        output_group() for each group of histograms.
      arg_string: text inserted as a comment in the gnuplot header.
      auto_open: if True, the gnuplot file ends with a command opening the
        generated pdf.

    Side effect: for the gnuplot output, the content of self is replaced by
    a list of groups of mutually compatible histograms.

    Raises MadGraph5Error if the list is empty, if the format is not
    supported or if path is not an extension-less string path.
    """

    if len(self)==0:
        # This used to be a 'return' of the (class, message) tuple, which
        # silently did nothing; the error is now actually raised.
        raise MadGraph5Error('No histograms stored in the list yet.')

    if not format in HwU.output_formats_implemented:
        raise MadGraph5Error("The specified output format '%s'"%format+\
                         " is not yet supported. Supported formats are %s."\
                                          %HwU.output_formats_implemented)

    # '.HwU' is the extension written below; '.Hwu' is kept so that every
    # path rejected before is still rejected.
    if not isinstance(path, str) or any(ext in os.path.basename(path)
                    for ext in ['.HwU','.Hwu','.ps','.gnuplot','.pdf']):
        raise MadGraph5Error("The path argument of the output function of"+\
           " the HwUList instance must be file path without its extension.")
    output_base_name = os.path.basename(path)

    HwU_output_list = []
    # If the format is just the raw HwU source, then simply write them
    # out all in sequence.
    if format == 'HwU':
        HwU_output_list.extend(self[0].get_HwU_source(print_header=True))
        for histo in self[1:]:
            HwU_output_list.extend(histo.get_HwU_source())
            # NOTE(review): nesting of this separator inside the loop is
            # inferred; confirm against the original indentation.
            HwU_output_list.extend(['',''])
        # The file is only opened once its content is ready, so that a
        # failure above cannot leak an open stream.
        with open(path+'.HwU','w') as HwU_stream:
            HwU_stream.write('\n'.join(HwU_output_list))
        return

    # Every remaining format goes through the gnuplot path below.

    # Now group all the identified matching histograms in a list
    matching_histo_lists = HwUList([HwUList([self[0]])])
    for histo in self[1:]:
        matched = False
        for histo_list in matching_histo_lists:
            if histo.test_plot_compability(histo_list[0],
                  consider_type=False, consider_unknown_weight_labels=True):
                histo_list.append(histo)
                matched = True
                break
        if not matched:
            matching_histo_lists.append(HwUList([histo]))

    self[:] = matching_histo_lists

    # Write the gnuplot header
    gnuplot_output_list_v4 = [
"""
################################################################################
#
# This gnuplot file was generated by MadGraph5_aMC@NLO project, a program which
# automatically generates Feynman diagrams and matrix elements for arbitrary
# high-energy processes in the Standard Model and beyond. It also perform the
# integration and/or generate events for these processes, at LO and NLO accuracy.
#
# For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch
#
################################################################################
# %s
reset

set lmargin 10
set rmargin 0
set terminal postscript portrait enhanced mono dashed lw 1.0 "Helvetica" 9
# The pdf terminal offers transparency support, but you will have to adapt things a bit
#set terminal pdf enhanced font "Helvetica 12" lw 1.0 dashed size 29.7cm, 21cm
set key font ",9"
set key samplen "2"
set output "%s.ps"

# This is the "PODO" color palette of gnuplot v.5, but with the order
# changed: palette of colors selected to be easily distinguishable by
# color-blind individuals with either protanopia or deuteranopia. Bang
# Wong [2011] Nature Methods 8, 441.

set style line  1 lt 1 lc rgb "#009e73" lw 2.5
set style line 11 lt 2 lc rgb "#009e73" lw 2.5
set style line 21 lt 4 lc rgb "#009e73" lw 2.5
set style line 31 lt 6 lc rgb "#009e73" lw 2.5
set style line 41 lt 8 lc rgb "#009e73" lw 2.5

set style line  2 lt 1 lc rgb "#0072b2" lw 2.5
set style line 12 lt 2 lc rgb "#0072b2" lw 2.5
set style line 22 lt 4 lc rgb "#0072b2" lw 2.5
set style line 32 lt 6 lc rgb "#0072b2" lw 2.5
set style line 42 lt 8 lc rgb "#0072b2" lw 2.5

set style line  3 lt 1 lc rgb "#d55e00" lw 2.5
set style line 13 lt 2 lc rgb "#d55e00" lw 2.5
set style line 23 lt 4 lc rgb "#d55e00" lw 2.5
set style line 33 lt 6 lc rgb "#d55e00" lw 2.5
set style line 43 lt 8 lc rgb "#d55e00" lw 2.5

set style line  4 lt 1 lc rgb "#f0e442" lw 2.5
set style line 14 lt 2 lc rgb "#f0e442" lw 2.5
set style line 24 lt 4 lc rgb "#f0e442" lw 2.5
set style line 34 lt 6 lc rgb "#f0e442" lw 2.5
set style line 44 lt 8 lc rgb "#f0e442" lw 2.5

set style line  5 lt 1 lc rgb "#56b4e9" lw 2.5
set style line 15 lt 2 lc rgb "#56b4e9" lw 2.5
set style line 25 lt 4 lc rgb "#56b4e9" lw 2.5
set style line 35 lt 6 lc rgb "#56b4e9" lw 2.5
set style line 45 lt 8 lc rgb "#56b4e9" lw 2.5

set style line  6 lt 1 lc rgb "#cc79a7" lw 2.5
set style line 16 lt 2 lc rgb "#cc79a7" lw 2.5
set style line 26 lt 4 lc rgb "#cc79a7" lw 2.5
set style line 36 lt 6 lc rgb "#cc79a7" lw 2.5
set style line 46 lt 8 lc rgb "#cc79a7" lw 2.5

set style line  7 lt 1 lc rgb "#e69f00" lw 2.5
set style line 17 lt 2 lc rgb "#e69f00" lw 2.5
set style line 27 lt 4 lc rgb "#e69f00" lw 2.5
set style line 37 lt 6 lc rgb "#e69f00" lw 2.5
set style line 47 lt 8 lc rgb "#e69f00" lw 2.5

set style line  8 lt 1 lc rgb "black" lw 2.5
set style line 18 lt 2 lc rgb "black" lw 2.5
set style line 28 lt 4 lc rgb "black" lw 2.5
set style line 38 lt 6 lc rgb "black" lw 2.5
set style line 48 lt 7 lc rgb "black" lw 2.5


set style line 999 lt 1 lc rgb "gray" lw 2.5

safe(x,y,a) = (y == 0.0 ? a : x/y)

set style data histeps
set key invert

"""%(arg_string,output_base_name)
    ]

    gnuplot_output_list_v5 = [
"""
################################################################################
#
# This gnuplot file was generated by MadGraph5_aMC@NLO project, a program which
# automatically generates Feynman diagrams and matrix elements for arbitrary
# high-energy processes in the Standard Model and beyond. It also perform the
# integration and/or generate events for these processes, at LO and NLO accuracy.
#
# For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch
#
################################################################################
# %s
reset

set lmargin 10
set rmargin 0
set terminal postscript portrait enhanced color "Helvetica" 9
# The pdf terminal offers transparency support, but you will have to adapt things a bit
#set terminal pdf enhanced font "Helvetica 12" lw 1.0 dashed size 29.7cm, 21cm
set key font ",9"
set key samplen "2"
set output "%s.ps"

# This is the "PODO" color palette of gnuplot v.5, but with the order
# changed: palette of colors selected to be easily distinguishable by
# color-blind individuals with either protanopia or deuteranopia. Bang
# Wong [2011] Nature Methods 8, 441.

set style line   1 lt 1 lc rgb "#009e73" lw 1.3
set style line 101 lt 1 lc rgb "#009e73" lw 1.3 dt (6,3)
set style line  11 lt 2 lc rgb "#009e73" lw 1.3 dt (6,3)
set style line  21 lt 4 lc rgb "#009e73" lw 1.3 dt (3,2)
set style line  31 lt 6 lc rgb "#009e73" lw 1.3 dt (2,1)
set style line  41 lt 8 lc rgb "#009e73" lw 1.3 dt (4,3)

set style line   2 lt 1 lc rgb "#0072b2" lw 1.3
set style line 102 lt 1 lc rgb "#0072b2" lw 1.3 dt (6,3)
set style line  12 lt 2 lc rgb "#0072b2" lw 1.3 dt (6,3)
set style line  22 lt 4 lc rgb "#0072b2" lw 1.3 dt (3,2)
set style line  32 lt 6 lc rgb "#0072b2" lw 1.3 dt (2,1)
set style line  42 lt 8 lc rgb "#0072b2" lw 1.3 dt (4,3)


set style line   3 lt 1 lc rgb "#d55e00" lw 1.3
set style line 103 lt 1 lc rgb "#d55e00" lw 1.3 dt (6,3)
set style line  13 lt 2 lc rgb "#d55e00" lw 1.3 dt (6,3)
set style line  23 lt 4 lc rgb "#d55e00" lw 1.3 dt (3,2)
set style line  33 lt 6 lc rgb "#d55e00" lw 1.3 dt (2,1)
set style line  43 lt 8 lc rgb "#d55e00" lw 1.3 dt (4,3)

set style line   4 lt 1 lc rgb "#f0e442" lw 1.3
set style line 104 lt 1 lc rgb "#f0e442" lw 1.3 dt (6,3)
set style line  14 lt 2 lc rgb "#f0e442" lw 1.3 dt (6,3)
set style line  24 lt 4 lc rgb "#f0e442" lw 1.3 dt (3,2)
set style line  34 lt 6 lc rgb "#f0e442" lw 1.3 dt (2,1)
set style line  44 lt 8 lc rgb "#f0e442" lw 1.3 dt (4,3)

set style line   5 lt 1 lc rgb "#56b4e9" lw 1.3
set style line 105 lt 1 lc rgb "#56b4e9" lw 1.3 dt (6,3)
set style line  15 lt 2 lc rgb "#56b4e9" lw 1.3 dt (6,3)
set style line  25 lt 4 lc rgb "#56b4e9" lw 1.3 dt (3,2)
set style line  35 lt 6 lc rgb "#56b4e9" lw 1.3 dt (2,1)
set style line  45 lt 8 lc rgb "#56b4e9" lw 1.3 dt (4,3)

set style line   6 lt 1 lc rgb "#cc79a7" lw 1.3
set style line 106 lt 1 lc rgb "#cc79a7" lw 1.3 dt (6,3)
set style line  16 lt 2 lc rgb "#cc79a7" lw 1.3 dt (6,3)
set style line  26 lt 4 lc rgb "#cc79a7" lw 1.3 dt (3,2)
set style line  36 lt 6 lc rgb "#cc79a7" lw 1.3 dt (2,1)
set style line  46 lt 8 lc rgb "#cc79a7" lw 1.3 dt (4,3)

set style line   7 lt 1 lc rgb "#e69f00" lw 1.3
set style line 107 lt 1 lc rgb "#e69f00" lw 1.3 dt (6,3)
set style line  17 lt 2 lc rgb "#e69f00" lw 1.3 dt (6,3)
set style line  27 lt 4 lc rgb "#e69f00" lw 1.3 dt (3,2)
set style line  37 lt 6 lc rgb "#e69f00" lw 1.3 dt (2,1)
set style line  47 lt 8 lc rgb "#e69f00" lw 1.3 dt (4,3)

set style line   8 lt 1 lc rgb "black" lw 1.3
set style line 108 lt 1 lc rgb "black" lw 1.3 dt (6,3)
set style line  18 lt 2 lc rgb "black" lw 1.3 dt (6,3)
set style line  28 lt 4 lc rgb "black" lw 1.3 dt (3,2)
set style line  38 lt 6 lc rgb "black" lw 1.3 dt (2,1)
set style line  48 lt 8 lc rgb "black" lw 1.3 dt (4,3)


set style line 999 lt 1 lc rgb "gray" lw 1.3

safe(x,y,a) = (y == 0.0 ? a : x/y)

set style data histeps
set key invert

"""%(arg_string,output_base_name)
    ]

    # determine the gnuplot version
    try:
        p = subprocess.Popen(['gnuplot', '--version'], \
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # assume that version 5 of gnuplot is the default if
        # gnuplot could not be found
        gnuplot_output_list=gnuplot_output_list_v5
    else:
        version_output, _ = p.communicate()
        try:
            gnuplot_version = float(version_output.split()[1])
        except (IndexError, ValueError):
            # Unexpected version string: use the same default as when
            # gnuplot is not found.
            gnuplot_version = 5.
        if gnuplot_version < 5.:
            gnuplot_output_list=gnuplot_output_list_v4
        else:
            gnuplot_output_list=gnuplot_output_list_v5

    # Now output each group one by one
    # Block position keeps track of the gnuplot data_block index considered
    block_position = 0
    for histo_group in self:
        # Output this group
        block_position = histo_group.output_group(HwU_output_list,
                gnuplot_output_list, block_position,output_base_name+'.HwU',
                number_of_ratios=number_of_ratios,
                uncertainties = uncertainties,
                use_band = use_band,
                ratio_correlations = ratio_correlations,
                jet_samples_to_keep=jet_samples_to_keep,
                lhapdfconfig = lhapdfconfig)

    # Now write the tail of the gnuplot command file
    gnuplot_output_list.extend([
      "unset multiplot",
      '!ps2pdf "%s.ps" &> /dev/null'%output_base_name])
    if auto_open:
        gnuplot_output_list.append(
                         '!open "%s.pdf" &> /dev/null'%output_base_name)

    # Now write the results; each stream is closed even if a write fails.
    with open(path+'.gnuplot','w') as gnuplot_stream:
        gnuplot_stream.write('\n'.join(gnuplot_output_list))
    with open(path+'.HwU','w') as HwU_stream:
        HwU_stream.write('\n'.join(HwU_output_list))

    logger.debug("Histograms have been written out at "+\
                             "%s.[HwU|gnuplot]' and can "%output_base_name+\
                                     "now be rendered by invoking gnuplot.")
2396
2397 - def output_group(self, HwU_out, gnuplot_out, block_position, HwU_name, 2398 number_of_ratios = -1, 2399 uncertainties = ['scale','pdf','statitistical','merging_scale','alpsfact'], 2400 use_band = None, 2401 ratio_correlations = True, 2402 jet_samples_to_keep=None, 2403 lhapdfconfig='lhapdf-config'):
2404 2405 """ This functions output a single group of histograms with either one 2406 histograms untyped (i.e. type=None) or two of type 'NLO' and 'LO' 2407 respectively.""" 2408 2409 # This function returns the main central plot line, making sure that 2410 # negative distribution are displayed in dashed style 2411 def get_main_central_plot_lines(HwU_name, block_position, color_index, 2412 title, show_mc_uncertainties): 2413 """ Returns two plot lines, one for the negative contributions in 2414 dashed and one with the positive ones in solid.""" 2415 2416 template = "'%(hwu)s' index %(ind)d using (($1+$2)/2):%(data)s%(stat_col)s%(stat_err)s%(ls)s%(title)s" 2417 template_no_stat = "'%(hwu)s' index %(ind)d using (($1+$2)/2):%(data)s%(ls)s%(title)s" 2418 rep_dic = {'hwu':HwU_name, 2419 'ind':block_position, 2420 'ls':' ls %d'%color_index, 2421 'title':" title '%s'"%title, 2422 'stat_col': ':4', 2423 'stat_err': ' w yerrorbar', 2424 'data':'3', 2425 'linetype':''} 2426 2427 # This would be the original output 2428 # return [template_no_stat%rep_dic]+\ 2429 # ([template%rep_dic] if show_mc_uncertainties else []) 2430 2431 # The use of sqrt(-1) is just a trick to prevent the line to display 2432 res = [] 2433 rep_dic['data'] = '($3 < 0 ? sqrt(-1) : $3)' 2434 res.append(template_no_stat%rep_dic) 2435 rep_dic['title'] = " title ''" 2436 if show_mc_uncertainties: 2437 res.append(template%rep_dic) 2438 rep_dic['data'] = '($3 >= 0 ? sqrt(-1) : abs($3))' 2439 rep_dic['ls'] = ' ls %d'%(100+color_index) 2440 res.append(template_no_stat%rep_dic) 2441 if show_mc_uncertainties: 2442 res.append(template%rep_dic) 2443 return res
2444 2445 # This bool can be modified later to decide whether to use uncertainty 2446 # bands or not 2447 # ======== 2448 def get_uncertainty_lines(HwU_name, block_position, 2449 var_pos, color_index,title, ratio=False, band=False): 2450 """ Return a string line corresponding to the plotting of the 2451 uncertainty. Band is to chose wether to display uncertainty with 2452 a band or two lines.""" 2453 2454 # This perl substitution regular expression copies each line of the 2455 # HwU source and swap the x1 and x2 coordinate of the second copy. 2456 # So if input is: 2457 # 2458 # blabla 2459 # +0.01e+01 0.3 4 5 6 2460 # +0.03e+01 0.5 7 8 9 2461 # ... 2462 # 2463 # The output will be 2464 # 2465 # blabla 2466 # +0.01e+01 0.3 4 5 6 2467 # 0.3 +0.01e+01 4 5 6 2468 # +0.03e+01 0.5 7 8 9 2469 # 0.5 +0.03e+01 7 8 9 2470 # ... 2471 # 2472 copy_swap_re = r"perl -pe 's/^\s*(?<x1>[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?)\s*(?<x2>[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?)(?<rest>.*)\n/ $+{x1} $+{x2} $+{rest}\n$+{x2} $+{x1} $+{rest}\n/g'" 2473 # Gnuplot escapes the antislash, so we must esacape then once more O_o. 2474 # Gnuplot doesn't have raw strings, what a shame... 
2475 copy_swap_re = copy_swap_re.replace('\\','\\\\') 2476 # For the ratio, we must divide by the central value 2477 position = '(safe($%d,$3,1.0)-1.0)' if ratio else '%d' 2478 if not band: 2479 return ["'%s' index %d using (($1+$2)/2):%s ls %d title '%s'"\ 2480 %(HwU_name,block_position, position%(var_pos),color_index,title), 2481 "'%s' index %d using (($1+$2)/2):%s ls %d title ''"\ 2482 %(HwU_name,block_position, position%(var_pos+1),color_index)] 2483 else: 2484 return [' "<%s %s" index %d using 1:%s:%s with filledcurve ls %d fs transparent solid 0.2 title \'%s\''%\ 2485 (copy_swap_re,HwU_name,block_position, 2486 position%var_pos,position%(var_pos+1),color_index,title)] 2487 # ======== 2488 2489 2490 layout_geometry = [(0.0, 0.5, 1.0, 0.4 ), 2491 (0.0, 0.35, 1.0, 0.15), 2492 (0.0, 0.2, 1.0, 0.15)] 2493 layout_geometry.reverse() 2494 2495 # Group histograms which just differ by jet multiplicity and add their 2496 # sum as first plot 2497 matching_histo_lists = HwUList([HwUList([self[0]])]) 2498 for histo in self[1:]: 2499 matched = False 2500 for histo_list in matching_histo_lists: 2501 if hasattr(histo, 'jetsample') and histo.jetsample >= 0 and \ 2502 histo.type == histo_list[0].type: 2503 matched = True 2504 histo_list.append(histo) 2505 break 2506 if not matched: 2507 matching_histo_lists.append(HwUList([histo])) 2508 2509 # For each group of histograms with different jet multiplicities, we 2510 # define one at the beginning which is the sum. 2511 self[:] = [] 2512 for histo_group in matching_histo_lists: 2513 # First create a plot that sums all jet multiplicities for each type 2514 # (that is, only if jet multiplicities are defined) 2515 if len(histo_group)==1: 2516 self.append(histo_group[0]) 2517 continue 2518 # If there is already a histogram summing them, then don't create 2519 # a copy of it. 
2520 if any(hist.jetsample==-1 for hist in histo_group if 2521 hasattr(hist, 'jetsample')): 2522 self.extend(histo_group) 2523 continue 2524 summed_histogram = copy.copy(histo_group[0]) 2525 for histo in histo_group[1:]: 2526 summed_histogram = summed_histogram + histo 2527 summed_histogram.jetsample = -1 2528 self.append(summed_histogram) 2529 self.extend(histo_group) 2530 2531 # Remove the curve of individual jet samples if they are not desired 2532 if not jet_samples_to_keep is None: 2533 self[:] = filter(lambda histo: 2534 (not hasattr(histo,'jetsample')) or (histo.jetsample == -1) or 2535 (histo.jetsample in jet_samples_to_keep), self) 2536 2537 # This function is to create the ratio histograms if the user turned off 2538 # correlations. 2539 def ratio_no_correlations(wgtsA, wgtsB): 2540 new_wgts = {} 2541 for label, wgt in wgtsA.items(): 2542 if wgtsB['central']==0.0 and wgt==0.0: 2543 new_wgts[label] = 0.0 2544 continue 2545 elif wgtsB['central']==0.0: 2546 # It is ok to skip the warning here. 2547 # logger.debug('Warning:: A bin with finite weight '+ 2548 # 'was divided by a bin with zero weight.') 2549 new_wgts[label] = 0.0 2550 continue 2551 new_wgts[label] = (wgtsA[label]/wgtsB['central']) 2552 return new_wgts 2553 2554 # First compute the ratio of all the histograms from the second to the 2555 # number_of_ratios+1 ones in the list to the first histogram. 
2556 n_histograms = len(self) 2557 ratio_histos = HwUList([]) 2558 # A counter to keep track of the number of ratios included 2559 n_ratios_included = 0 2560 for i, histo in enumerate(self[1:]): 2561 if not hasattr(histo,'jetsample') or histo.jetsample==self[0].jetsample: 2562 n_ratios_included += 1 2563 else: 2564 continue 2565 2566 if number_of_ratios >=0 and n_ratios_included > number_of_ratios: 2567 break 2568 2569 if ratio_correlations: 2570 ratio_histos.append(histo/self[0]) 2571 else: 2572 ratio_histos.append(self[0].__class__.combine(histo, self[0], 2573 ratio_no_correlations)) 2574 if self[0].type=='NLO' and self[1].type=='LO': 2575 ratio_histos[-1].title += '1/K-factor' 2576 elif self[0].type=='LO' and self[1].type=='NLO': 2577 ratio_histos[-1].title += 'K-factor' 2578 else: 2579 ratio_histos[-1].title += ' %s/%s'%( 2580 self[1].type if self[1].type else '(%d)'%(i+2), 2581 self[0].type if self[0].type else '(1)') 2582 # By setting its type to aux, we make sure this histogram will be 2583 # filtered out if the .HwU file output here would be re-loaded later. 
2584 ratio_histos[-1].type = 'AUX' 2585 self.extend(ratio_histos) 2586 2587 # Compute scale variation envelope for all diagrams 2588 if 'scale' in uncertainties: 2589 (mu_var_pos,mu) = self[0].set_uncertainty(type='all_scale') 2590 else: 2591 (mu_var_pos,mu) = (None,[None]) 2592 2593 if 'pdf' in uncertainties: 2594 (PDF_var_pos,pdf) = self[0].set_uncertainty(type='PDF',lhapdfconfig=lhapdfconfig) 2595 else: 2596 (PDF_var_pos,pdf) = (None,[None]) 2597 2598 if 'merging_scale' in uncertainties: 2599 (merging_var_pos,merging) = self[0].set_uncertainty(type='merging') 2600 else: 2601 (merging_var_pos,merging) = (None,[None]) 2602 if 'alpsfact' in uncertainties: 2603 (alpsfact_var_pos,alpsfact) = self[0].set_uncertainty(type='alpsfact') 2604 else: 2605 (alpsfact_var_pos,alpsfact) = (None,[None]) 2606 2607 uncertainties_present = list(uncertainties) 2608 if PDF_var_pos is None and 'pdf' in uncertainties_present: 2609 uncertainties_present.remove('pdf') 2610 if mu_var_pos is None and 'scale' in uncertainties_present: 2611 uncertainties_present.remove('scale') 2612 if merging_var_pos is None and 'merging' in uncertainties_present: 2613 uncertainties_present.remove('merging') 2614 if alpsfact_var_pos is None and 'alpsfact' in uncertainties_present: 2615 uncertainties_present.remove('alpsfact') 2616 no_uncertainties = len(uncertainties_present)==0 2617 2618 # If the 'use_band' option is None we should adopt a default which is 2619 try: 2620 uncertainties_present.remove('statistical') 2621 except: 2622 pass 2623 if use_band is None: 2624 # For clarity, it is better to only use bands only for one source 2625 # of uncertainty 2626 if len(uncertainties_present)==0: 2627 use_band = [] 2628 elif len(uncertainties_present)==1: 2629 use_band = uncertainties_present 2630 elif 'scale' in uncertainties_present: 2631 use_band = ['scale'] 2632 else: 2633 use_band = [uncertainties_present[0]] 2634 2635 for histo in self[1:]: 2636 if (not mu_var_pos is None) and \ 2637 mu_var_pos != 
histo.set_uncertainty(type='all_scale')[0]: 2638 raise MadGraph5Error, 'Not all histograms in this group specify'+\ 2639 ' scale uncertainties. It is required to be able to output them'+\ 2640 ' together.' 2641 if (not PDF_var_pos is None) and\ 2642 PDF_var_pos != histo.set_uncertainty(type='PDF',\ 2643 lhapdfconfig=lhapdfconfig)[0]: 2644 raise MadGraph5Error, 'Not all histograms in this group specify'+\ 2645 ' PDF uncertainties. It is required to be able to output them'+\ 2646 ' together.' 2647 if (not merging_var_pos is None) and\ 2648 merging_var_pos != histo.set_uncertainty(type='merging')[0]: 2649 raise MadGraph5Error, 'Not all histograms in this group specify'+\ 2650 ' merging uncertainties. It is required to be able to output them'+\ 2651 ' together.' 2652 if (not alpsfact_var_pos is None) and\ 2653 alpsfact_var_pos != histo.set_uncertainty(type='alpsfact')[0]: 2654 raise MadGraph5Error, 'Not all histograms in this group specify'+\ 2655 ' alpsfact uncertainties. It is required to be able to output them'+\ 2656 ' together.' 
2657 2658 2659 # Now output the corresponding HwU histogram data 2660 for i, histo in enumerate(self): 2661 # Print the header the first time only 2662 HwU_out.extend(histo.get_HwU_source(\ 2663 print_header=(block_position==0 and i==0))) 2664 HwU_out.extend(['','']) 2665 2666 # First the global gnuplot header for this histogram group 2667 global_header =\ 2668 """ 2669 ################################################################################ 2670 ### Rendering of the plot titled '%(title)s' 2671 ################################################################################ 2672 2673 set multiplot 2674 set label "%(title)s" font ",13" at graph 0.04, graph 1.05 2675 set xrange [%(xmin).4e:%(xmax).4e] 2676 set bmargin 0 2677 set tmargin 0 2678 set xtics nomirror 2679 set ytics nomirror 2680 set mytics %(mxtics)d 2681 %(set_xtics)s 2682 set key horizontal noreverse maxcols 1 width -4 2683 set label front 'MadGraph5\_aMC\@NLO' font "Courier,11" rotate by 90 at graph 1.02, graph 0.04 2684 """ 2685 2686 # Now the header for each subhistogram 2687 subhistogram_header = \ 2688 """#-- rendering subhistograms '%(subhistogram_type)s' 2689 %(unset label)s 2690 %(set_format_y)s 2691 set yrange [%(ymin).4e:%(ymax).4e] 2692 set origin %(origin_x).4e, %(origin_y).4e 2693 set size %(size_x).4e, %(size_y).4e 2694 set mytics %(mytics)d 2695 %(set_ytics)s 2696 %(set_format_x)s 2697 %(set_yscale)s 2698 %(set_ylabel)s 2699 %(set_histo_label)s 2700 plot \\""" 2701 replacement_dic = {} 2702 2703 replacement_dic['title'] = self[0].get_HwU_histogram_name(format='human-no_type') 2704 # Determine what weight to consider when computing the optimal 2705 # range for the y-axis. 
2706 wgts_to_consider = ['central'] 2707 if not mu_var_pos is None: 2708 for mu_var in mu_var_pos: 2709 wgts_to_consider.append(self[0].bins.weight_labels[mu_var]) 2710 wgts_to_consider.append(self[0].bins.weight_labels[mu_var+1]) 2711 wgts_to_consider.append(self[0].bins.weight_labels[mu_var+2]) 2712 if not PDF_var_pos is None: 2713 for PDF_var in PDF_var_pos: 2714 wgts_to_consider.append(self[0].bins.weight_labels[PDF_var]) 2715 wgts_to_consider.append(self[0].bins.weight_labels[PDF_var+1]) 2716 wgts_to_consider.append(self[0].bins.weight_labels[PDF_var+2]) 2717 if not merging_var_pos is None: 2718 for merging_var in merging_var_pos: 2719 wgts_to_consider.append(self[0].bins.weight_labels[merging_var]) 2720 wgts_to_consider.append(self[0].bins.weight_labels[merging_var+1]) 2721 wgts_to_consider.append(self[0].bins.weight_labels[merging_var+2]) 2722 if not alpsfact_var_pos is None: 2723 for alpsfact_var in alpsfact_var_pos: 2724 wgts_to_consider.append(self[0].bins.weight_labels[alpsfact_var]) 2725 wgts_to_consider.append(self[0].bins.weight_labels[alpsfact_var+1]) 2726 wgts_to_consider.append(self[0].bins.weight_labels[alpsfact_var+2]) 2727 2728 (xmin, xmax) = HwU.get_x_optimal_range(self[:2],\ 2729 weight_labels = wgts_to_consider) 2730 replacement_dic['xmin'] = xmin 2731 replacement_dic['xmax'] = xmax 2732 replacement_dic['mxtics'] = 10 2733 replacement_dic['set_xtics'] = 'set xtics auto' 2734 2735 # Add the global header which is now ready 2736 gnuplot_out.append(global_header%replacement_dic) 2737 2738 # Now add the main plot 2739 replacement_dic['subhistogram_type'] = '%s and %s results'%( 2740 str(self[0].type),str(self[1].type)) if len(self)>1 else \ 2741 'single diagram output' 2742 (ymin, ymax) = HwU.get_y_optimal_range(self[:2], 2743 labels = wgts_to_consider, scale=self[0].y_axis_mode) 2744 2745 # Force a linear scale if the detected range is negative 2746 if ymin< 0.0: 2747 self[0].y_axis_mode = 'LIN' 2748 2749 # Already add a margin on upper bound. 
2750 if self[0].y_axis_mode=='LOG': 2751 ymax += 10.0 * ymax 2752 ymin -= 0.1 * ymin 2753 else: 2754 ymax += 0.3 * (ymax - ymin) 2755 ymin -= 0.3 * (ymax - ymin) 2756 2757 replacement_dic['ymin'] = ymin 2758 replacement_dic['ymax'] = ymax 2759 replacement_dic['unset label'] = '' 2760 (replacement_dic['origin_x'], replacement_dic['origin_y'], 2761 replacement_dic['size_x'], replacement_dic['size_y']) = layout_geometry.pop() 2762 replacement_dic['mytics'] = 10 2763 # Use default choise for the main histogram 2764 replacement_dic['set_ytics'] = 'set ytics auto' 2765 replacement_dic['set_format_x'] = "set format x ''" if \ 2766 (len(self)-n_histograms>0 or not no_uncertainties) else "set format x" 2767 replacement_dic['set_ylabel'] = 'set ylabel "{/Symbol s} per bin [pb]"' 2768 replacement_dic['set_yscale'] = "set logscale y" if \ 2769 self[0].y_axis_mode=='LOG' else 'unset logscale y' 2770 replacement_dic['set_format_y'] = "set format y '10^{%T}'" if \ 2771 self[0].y_axis_mode=='LOG' else 'unset format' 2772 2773 replacement_dic['set_histo_label'] = "" 2774 gnuplot_out.append(subhistogram_header%replacement_dic) 2775 2776 # Now add the main layout 2777 plot_lines = [] 2778 uncertainty_plot_lines = [] 2779 n=-1 2780 2781 for i, histo in enumerate(self[:n_histograms]): 2782 n=n+1 2783 color_index = n%self.number_line_colors_defined+1 2784 # Label to appear for the lower curves 2785 title = [] 2786 if histo.type is None and not hasattr(histo, 'jetsample'): 2787 title.append('%d'%(i+1)) 2788 else: 2789 if histo.type: 2790 title.append('NLO' if \ 2791 histo.type.split()[0]=='NLO' else histo.type) 2792 if hasattr(histo, 'jetsample'): 2793 if histo.jetsample!=-1: 2794 title.append('jet sample %d'%histo.jetsample) 2795 else: 2796 title.append('all jet samples') 2797 2798 title = ', '.join(title) 2799 # Label for the first curve in the upper plot 2800 if histo.type is None and not hasattr(histo, 'jetsample'): 2801 major_title = 'central value for plot (%d)'%(i+1) 2802 else: 
2803 major_title = [] 2804 if not histo.type is None: 2805 major_title.append(histo.type) 2806 if hasattr(histo, 'jetsample'): 2807 if histo.jetsample!=-1: 2808 major_title.append('jet sample %d'%histo.jetsample) 2809 else: 2810 major_title.append('all jet samples') 2811 else: 2812 major_title.append('central value') 2813 major_title = ', '.join(major_title) 2814 2815 if not mu[0] in ['none',None]: 2816 major_title += ', dynamical\_scale\_choice=%s'%mu[0] 2817 if not pdf[0] in ['none',None]: 2818 major_title += ', PDF=%s'%pdf[0].replace('_','\_') 2819 2820 # Do not show uncertainties for individual jet samples (unless first 2821 # or specified explicitely and uniquely) 2822 if not (i!=0 and hasattr(histo,'jetsample') and histo.jetsample!=-1 and \ 2823 not (jet_samples_to_keep and len(jet_samples_to_keep)==1 and 2824 jet_samples_to_keep[0] == histo.jetsample)): 2825 2826 uncertainty_plot_lines.append({}) 2827 2828 # We decide to show uncertainties in the main plot only if they 2829 # are part of a monocolor band. Otherwise, they will only be 2830 # shown in the first subplot. 
Notice that plotting 'sqrt(-1)' 2831 # is just a trick so as to have only the key printed with no 2832 # line 2833 2834 # Show scale variation for the first central value if available 2835 if not mu_var_pos is None and len(mu_var_pos)>0: 2836 if 'scale' in use_band: 2837 uncertainty_plot_lines[-1]['scale'] = get_uncertainty_lines( 2838 HwU_name, block_position+i, mu_var_pos[0]+4, color_index+10, 2839 '%s, scale variation'%title, band='scale' in use_band) 2840 else: 2841 uncertainty_plot_lines[-1]['scale'] = \ 2842 ["sqrt(-1) ls %d title '%s'"%(color_index+10,'%s, scale variation'%title)] 2843 # And now PDF_variation if available 2844 if not PDF_var_pos is None and len(PDF_var_pos)>0: 2845 if 'pdf' in use_band: 2846 uncertainty_plot_lines[-1]['pdf'] = get_uncertainty_lines( 2847 HwU_name,block_position+i, PDF_var_pos[0]+4, color_index+20, 2848 '%s, PDF variation'%title, band='pdf' in use_band) 2849 else: 2850 uncertainty_plot_lines[-1]['pdf'] = \ 2851 ["sqrt(-1) ls %d title '%s'"%(color_index+20,'%s, PDF variation'%title)] 2852 # And now merging variation if available 2853 if not merging_var_pos is None and len(merging_var_pos)>0: 2854 if 'merging_scale' in use_band: 2855 uncertainty_plot_lines[-1]['merging_scale'] = get_uncertainty_lines( 2856 HwU_name,block_position+i, merging_var_pos[0]+4, color_index+30, 2857 '%s, merging scale variation'%title, band='merging_scale' in use_band) 2858 else: 2859 uncertainty_plot_lines[-1]['merging_scale'] = \ 2860 ["sqrt(-1) ls %d title '%s'"%(color_index+30,'%s, merging scale variation'%title)] 2861 # And now alpsfact variation if available 2862 if not alpsfact_var_pos is None and len(alpsfact_var_pos)>0: 2863 if 'alpsfact' in use_band: 2864 uncertainty_plot_lines[-1]['alpsfact'] = get_uncertainty_lines( 2865 HwU_name,block_position+i, alpsfact_var_pos[0]+4, color_index+40, 2866 '%s, alpsfact variation'%title, band='alpsfact' in use_band) 2867 else: 2868 uncertainty_plot_lines[-1]['alpsfact'] = \ 2869 ["sqrt(-1) ls %d title 
'%s'"%(color_index+40,'%s, alpsfact variation'%title)] 2870 2871 # plot_lines.append( 2872 # "'%s' index %d using (($1+$2)/2):3 ls %d title '%s'"\ 2873 # %(HwU_name,block_position+i,color_index, major_title)) 2874 # if 'statistical' in uncertainties: 2875 # plot_lines.append( 2876 # "'%s' index %d using (($1+$2)/2):3:4 w yerrorbar ls %d title ''"\ 2877 # %(HwU_name,block_position+i,color_index)) 2878 plot_lines.extend( 2879 get_main_central_plot_lines(HwU_name, block_position+i, 2880 color_index, major_title, 'statistical' in uncertainties)) 2881 2882 # Add additional central scale/PDF curves 2883 if not mu_var_pos is None: 2884 for j,mu_var in enumerate(mu_var_pos): 2885 if j!=0: 2886 n=n+1 2887 color_index = n%self.number_line_colors_defined+1 2888 plot_lines.append( 2889 "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ 2890 %(HwU_name,block_position+i,mu_var+3,color_index,\ 2891 '%s dynamical\_scale\_choice=%s' % (title,mu[j]))) 2892 # And now PDF_variation if available 2893 if not PDF_var_pos is None: 2894 for j,PDF_var in enumerate(PDF_var_pos): 2895 if j!=0: 2896 n=n+1 2897 color_index = n%self.number_line_colors_defined+1 2898 plot_lines.append( 2899 "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ 2900 %(HwU_name,block_position+i,PDF_var+3,color_index,\ 2901 '%s PDF=%s' % (title,pdf[j].replace('_','\_')))) 2902 2903 # Now add the uncertainty lines, those not using a band so that they 2904 # are not covered by those using a band after we reverse plo_lines 2905 for one_plot in uncertainty_plot_lines: 2906 for uncertainty_type, lines in one_plot.items(): 2907 if not uncertainty_type in use_band: 2908 plot_lines.extend(lines) 2909 # then those using a band 2910 for one_plot in uncertainty_plot_lines: 2911 for uncertainty_type, lines in one_plot.items(): 2912 if uncertainty_type in use_band: 2913 plot_lines.extend(lines) 2914 2915 # Reverse so that bands appear first 2916 plot_lines.reverse() 2917 2918 # Add the plot lines 2919 
gnuplot_out.append(',\\\n'.join(plot_lines)) 2920 2921 # Now we can add the scale variation ratio 2922 replacement_dic['subhistogram_type'] = 'Relative scale and PDF uncertainty' 2923 2924 if 'statistical' in uncertainties: 2925 wgts_to_consider.append('stat_error') 2926 2927 # This function is just to temporarily create the scale ratio histogram with 2928 # the hwu.combine function. 2929 def rel_scale(wgtsA, wgtsB): 2930 new_wgts = {} 2931 for label, wgt in wgtsA.items(): 2932 if label in wgts_to_consider: 2933 if wgtsB['central']==0.0 and wgt==0.0: 2934 new_wgts[label] = 0.0 2935 continue 2936 elif wgtsB['central']==0.0: 2937 # It is ok to skip the warning here. 2938 # logger.debug('Warning:: A bin with finite weight '+ 2939 # 'was divided by a bin with zero weight.') 2940 new_wgts[label] = 0.0 2941 continue 2942 new_wgts[label] = (wgtsA[label]/wgtsB['central']) 2943 if label != 'stat_error': 2944 new_wgts[label] -= 1.0 2945 else: 2946 new_wgts[label] = wgtsA[label] 2947 return new_wgts 2948 2949 histos_for_subplots = [(i,histo) for i, histo in enumerate(self[:n_histograms]) if 2950 ( not (i!=0 and hasattr(histo,'jetsample') and histo.jetsample!=-1 and \ 2951 not (jet_samples_to_keep and len(jet_samples_to_keep)==1 and 2952 jet_samples_to_keep[0] == histo.jetsample)) )] 2953 2954 # Notice even though a ratio histogram is created here, it 2955 # is not actually used to plot the quantity in gnuplot, but just to 2956 # compute the y range. 2957 (ymin, ymax) = HwU.get_y_optimal_range([histo[1].__class__.combine( 2958 histo[1],histo[1],rel_scale) for histo in histos_for_subplots], 2959 labels = wgts_to_consider, scale='LIN') 2960 2961 # Add a margin on upper and lower bound. 
2962 ymax = ymax + 0.2 * (ymax - ymin) 2963 ymin = ymin - 0.2 * (ymax - ymin) 2964 replacement_dic['unset label'] = 'unset label' 2965 replacement_dic['ymin'] = ymin 2966 replacement_dic['ymax'] = ymax 2967 if not no_uncertainties: 2968 (replacement_dic['origin_x'], replacement_dic['origin_y'], 2969 replacement_dic['size_x'], replacement_dic['size_y']) = layout_geometry.pop() 2970 replacement_dic['mytics'] = 2 2971 # replacement_dic['set_ytics'] = 'set ytics %f'%((int(10*(ymax-ymin))/10)/3.0) 2972 replacement_dic['set_ytics'] = 'set ytics auto' 2973 replacement_dic['set_format_x'] = "set format x ''" if \ 2974 len(self)-n_histograms>0 else "set format x" 2975 replacement_dic['set_ylabel'] = 'set ylabel "%s rel.unc."'\ 2976 %('(1)' if self[0].type==None else '%s'%('NLO' if \ 2977 self[0].type.split()[0]=='NLO' else self[0].type)) 2978 replacement_dic['set_yscale'] = "unset logscale y" 2979 replacement_dic['set_format_y'] = 'unset format' 2980 2981 2982 tit='Relative uncertainties w.r.t. central value' 2983 if n_histograms > 1: 2984 tit=tit+'s' 2985 # if (not mu_var_pos is None and 'scale' not in use_band): 2986 # tit=tit+', scale is dashed' 2987 # if (not PDF_var_pos is None and 'pdf' not in use_band): 2988 # tit=tit+', PDF is dotted' 2989 replacement_dic['set_histo_label'] = \ 2990 'set label "%s" font ",9" front at graph 0.03, graph 0.13' % tit 2991 # Simply don't add these lines if there are no uncertainties. 
2992 # This meant uncessary extra work, but I no longer car at this point 2993 if not no_uncertainties: 2994 gnuplot_out.append(subhistogram_header%replacement_dic) 2995 2996 # Now add the first subhistogram 2997 plot_lines = [] 2998 uncertainty_plot_lines = [] 2999 n=-1 3000 for (i,histo) in histos_for_subplots: 3001 n=n+1 3002 k=n 3003 color_index = n%self.number_line_colors_defined+1 3004 # Plot uncertainties 3005 if not mu_var_pos is None: 3006 for j,mu_var in enumerate(mu_var_pos): 3007 uncertainty_plot_lines.append({}) 3008 if j==0: 3009 color_index = k%self.number_line_colors_defined+1 3010 else: 3011 n=n+1 3012 color_index = n%self.number_line_colors_defined+1 3013 # Add the central line only if advanced scale variation 3014 if j>0 or mu[j]!='none': 3015 plot_lines.append( 3016 "'%s' index %d using (($1+$2)/2):(safe($%d,$3,1.0)-1.0) ls %d title ''"\ 3017 %(HwU_name,block_position+i,mu_var+3,color_index)) 3018 uncertainty_plot_lines[-1]['scale'] = get_uncertainty_lines( 3019 HwU_name, block_position+i, mu_var+4, color_index+10,'', 3020 ratio=True, band='scale' in use_band) 3021 if not PDF_var_pos is None: 3022 for j,PDF_var in enumerate(PDF_var_pos): 3023 uncertainty_plot_lines.append({}) 3024 if j==0: 3025 color_index = k%self.number_line_colors_defined+1 3026 else: 3027 n=n+1 3028 color_index = n%self.number_line_colors_defined+1 3029 # Add the central line only if advanced pdf variation 3030 if j>0 or pdf[j]!='none': 3031 plot_lines.append( 3032 "'%s' index %d using (($1+$2)/2):(safe($%d,$3,1.0)-1.0) ls %d title ''"\ 3033 %(HwU_name,block_position+i,PDF_var+3,color_index)) 3034 uncertainty_plot_lines[-1]['pdf'] = get_uncertainty_lines( 3035 HwU_name, block_position+i, PDF_var+4, color_index+20,'', 3036 ratio=True, band='pdf' in use_band) 3037 if not merging_var_pos is None: 3038 for j,merging_var in enumerate(merging_var_pos): 3039 uncertainty_plot_lines.append({}) 3040 if j==0: 3041 color_index = k%self.number_line_colors_defined+1 3042 else: 3043 n=n+1 
3044 color_index = n%self.number_line_colors_defined+1 3045 if j>0 or merging[j]!='none': 3046 plot_lines.append( 3047 "'%s' index %d using (($1+$2)/2):(safe($%d,$3,1.0)-1.0) ls %d title ''"\ 3048 %(HwU_name,block_position+i,merging_var+3,color_index)) 3049 uncertainty_plot_lines[-1]['merging_scale'] = get_uncertainty_lines( 3050 HwU_name, block_position+i, merging_var+4, color_index+30,'', 3051 ratio=True, band='merging_scale' in use_band) 3052 if not alpsfact_var_pos is None: 3053 for j,alpsfact_var in enumerate(alpsfact_var_pos): 3054 uncertainty_plot_lines.append({}) 3055 if j==0: 3056 color_index = k%self.number_line_colors_defined+1 3057 else: 3058 n=n+1 3059 color_index = n%self.number_line_colors_defined+1 3060 if j>0 or alpsfact[j]!='none': 3061 plot_lines.append( 3062 "'%s' index %d using (($1+$2)/2):(safe($%d,$3,1.0)-1.0) ls %d title ''"\ 3063 %(HwU_name,block_position+i,alpsfact_var+3,color_index)) 3064 uncertainty_plot_lines[-1]['alpsfact'] = get_uncertainty_lines( 3065 HwU_name, block_position+i, alpsfact_var+4, color_index+40,'', 3066 ratio=True, band='alpsfact' in use_band) 3067 3068 if 'statistical' in uncertainties: 3069 plot_lines.append( 3070 "'%s' index %d using (($1+$2)/2):(0.0):(safe($4,$3,0.0)) w yerrorbar ls %d title ''"%\ 3071 (HwU_name,block_position+i,color_index)) 3072 3073 plot_lines.append("0.0 ls 999 title ''") 3074 3075 # Now add the uncertainty lines, those not using a band so that they 3076 # are not covered by those using a band after we reverse plo_lines 3077 for one_plot in uncertainty_plot_lines: 3078 for uncertainty_type, lines in one_plot.items(): 3079 if not uncertainty_type in use_band: 3080 plot_lines.extend(lines) 3081 # then those using a band 3082 for one_plot in uncertainty_plot_lines: 3083 for uncertainty_type, lines in one_plot.items(): 3084 if uncertainty_type in use_band: 3085 plot_lines.extend(lines) 3086 3087 # Reverse so that bands appear first 3088 plot_lines.reverse() 3089 # Add the plot lines 3090 if not 
no_uncertainties: 3091 gnuplot_out.append(',\\\n'.join(plot_lines)) 3092 3093 # We finish here when no ratio plot are asked for. 3094 if len(self)-n_histograms==0: 3095 # Now add the tail for this group 3096 gnuplot_out.extend(['','unset label','', 3097 '################################################################################']) 3098 # Return the starting data_block position for the next histogram group 3099 return block_position+len(self) 3100 3101 # We can finally add the last subhistograms for the ratios. 3102 ratio_name_long='(' 3103 for i, histo in enumerate(self[:n_histograms]): 3104 if i==0: continue 3105 ratio_name_long+='%d'%(i+1) if histo.type is None else ('NLO' if \ 3106 histo.type.split()[0]=='NLO' else histo.type) 3107 ratio_name_long+=')/' 3108 ratio_name_long+=('(1' if self[0].type==None else '(%s'%('NLO' if \ 3109 self[0].type.split()[0]=='NLO' else self[0].type))+' central value)' 3110 3111 ratio_name_short = 'ratio w.r.t. '+('1' if self[0].type==None else '%s'%('NLO' if \ 3112 self[0].type.split()[0]=='NLO' else self[0].type)) 3113 3114 replacement_dic['subhistogram_type'] = '%s ratio'%ratio_name_long 3115 replacement_dic['set_ylabel'] = 'set ylabel "%s"'%ratio_name_short 3116 3117 (ymin, ymax) = HwU.get_y_optimal_range(self[n_histograms:], 3118 labels = wgts_to_consider, scale='LIN',Kratio = True) 3119 3120 # Add a margin on upper and lower bound. 
3121 ymax = ymax + 0.2 * (ymax - ymin) 3122 ymin = ymin - 0.2 * (ymax - ymin) 3123 replacement_dic['unset label'] = 'unset label' 3124 replacement_dic['ymin'] = ymin 3125 replacement_dic['ymax'] = ymax 3126 (replacement_dic['origin_x'], replacement_dic['origin_y'], 3127 replacement_dic['size_x'], replacement_dic['size_y']) = layout_geometry.pop() 3128 replacement_dic['mytics'] = 2 3129 # replacement_dic['set_ytics'] = 'set ytics %f'%((int(10*(ymax-ymin))/10)/10.0) 3130 replacement_dic['set_ytics'] = 'set ytics auto' 3131 replacement_dic['set_format_x'] = "set format x" 3132 replacement_dic['set_yscale'] = "unset logscale y" 3133 replacement_dic['set_format_y'] = 'unset format' 3134 replacement_dic['set_histo_label'] = \ 3135 'set label "%s" font ",9" at graph 0.03, graph 0.13'%ratio_name_long 3136 # 'set label "NLO/LO (K-factor)" font ",9" at graph 0.82, graph 0.13' 3137 gnuplot_out.append(subhistogram_header%replacement_dic) 3138 3139 uncertainty_plot_lines = [] 3140 plot_lines = [] 3141 3142 # Some crap to get the colors right I suppose... 
3143 n=-1 3144 n=n+1 3145 if not mu_var_pos is None: 3146 for j,mu_var in enumerate(mu_var_pos): 3147 if j!=0: n=n+1 3148 if not PDF_var_pos is None: 3149 for j,PDF_var in enumerate(PDF_var_pos): 3150 if j!=0: n=n+1 3151 if not merging_var_pos is None: 3152 for j,merging_var in enumerate(merging_var_pos): 3153 if j!=0: n=n+1 3154 if not alpsfact_var_pos is None: 3155 for j,alpsfact_var in enumerate(alpsfact_var_pos): 3156 if j!=0: n=n+1 3157 3158 for i_histo_ratio, histo_ration in enumerate(self[n_histograms:]): 3159 n=n+1 3160 k=n 3161 block_ratio_pos = block_position+n_histograms+i_histo_ratio 3162 color_index = n%self.number_line_colors_defined+1 3163 # Now add the subhistograms 3164 plot_lines.append( 3165 "'%s' index %d using (($1+$2)/2):3 ls %d title ''"%\ 3166 (HwU_name,block_ratio_pos,color_index)) 3167 if 'statistical' in uncertainties: 3168 plot_lines.append( 3169 "'%s' index %d using (($1+$2)/2):3:4 w yerrorbar ls %d title ''"%\ 3170 (HwU_name,block_ratio_pos,color_index)) 3171 3172 # Then the scale variations 3173 if not mu_var_pos is None: 3174 for j,mu_var in enumerate(mu_var_pos): 3175 uncertainty_plot_lines.append({}) 3176 if j==0: 3177 color_index = k%self.number_line_colors_defined+1 3178 else: 3179 n=n+1 3180 color_index = n%self.number_line_colors_defined+1 3181 # Only print out the additional central value for advanced scale variation 3182 if j>0 or mu[j]!='none': 3183 plot_lines.append( 3184 "'%s' index %d using (($1+$2)/2):%d ls %d title ''"\ 3185 %(HwU_name,block_ratio_pos,mu_var+3,color_index)) 3186 uncertainty_plot_lines[-1]['scale'] = get_uncertainty_lines( 3187 HwU_name, block_ratio_pos, mu_var+4, color_index+10,'', 3188 band='scale' in use_band) 3189 if not PDF_var_pos is None: 3190 for j,PDF_var in enumerate(PDF_var_pos): 3191 uncertainty_plot_lines.append({}) 3192 if j==0: 3193 color_index = k%self.number_line_colors_defined+1 3194 else: 3195 n=n+1 3196 color_index = n%self.number_line_colors_defined+1 3197 # Only print out the 
additional central value for advanced pdf variation 3198 if j>0 or pdf[j]!='none': 3199 plot_lines.append( 3200 "'%s' index %d using (($1+$2)/2):%d ls %d title ''"\ 3201 %(HwU_name,block_ratio_pos,PDF_var+3,color_index)) 3202 uncertainty_plot_lines[-1]['pdf'] = get_uncertainty_lines( 3203 HwU_name, block_ratio_pos, PDF_var+4, color_index+20,'', 3204 band='pdf' in use_band) 3205 if not merging_var_pos is None: 3206 for j,merging_var in enumerate(merging_var_pos): 3207 uncertainty_plot_lines.append({}) 3208 if j==0: 3209 color_index = k%self.number_line_colors_defined+1 3210 else: 3211 n=n+1 3212 color_index = n%self.number_line_colors_defined+1 3213 if j>0 or merging[j]!='none': 3214 plot_lines.append( 3215 "'%s' index %d using (($1+$2)/2):%d ls %d title ''"\ 3216 %(HwU_name,block_ratio_pos,merging_var+3,color_index)) 3217 uncertainty_plot_lines[-1]['merging_scale'] = get_uncertainty_lines( 3218 HwU_name, block_ratio_pos, merging_var+4, color_index+30,'', 3219 band='merging_scale' in use_band) 3220 if not alpsfact_var_pos is None: 3221 for j,alpsfact_var in enumerate(alpsfact_var_pos): 3222 uncertainty_plot_lines.append({}) 3223 if j==0: 3224 color_index = k%self.number_line_colors_defined+1 3225 else: 3226 n=n+1 3227 color_index = n%self.number_line_colors_defined+1 3228 if j>0 or alpsfact[j]!='none': 3229 plot_lines.append( 3230 "'%s' index %d using (($1+$2)/2):%d ls %d title ''"\ 3231 %(HwU_name,block_ratio_pos,alpsfact_var+3,color_index)) 3232 uncertainty_plot_lines[-1]['alpsfact'] = get_uncertainty_lines( 3233 HwU_name, block_ratio_pos, alpsfact_var+4, color_index+40,'', 3234 band='alpsfact' in use_band) 3235 3236 # Now add the uncertainty lines, those not using a band so that they 3237 # are not covered by those using a band after we reverse plo_lines 3238 for one_plot in uncertainty_plot_lines: 3239 for uncertainty_type, lines in one_plot.items(): 3240 if not uncertainty_type in use_band: 3241 plot_lines.extend(lines) 3242 # then those using a band 3243 for 
one_plot in uncertainty_plot_lines: 3244 for uncertainty_type, lines in one_plot.items(): 3245 if uncertainty_type in use_band: 3246 plot_lines.extend(lines) 3247 3248 plot_lines.append("1.0 ls 999 title ''") 3249 3250 # Reverse so that bands appear first 3251 plot_lines.reverse() 3252 # Add the plot lines 3253 gnuplot_out.append(',\\\n'.join(plot_lines)) 3254 3255 # Now add the tail for this group 3256 gnuplot_out.extend(['','unset label','', 3257 '################################################################################']) 3258 3259 # Return the starting data_block position for the next histogram group 3260 return block_position+len(self) 3261
################################################################################
## matplotlib related function
################################################################################
def plot_ratio_from_HWU(path, ax, hwu_variable, hwu_numerator, hwu_denominator, *args, **opts):
    """Plot the bin-by-bin ratio of two curves of an HwU histogram on `ax`.

    INPUT:
       - path can be a path to HwU or an HwUList instance
       - ax is the matplotlib frame where to do the plot
       - hwu_variable is the histograms to consider
       - hwu_numerator is the numerator of the ratio plot
       - hwu_denominator is the denominator of the ratio plot
       - hwu_denominator_path (optional keyword) is a path to HwU or an
         HwUList instance from which the denominator is read instead of
         `path`. (Default: the denominator is read from `path`.)
       - any other positional/keyword argument is forwarded to ax.plot
         ('drawstyle' defaults to 'steps' when not given).
    OUTPUT:
       - adding the curves to the plot
       - return the HwUList (the one holding the numerator)
    """

    if isinstance(path, str):
        hwu = HwUList(path, raw_labels=True)
    else:
        hwu = path

    if 'hwu_denominator_path' in opts:
        # Remove the option so that it is not forwarded to ax.plot below.
        denominator_source = opts.pop('hwu_denominator_path')
        # Single-argument call form: valid as a statement in python2 too.
        print('found second hwu')
        if isinstance(denominator_source, str):
            # Load the file given for the denominator (and not `path` again,
            # which would silently reuse the numerator file).
            hwu2 = HwUList(denominator_source, raw_labels=True)
        else:
            hwu2 = denominator_source
    else:
        hwu2 = hwu

    select_hist = hwu.get(hwu_variable)
    select_hist2 = hwu2.get(hwu_variable)
    bins = select_hist.get('bins')
    num = select_hist.get(hwu_numerator)
    denom = select_hist2.get(hwu_denominator)
    # A bin whose denominator is zero (falsy) is given a ratio of 1.
    ratio = [num[i]/denom[i] if denom[i] else 1 for i in range(len(bins))]
    opts.setdefault('drawstyle', 'steps')
    ax.plot(bins, ratio, *args, **opts)
    return hwu
3303
def plot_from_HWU(path, ax, hwu_variable, hwu_central, *args, **opts):
    """Plot one weight of one histogram, optionally with an uncertainty band.

    INPUT:
       - path can be a path to HwU or an HwUList instance
       - ax is the matplotlib frame where to do the plot
       - hwu_variable is the histograms to consider
       - hwu_central is the central curve to consider
       - hwu_error is the error band to consider (optional: Default is no band)
       - hwu_error_mode is how to compute the error band (optional). It is
         passed as `mode` to the histogram's get_uncertainty_band.
       - hwu_mult is a factor multiplying the central curve and the band
         (optional: Default is 1)
       - all remaining positional and keyword arguments are forwarded to
         ax.plot ('drawstyle' defaults to 'steps').
    OUTPUT:
       - adding the curves to the plot
       - return the HwUList
       - return the line associated to the central (can be used to get the color)
    """

    # Handle optional parameters. They are popped so that they are not
    # forwarded to ax.plot.
    hwu_error = opts.pop('hwu_error', None)
    hwu_error_mode = opts.pop('hwu_error_mode', None)
    hwu_mult = opts.pop('hwu_mult', 1)

    if isinstance(path, str):
        hwu = HwUList(path, raw_labels=True)
    else:
        hwu = path

    select_hist = hwu.get(hwu_variable)
    bins = select_hist.get('bins')
    central_value = select_hist.get(hwu_central)
    if hwu_mult != 1:
        central_value = [hwu_mult*b for b in central_value]
    opts.setdefault('drawstyle', 'steps')
    # ax.plot returns a list of lines; exactly one is expected here.
    H, = ax.plot(bins, central_value, *args, **opts)

    # Add error band, drawn with the color of the central line
    if hwu_error:
        h_min, h_max = select_hist.get_uncertainty_band(hwu_error, mode=hwu_error_mode)
        if hwu_mult != 1:
            h_min = [hwu_mult*b for b in h_min]
            h_max = [hwu_mult*b for b in h_max]
        fill_between_steps(bins, h_min, h_max, ax=ax, facecolor=H.get_color(),
                           alpha=0.5, edgecolor=H.get_color(), hatch='/')

    return hwu, H
if __name__ == "__main__":
    # Standalone command-line driver: load one or several histogram files,
    # optionally combine/rescale/rebin them and write them out again.
    main_doc = \
    """ For testing and standalone use. Usage:
        python histograms.py <.HwU input_file_path_1> <.HwU input_file_path_2> ... --out=<output_file_path.format> <options>
        Where <options> can be a list of the following:
           '--help'          See this message.
           '--gnuplot' or '' output the histograms read to gnuplot
           '--HwU'           to output the histograms read to the raw HwU source.
           '--types=<type1>,<type2>,...' to keep only the type<i> when importing histograms.
           '--titles=<title1>,<title2>,...' to keep only the titles which have any of 'title<i>' in them (not necessarily equal to them)
           '--n_ratios=<integer>' Specifies how many curves must be considerd for the ratios.
           '--no_open'       Turn off the automatic processing of the gnuplot output.
           '--show_full'     to show the complete output of what was read.
           '--show_short'    to show a summary of what was read.
           '--simple_ratios' to turn off correlations and error propagation in the ratio.
           '--sum'           To sum all identical histograms together
           '--average'       To average over all identical histograms
           '--rebin=<n>'     Rebin the plots by merging n-consecutive bins together.
           '--assign_types=<type1>,<type2>,...' to assign a type to all histograms of the first, second, etc... files loaded.
           '--multiply=<fact1>,<fact2>,...' to multiply all histograms of the first, second, etc... files by the fact1, fact2, etc...
           '--no_suffix'     Do no add any suffix (like '#1, #2, etc..) to the histograms types.
           '--lhapdf-config=<PATH_TO_LHAPDF-CONFIG>' give path to lhapdf-config to compute PDF certainties using LHAPDF (only for lhapdf6)
           '--jet_samples=[int1,int2]' Specifies what jet samples to keep. 'None' is the default and keeps them all.
           '--central_only'  This option specifies to disregard all extra weights, so as to make it possible
                             to take the ratio of plots with different extra weights specified.
           '--keep_all_weights' This option specifies to keep in the HwU produced all the weights, even
                                those which are not known (i.e. that is scale, PDF or merging variation)
        For chosing what kind of variation you want to see on your plot, you can use the following options
           '--no_<type>'                  Turn off the plotting of variations of the chosen type
           '--only_<type>'                Turn on only the plotting of variations of the chosen type
           '--variations=['<type1>',...]' Turn on only the plotting of the variations of the list of chosen types
           '--band=['<type1>',...]'       Chose for which variations one should use uncertainty bands as opposed to lines
        The types can be: pdf, scale, stat, merging or alpsfact
        For the last two options one can use ...=all to automatically select all types.

        When parsing an XML-formatted plot source output by the Pythia8 driver, the file names can be appended
        options as suffixes separated by '@', as follows:
           python histograms.py <XML_source_file_name>@<option1>@<option2>@etc..
        These options can be
           'run_id=<integer>'      Specifies the run_ID from which the plots should be loaded.
                                   By default, the first run is considered and the ones that follow are ignored.
           'merging_scale=<float>' This option allows to specify to import only the plots corresponding to a specific
                                   value for the merging scale.
                                   A value of -1 means that only the weights with the same merging scale as the central weight are kept.
                                   By default, all weights are considered.
    """

    possible_options=['--help', '--gnuplot', '--HwU', '--types','--n_ratios',\
      '--no_open','--show_full','--show_short','--simple_ratios','--sum','--average','--rebin', \
      '--assign_types','--multiply','--no_suffix', '--out', '--jet_samples',
      '--no_scale','--no_pdf','--no_stat','--no_merging','--no_alpsfact',
      '--only_scale','--only_pdf','--only_stat','--only_merging','--only_alpsfact',
      '--variations','--band','--central_only', '--lhapdf-config','--titles',
      '--keep_all_weights']
    n_ratios = -1
    uncertainties = ['scale','pdf','statistical','merging_scale','alpsfact']
    # The list of type of uncertainties for which to use bands. None is a 'smart' default
    use_band = None
    auto_open = True
    ratio_correlations = True
    consider_reweights = ['pdf','scale','murmuf_scales','merging_scale','alpsfact']

    def log(msg):
        """Print a message prefixed with the name of this script."""
        # The parenthesised form is valid both as a Python 2 print statement
        # and as a Python 3 function call.
        print("histograms.py :: %s"%str(msg))

    if '--help' in sys.argv or len(sys.argv)==1:
        log('\n\n%s'%main_doc)
        sys.exit(0)

    for arg in sys.argv[1:]:
        if arg.startswith('--'):
            if arg.split('=')[0] not in possible_options:
                log('WARNING: option "%s" not valid. It will be ignored' % arg)

    arg_string=' '.join(sys.argv)

    # Comma-separated list options. The literal 'None' is mapped to None.
    # (The loop variable is deliberately not called `type`: on Python 2 a
    # list-comprehension variable leaks and would shadow the builtin.)
    OutName = ""
    accepted_types = []
    accepted_titles = []
    assigned_types = []
    for arg in sys.argv[1:]:
        if arg.startswith('--out='):
            OutName = arg[6:]
        elif arg.startswith('--types='):
            accepted_types = [(item if item!='None' else None) for item in \
                                                             arg[8:].split(',')]
        elif arg.startswith('--titles='):
            accepted_titles = [(item if item!='None' else None) for item in \
                                                             arg[9:].split(',')]
        elif arg.startswith('--assign_types='):
            assigned_types = [(item if item!='None' else None) for item in \
                                                            arg[15:].split(',')]

    jet_samples_to_keep = None

    # NOTE(review): the default is a one-element list whereas a user-supplied
    # value is a plain string; confirm which form HwUList.output expects.
    lhapdfconfig = ['lhapdf-config']
    for arg in sys.argv[1:]:
        if arg.startswith('--lhapdf-config='):
            lhapdfconfig = arg[16:]

    no_suffix = '--no_suffix' in sys.argv

    if '--central_only' in sys.argv:
        consider_reweights = []

    if '--keep_all_weights' in sys.argv:
        consider_reweights = 'ALL'

    for arg in sys.argv[1:]:
        if arg.startswith('--n_ratios='):
            n_ratios = int(arg[11:])

    if '--no_open' in sys.argv:
        auto_open = False

    # Maps the short names used on the command line to the uncertainty names
    variation_type_map={'scale':'scale','merging':'merging_scale','pdf':'pdf',
                        'stat':'statistical','alpsfact':'alpsfact'}
    # Lets the user write the type names unquoted in --variations / --band
    variation_names = dict((key, key) for key in variation_type_map)

    # NOTE(security): eval() is applied below to text taken verbatim from the
    # command line. Only use these options with trusted input.
    for arg in sys.argv:
        try:
            opt, value = arg.split('=')
        except ValueError:
            continue
        if opt=='--jet_samples':
            jet_samples_to_keep = eval(value)
        if opt=='--variations':
            uncertainties = [variation_type_map[name] for name in eval(value,
                     dict(variation_names, all=list(variation_type_map)))]
        if opt=='--band':
            use_band = [variation_type_map[name] for name in eval(value,
                     dict(variation_names,
                          all=[key for key in variation_type_map if key!='stat']))]

    if '--simple_ratios' in sys.argv:
        ratio_correlations = False

    for arg in sys.argv:
        if arg.startswith('--no_'):
            # '--no_open' and '--no_suffix' are not variation switches: they
            # (and any unknown type) map to None here and are left alone.
            variation = variation_type_map.get(arg[5:])
            if variation in uncertainties:
                uncertainties.remove(variation)
        if arg.startswith('--only_') and arg[7:] in variation_type_map:
            uncertainties = [variation_type_map[arg[7:]]]
            break

    # Now remove from the weights considered all those not deemed necessary
    # in view of which uncertainties are selected
    if isinstance(consider_reweights, list):
        for name in ('pdf','scale','merging_scale','alpsfact'):
            if name not in uncertainties and name in consider_reweights:
                consider_reweights.remove(name)

    n_files = len([arg for arg in sys.argv[1:] if not arg.startswith('--')])
    histo_norm = [1.0]*n_files

    for arg in sys.argv[1:]:
        if arg.startswith('--multiply='):
            histo_norm = [(float(fact) if fact!='' else 1.0) for fact in \
                                                            arg[11:].split(',')]
    # Files without an explicit factor are left unscaled.
    if len(histo_norm) < n_files:
        histo_norm.extend([1.0]*(n_files-len(histo_norm)))

    if '--average' in sys.argv:
        histo_norm = [fact/float(n_files) for fact in histo_norm]

    combine_files = '--sum' in sys.argv or '--average' in sys.argv

    log("=======")
    histo_list = HwUList([])
    for i, arg in enumerate(sys.argv[1:]):
        # Input files must come first: stop at the first option.
        if arg.startswith('--'):
            break
        log("Loading histograms from '%s'."%arg)
        if OutName=="":
            OutName = os.path.basename(arg).split('.')[0]+'_output'
        # Make sure to process the potential XML options appended to the filename
        file_specification = arg.split('@')
        filename = file_specification.pop(0)
        file_options = {}
        for option in file_specification:
            opt, value = option.split('=')
            if opt=='run_id':
                file_options[opt]=int(value)
            elif opt=='merging_scale':
                file_options[opt]=float(value)
            else:
                log("Unreckognize file option '%s'."%option)
                sys.exit(1)
        new_histo_list = HwUList(filename, accepted_types_order=accepted_types,
                          consider_reweights=consider_reweights, **file_options)
        # We filter now the diagrams whose title doesn't match the constraints
        if len(accepted_titles)>0:
            new_histo_list = HwUList(histo for histo in new_histo_list if
                                 any(t in histo.title for t in accepted_titles))
        if not (no_suffix or n_files==1):
            for histo in new_histo_list:
                if histo.type is not None:
                    histo.type += '|'
                else:
                    histo.type = ''
                # Tag the histogram with the user-assigned type of this file
                # or, failing that, with the number of the file.
                try:
                    suffix = assigned_types[i]
                except IndexError:
                    suffix = "#%d"%(i+1)
                # Overwrite existing number if present. We assume here that one never
                # uses the '#' in its custom-defined types, which is a fair assumptions.
                try:
                    histo.type = histo.type[:histo.type.index('#')] + suffix
                except ValueError:
                    histo.type += suffix

        if i==0 or not combine_files:
            # Histograms of this file are kept as separate entries
            for j, hist in enumerate(new_histo_list):
                new_histo_list[j] = hist*histo_norm[i]
            histo_list.extend(new_histo_list)
        else:
            # Histograms of this file are added to those of the first file
            for j, hist in enumerate(new_histo_list):
                # First make sure the plots have the same weight labels and such
                hist.test_plot_compability(histo_list[j])
                # Now let the histogram module do the magic and add them.
                histo_list[j] += hist*histo_norm[i]

    log("A total of %i histograms were found."%len(histo_list))
    log("=======")

    n_rebin = 1
    for arg in sys.argv[1:]:
        if arg.startswith('--rebin='):
            n_rebin = int(arg[8:])

    if n_rebin > 1:
        for hist in histo_list:
            hist.rebin(n_rebin)

    # gnuplot is the default output format when '--HwU' is not requested
    if '--gnuplot' in sys.argv or '--HwU' not in sys.argv:
        # Where the magic happens:
        histo_list.output(OutName, format='gnuplot',
            number_of_ratios = n_ratios,
            uncertainties=uncertainties,
            ratio_correlations=ratio_correlations,
            arg_string=arg_string,
            jet_samples_to_keep=jet_samples_to_keep,
            use_band=use_band,
            auto_open=auto_open,
            lhapdfconfig=lhapdfconfig)
        # Tell the user that everything went for the best
        log("%d histograms have been output in " % len(histo_list)+\
                    "the gnuplot format at '%s.[HwU|gnuplot]'." % OutName)
        if auto_open:
            command = 'gnuplot %s.gnuplot'%OutName
            # Run gnuplot without a shell and discard its stderr. An unread
            # stderr=PIPE can block the child once the pipe buffer is full.
            try:
                with open(os.devnull, 'w') as devnull:
                    gnuplot_failed = subprocess.call(
                        ['gnuplot', '%s.gnuplot'%OutName], stderr=devnull) != 0
            except OSError:
                # gnuplot is not installed or cannot be executed
                gnuplot_failed = True
            if gnuplot_failed:
                log("Automatic processing of the gnuplot card failed. Try the"+\
                    " command by hand:\n%s"%command)
            else:
                sys.exit(0)

    if '--HwU' in sys.argv:
        log("Histograms data has been output in the HwU format at "+\
                                                  "'%s.HwU'."%OutName)
        histo_list.output(OutName, format='HwU')
        sys.exit(0)

    if '--show_short' in sys.argv or '--show_full' in sys.argv:
        for i, histo in enumerate(histo_list):
            if i!=0:
                log('-------')
            log(histo.nice_string(short=(not '--show_full' in sys.argv)))
        log("=======")
######## Routine from https://gist.github.com/thriveth/8352565
######## To fill for histograms data in matplotlib
def fill_between_steps(x, y1, y2=0, h_align='right', ax=None, **kwargs):
    ''' Fills a hole in matplotlib: fill_between for step plots.
    Parameters :
    ------------
    x : array-like
        Array/vector of index values. These are assumed to be equally-spaced.
        If not, the result will probably look weird...
        At least two values are needed, since the step width is x[1]-x[0].
    y1 : array-like
        Array/vector of values to be filled under.
    y2 : array-Like
        Array/vector or bottom values for filled area. Default is 0.
        A scalar is passed unchanged to fill_between.
    h_align : string
        'right' (default) shifts the steps by one step width to the left,
        'mid' by half a step width; any other value applies no shift.
    ax : object
        Axes on which fill_between is called. Default is the current axes.
    **kwargs will be passed to the matplotlib fill_between() function.
    Returns the axes on which the area was filled.
    '''
    # If no Axes object given, grab the current one:
    if ax is None:
        # NOTE(review): relies on a module-level `plt` (matplotlib.pyplot);
        # confirm it is imported elsewhere in this file.
        ax = plt.gca()

    # First, duplicate the x values (dropping the very first copy)
    xx = [edge for edge in x for _copy in (0, 1)][1:]
    # Now: the step width, taken from the first two x values
    xstep = x[1] - x[0]
    # Now: add one step at end of row.
    xx.append(xx[-1] + xstep)

    # Make it possible to change step alignment.
    if h_align == 'mid':
        xx = [X-xstep/2. for X in xx]
    elif h_align == 'right':
        xx = [X-xstep for X in xx]

    # Also, duplicate each y coordinate in both arrays
    yy1 = [value for value in y1 for _copy in (0, 1)]
    try:
        y2_values = list(y2)
    except TypeError:
        # y2 is a scalar baseline (e.g. the default 0): use it unchanged
        yy2 = y2
    else:
        if len(y2_values) == len(yy1):
            # y2 already holds one value per step edge
            yy2 = y2_values
        else:
            yy2 = [value for value in y2_values for _copy in (0, 1)]

    # now to the plotting part:
    ax.fill_between(xx, yy1, y2=yy2, **kwargs)

    return ax
######## end routine from https://gist.github.com/thriveth/8352565