#! /usr/bin/env python
"""
Compare two entities: Maps, map sets, pipelines, or distribution makers. One
kind can be compared against another, so long as the resulting map(s) have
equivalent names and binning. The result each entity specification is formatted
into a MapSet and can be stored to disk so that e.g. re-running a
DistributionMaker is unnecessary to reproduce the results.
"""
# TODO: make use of `MapSet.compare()` method (and/or expand that until it is
# equally useful here)
from argparse import ArgumentParser
from collections import OrderedDict
from copy import deepcopy
import os
import numpy as np
from pisa import EPSILON
from pisa.core.distribution_maker import DistributionMaker
from pisa.core.map import Map, MapSet
from pisa.core.pipeline import Pipeline
from pisa.utils.fileio import mkdir, to_file
from pisa.utils.log import logging, set_verbosity
from pisa.utils.plotter import Plotter
__all__ = ['DISTRIBUTIONMAKER_SOURCE_STR', 'PIPELINE_SOURCE_STR',
'MAP_SOURCE_STR', 'MAPSET_SOURCE_STR',
'parse_args', 'compare', 'main']
__author__ = 'J.L. Lanfranchi'
__license__ = '''Copyright (c) 2014-2017, The IceCube Collaboration
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.'''
DISTRIBUTIONMAKER_SOURCE_STR = (
'DistributionMaker instantiated from multiple pipeline config files'
)
PIPELINE_SOURCE_STR = 'Pipeline instantiated from a pipeline config file'
MAP_SOURCE_STR = 'Map stored on disk'
MAPSET_SOURCE_STR = 'MapSet stored on disk'
[docs]
def compare(outdir, ref, ref_label, test, test_label, asymm_max=None,
asymm_min=None, combine=None, diff_max=None, diff_min=None,
fract_diff_max=None, fract_diff_min=None, json=False, pdf=False,
png=False, ref_abs=False, ref_param_selections=None, sum=None,
test_abs=False, test_param_selections=None):
"""Compare two entities. The result each entity specification is
formatted into a MapSet and stored to disk, so that e.g. re-running
a DistributionMaker is unnecessary to reproduce the results.
Parameters
----------
outdir : string
Store output plots to this directory
ref : string or array of strings
Pipeline settings config file that generates reference output,
or a stored map or map set. Multiple pipelines, maps, or map sets are
supported
ref_abs : bool
Use the absolute value of the reference plot for comparisons
ref_label : string
Label for reference
ref_param-selections : string
Param selections to apply to ref pipeline config(s). Not
applicable if ref specifies stored map or map sets
test : string or array of strings
Pipeline settings config file that generates test output, or a
stored map or map set. Multiple pipelines, maps, or map sets are
supported
test_abs : bool
Use the absolute value of the test plot for comparisons
test_label : string
Label for test
test_param_selections : None or string
Param selections to apply to test pipeline config(s). Not
applicable if test specifies stored map or map sets
combine : None or string or array of strings
Combine by wildcard string, where string globbing (a la command
line) uses asterisk for any number of wildcard characters. Use
single quotes such that asterisks do not get expanded by the
shell. Multiple combine strings supported
sum : None or int
Sum over (and hence remove) the specified axis or axes. I.e.,
project the map onto remaining (unspecified) axis or axes
json : bool
Save output maps in compressed json (json.bz2) format
pdf : bool
Save plots in PDF format. If neither this nor png is
specified, no plots are produced
png : bool
Save plots in PNG format. If neither this nor pdf is specfied,
no plots are produced
diff_min : None or float
Difference plot vmin; if you specify only one of diff_min or
diff_max, symmetric limits are automatically used (min = -max)
diff_max : None or float
Difference plot max; if you specify only one of diff_min or
diff_max, symmetric limits are automatically used (min = -max)
fract_diff_min : None or float
Fractional difference plot vmin; if you specify only one of
fract_diff_min or fract_diff_max, symmetric limits are
automatically used (min = -max)
fract_diff_max : None or float
Fractional difference plot max; if you specify only one of
fract_diff_min or fract_diff_max, symmetric limits are
automatically used (min = -max)
asymm_min : None or float
Asymmetry plot vmin; if you specify only one of asymm_min or
asymm_max, symmetric limits are automatically used (min = -max)
asymm_max : None or float
Fractional difference plot max; if you specify only one of
asymm_min or asymm_max, symmetric limits are automatically used
(min = -max)
Returns
-------
summary_stats : dict
Dictionary containing a summary for each h Map processed
diff : MapSet
MapSet of the difference
- (Test - Ref)
fract_diff : MapSet
MapSet of the fractional difference
- (Test - Ref) / Ref
asymm : MapSet
MapSet of the asymmetric fraction difference or pull
- (Test - Ref) / sqrt(Ref)
"""
ref_plot_label = ref_label
if ref_abs and not ref_label.startswith('abs'):
ref_plot_label = 'abs(%s)' % ref_plot_label
test_plot_label = test_label
if test_abs and not test_label.startswith('abs'):
test_plot_label = 'abs(%s)' % test_plot_label
plot_formats = []
if pdf:
plot_formats.append('pdf')
if png:
plot_formats.append('png')
diff_symm = True
if diff_min is not None and diff_max is None:
diff_max = -diff_min
diff_symm = False
if diff_max is not None and diff_min is None:
diff_min = -diff_max
diff_symm = False
fract_diff_symm = True
if fract_diff_min is not None and fract_diff_max is None:
fract_diff_max = -fract_diff_min
fract_diff_symm = False
if fract_diff_max is not None and fract_diff_min is None:
fract_diff_min = -fract_diff_max
fract_diff_symm = False
asymm_symm = True
if asymm_max is not None and asymm_min is None:
asymm_min = -asymm_max
asymm_symm = False
if asymm_min is not None and asymm_max is None:
asymm_max = -asymm_min
asymm_symm = False
outdir = os.path.expanduser(os.path.expandvars(outdir))
mkdir(outdir)
# Get the reference distribution(s) into the form of a test MapSet
p_ref = None
ref_source = None
if isinstance(ref, Map):
p_ref = MapSet(ref)
ref_source = MAP_SOURCE_STR
elif isinstance(ref, MapSet):
p_ref = ref
ref_source = MAPSET_SOURCE_STR
elif isinstance(ref, Pipeline):
if ref_param_selections is not None:
ref.select_params(ref_param_selections)
p_ref = ref.get_outputs()
ref_source = PIPELINE_SOURCE_STR
elif isinstance(ref, DistributionMaker):
if ref_param_selections is not None:
ref.select_params(ref_param_selections)
p_ref = ref.get_outputs()
ref_source = DISTRIBUTIONMAKER_SOURCE_STR
else:
if len(ref) == 1:
try:
ref_pipeline = Pipeline(config=ref[0])
except:
pass
else:
ref_source = PIPELINE_SOURCE_STR
if ref_param_selections is not None:
ref_pipeline.select_params(ref_param_selections)
p_ref = ref_pipeline.get_outputs()
else:
try:
ref_dmaker = DistributionMaker(pipelines=ref)
except:
pass
else:
ref_source = DISTRIBUTIONMAKER_SOURCE_STR
if ref_param_selections is not None:
ref_dmaker.select_params(ref_param_selections)
p_ref = ref_dmaker.get_outputs()
if p_ref is None:
try:
p_ref = [Map.from_json(f) for f in ref]
except:
pass
else:
ref_source = MAP_SOURCE_STR
p_ref = MapSet(p_ref)
if p_ref is None:
assert ref_param_selections is None
assert len(ref) == 1, 'Can only handle one MapSet'
try:
p_ref = MapSet.from_json(ref[0])
except:
pass
else:
ref_source = MAPSET_SOURCE_STR
if p_ref is None:
raise ValueError(
'Could not instantiate the reference Pipeline, DistributionMaker,'
' Map, or MapSet from ref value(s) %s' % ref
)
ref = p_ref
logging.info('Reference map(s) derived from a ' + ref_source)
# Get the test distribution(s) into the form of a test MapSet
p_test = None
test_source = None
if isinstance(test, Map):
p_test = MapSet(test)
test_source = MAP_SOURCE_STR
elif isinstance(test, MapSet):
p_test = test
test_source = MAPSET_SOURCE_STR
elif isinstance(test, Pipeline):
if test_param_selections is not None:
test.select_params(test_param_selections)
p_test = test.get_outputs()
test_source = PIPELINE_SOURCE_STR
elif isinstance(test, DistributionMaker):
if test_param_selections is not None:
test.select_params(test_param_selections)
p_test = test.get_outputs()
test_source = DISTRIBUTIONMAKER_SOURCE_STR
else:
if len(test) == 1:
try:
test_pipeline = Pipeline(config=test[0])
except:
pass
else:
test_source = PIPELINE_SOURCE_STR
if test_param_selections is not None:
test_pipeline.select_params(test_param_selections)
p_test = test_pipeline.get_outputs()
else:
try:
test_dmaker = DistributionMaker(pipelines=test)
except:
pass
else:
test_source = DISTRIBUTIONMAKER_SOURCE_STR
if test_param_selections is not None:
test_dmaker.select_params(test_param_selections)
p_test = test_dmaker.get_outputs()
if p_test is None:
try:
p_test = [Map.from_json(f) for f in test]
except:
pass
else:
test_source = MAP_SOURCE_STR
p_test = MapSet(p_test)
if p_test is None:
assert test_param_selections is None
assert len(test) == 1, 'Can only handle one MapSet'
try:
p_test = MapSet.from_json(test[0])
except:
pass
else:
test_source = MAPSET_SOURCE_STR
if p_test is None:
raise ValueError(
'Could not instantiate the test Pipeline, DistributionMaker, Map,'
' or MapSet from test value(s) %s' % test
)
test = p_test
logging.info('Test map(s) derived from a ' + test_source)
if combine is not None:
ref = ref.combine_wildcard(combine)
test = test.combine_wildcard(combine)
if isinstance(ref, Map):
ref = MapSet([ref])
if isinstance(test, Map):
test = MapSet([test])
if sum is not None:
ref = ref.sum(sum)
test = test.sum(sum)
# Set the MapSet names according to args passed by user
ref.name = ref_label
test.name = test_label
# Save to disk the maps being plotted (excluding optional aboslute value
# operations)
if json:
refmaps_path = os.path.join(
outdir, 'maps__%s.json.bz2' % ref_label
)
to_file(ref, refmaps_path)
testmaps_path = os.path.join(
outdir, 'maps__%s.json.bz2' % test_label
)
to_file(test, testmaps_path)
if set(test.names) != set(ref.names):
raise ValueError(
'Test map names %s do not match ref map names %s.'
% (sorted(test.names), sorted(ref.names))
)
# Aliases to save keystrokes
def masked(x):
return np.ma.masked_invalid(x.nominal_values)
def zero_to_nan(map):
newmap = deepcopy(map)
mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
newmap.hist[mask] = np.nan
return newmap
reordered_test = []
new_ref = []
diff_maps = []
fract_diff_maps = []
asymm_maps = []
summary_stats = {}
for ref_map in ref:
test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
if ref_abs:
ref_map = abs(ref_map)
if test_abs:
test_map = abs(test_map)
diff_map = test_map - ref_map
fract_diff_map = (test_map - ref_map)/zero_to_nan(ref_map)
asymm_map = (test_map - ref_map)/zero_to_nan(ref_map**0.5)
abs_fract_diff_map = np.abs(fract_diff_map)
new_ref.append(ref_map)
reordered_test.append(test_map)
diff_maps.append(diff_map)
fract_diff_maps.append(fract_diff_map)
asymm_maps.append(asymm_map)
min_ref = np.min(masked(ref_map))
max_ref = np.max(masked(ref_map))
min_test = np.min(masked(test_map))
max_test = np.max(masked(test_map))
total_ref = np.sum(masked(ref_map))
total_test = np.sum(masked(test_map))
mean_ref = np.mean(masked(ref_map))
mean_test = np.mean(masked(test_map))
max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
median_abs_fract_diff = np.median(masked(abs_fract_diff_map))
mean_fract_diff = np.mean(masked(fract_diff_map))
min_fract_diff = np.min(masked(fract_diff_map))
max_fract_diff = np.max(masked(fract_diff_map))
std_fract_diff = np.std(masked(fract_diff_map))
mean_diff = np.mean(masked(diff_map))
min_diff = np.min(masked(diff_map))
max_diff = np.max(masked(diff_map))
std_diff = np.std(masked(diff_map))
median_diff = np.nanmedian(masked(diff_map))
mad_diff = np.nanmedian(masked(np.abs(diff_map)))
median_fract_diff = np.nanmedian(masked(fract_diff_map))
mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))
min_asymm = np.min(masked(fract_diff_map))
max_asymm = np.max(masked(fract_diff_map))
total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))
summary_stats[test_map.name] = OrderedDict([
('min_ref', min_ref),
('max_ref', max_ref),
('total_ref', total_ref),
('mean_ref', mean_ref),
('min_test', min_test),
('max_test', max_test),
('total_test', total_test),
('mean_test', mean_test),
('max_abs_fract_diff', max_abs_fract_diff),
('mean_abs_fract_diff', mean_abs_fract_diff),
('median_abs_fract_diff', median_abs_fract_diff),
('min_fract_diff', min_fract_diff),
('max_fract_diff', max_fract_diff),
('mean_fract_diff', mean_fract_diff),
('std_fract_diff', std_fract_diff),
('median_fract_diff', median_fract_diff),
('mad_fract_diff', mad_fract_diff),
('min_diff', min_diff),
('max_diff', max_diff),
('mean_diff', mean_diff),
('std_diff', std_diff),
('median_diff', median_diff),
('mad_diff', mad_diff),
('min_asymm', min_asymm),
('max_asymm', max_asymm),
('total_asymm', total_asymm),
])
logging.info('Map %s...', ref_map.name)
logging.info(' Ref map(s):')
logging.info(' min :' + ('%.2f' % min_ref).rjust(12))
logging.info(' max :' + ('%.2f' % max_ref).rjust(12))
logging.info(' total :' + ('%.2f' % total_ref).rjust(12))
logging.info(' mean :' + ('%.2f' % mean_ref).rjust(12))
logging.info(' Test map(s):')
logging.info(' min :' + ('%.2f' % min_test).rjust(12))
logging.info(' max :' + ('%.2f' % max_test).rjust(12))
logging.info(' total :' + ('%.2f' % total_test).rjust(12))
logging.info(' mean :' + ('%.2f' % mean_test).rjust(12))
logging.info(' Absolute fract. diff., abs((Test - Ref) / Ref):')
logging.info(' max : %.4e', max_abs_fract_diff)
logging.info(' mean : %.4e', mean_abs_fract_diff)
logging.info(' median: %.4e', median_abs_fract_diff)
logging.info(' Fractional difference, (Test - Ref) / Ref:')
logging.info(' min : %.4e', min_fract_diff)
logging.info(' max : %.4e', max_fract_diff)
logging.info(' mean : %.4e +/- %.4e', mean_fract_diff, std_fract_diff)
logging.info(' median: %.4e +/- %.4e', median_fract_diff, mad_fract_diff)
logging.info(' Difference, Test - Ref:')
logging.info(' min : %.4e', min_diff)
logging.info(' max : %.4e', max_diff)
logging.info(' mean : %.4e +/- %.4e', mean_diff, std_diff)
logging.info(' median: %.4e +/- %.4e', median_diff, mad_diff)
logging.info(' Asymmetry, (Test - Ref) / sqrt(Ref)')
logging.info(' min : %.4e', min_asymm)
logging.info(' max : %.4e', max_asymm)
logging.info(' total : %.4e (sum in quadrature)', total_asymm)
logging.info('')
ref = MapSet(new_ref)
test = MapSet(reordered_test)
diff = MapSet(diff_maps)
fract_diff = MapSet(fract_diff_maps)
asymm = MapSet(asymm_maps)
if json:
diff.to_json(os.path.join(
outdir,
'diff__%s__%s.json.bz2' %(test_plot_label, ref_plot_label)
))
fract_diff.to_json(os.path.join(
outdir,
'fract_diff__%s___%s.json.bz2' %(test_plot_label, ref_plot_label)
))
asymm.to_json(os.path.join(
outdir,
'asymm__%s___%s.json.bz2' %(test_plot_label, ref_plot_label)
))
to_file(
summary_stats,
os.path.join(
outdir,
'stats__%s__%s.json.bz2' %(test_plot_label, ref_plot_label)
)
)
for plot_format in plot_formats:
# Plot the raw distributions
plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
log=False, annotate=False,
symmetric=False,
ratio=False)
plotter.plot_2d_array(ref, fname='distr__%s'
% ref_plot_label)
plotter.plot_2d_array(test, fname='distr__%s'
% test_plot_label)
# Plot the difference (test - ref)
plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
log=False, annotate=False,
symmetric=diff_symm,
ratio=False)
plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
plotter.plot_2d_array(
test - ref,
fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
#vmin=diff_min, vmax=diff_max
)
# Plot the fractional difference (test - ref)/ref
plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
log=False,
annotate=False,
symmetric=fract_diff_symm,
ratio=True)
plotter.label = ('(%s-%s)/%s'
% (test_plot_label, ref_plot_label, ref_plot_label))
plotter.plot_2d_array(
(test-ref)/MapSet([zero_to_nan(r) for r in ref]),
fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
#vmin=fract_diff_min, vmax=fract_diff_max
)
# Plot the asymmetry (test - ref)/sqrt(ref)
plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format,
log=False,
annotate=False,
symmetric=asymm_symm,
ratio=True)
plotter.label = (r'$(%s - %s)/\sqrt{%s}$'
% (test_plot_label, ref_plot_label, ref_plot_label))
plotter.plot_2d_array(
(test-ref)/MapSet([zero_to_nan(r**0.5) for r in ref]),
fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
#vmin=asymm_min, vmax=asymm_max
)
return summary_stats, diff, fract_diff, asymm
[docs]
def parse_args():
"""Parse command line arguments"""
parser = ArgumentParser(description=__doc__)
parser.add_argument(
'--outdir', metavar='DIR', type=str, required=True,
help='''Store output plots to this directory.'''
)
parser.add_argument(
'--ref', type=str, required=True, action='append',
help='''Pipeline settings config file that generates reference
output, or a stored map or map set. Repeat --ref option for multiple
pipelines, maps, or map sets'''
)
parser.add_argument(
'--ref-abs', action='store_true',
help='''Use the absolute value of the reference plot for
comparisons.'''
)
parser.add_argument(
'--ref-label', type=str, required=True,
help='''Label for reference'''
)
parser.add_argument(
'--ref-param-selections', type=str, required=False,
action='append',
help='''Param selections to apply to --ref pipeline config(s). Not
applicable if --ref specifies stored map or map sets'''
)
parser.add_argument(
'--test', type=str, required=True, action='append',
help='''Pipeline settings config file that generates test
output, or a stored map or map set. Repeat --test option for multiple
pipelines, maps, or map sets'''
)
parser.add_argument(
'--test-abs', action='store_true',
help='''Use the absolute value of the test plot for
comparisons.'''
)
parser.add_argument(
'--test-label', type=str, required=True,
help='''Label for test'''
)
parser.add_argument(
'--test-param-selections', type=str, required=False,
action='append',
help='''Param selections to apply to --test pipeline config(s). Not
applicable if --test specifies stored map or map sets'''
)
parser.add_argument(
'--combine', type=str, action='append',
help='''Combine by wildcard string, where string globbing (a la command
line) uses asterisk for any number of wildcard characters. Use single
quotes such that asterisks do not get expanded by the shell. Repeat the
--combine option for multiple combine strings.'''
)
parser.add_argument(
'--sum', nargs='+',
help='''Sum over (and hence remove) the specified axis or axes. I.e.,
project the map onto remaining (unspecified) axis or axes.'''
)
parser.add_argument(
'--json', action='store_true',
help='''Save output maps in compressed json (json.bz2) format.'''
)
parser.add_argument(
'--pdf', action='store_true',
help='''Save plots in PDF format. If neither this nor --png is
specified, no plots are produced.'''
)
parser.add_argument(
'--png', action='store_true',
help='''Save plots in PNG format. If neither this nor --pdf is
specfied, no plots are produced.'''
)
parser.add_argument(
'--diff-min', type=float, required=False,
help='''Difference plot vmin; if you specify only one of --diff-min or
--diff-max, symmetric limits are automatically used (min = -max).'''
)
parser.add_argument(
'--diff-max', type=float, required=False,
help='''Difference plot max; if you specify only one of --diff-min or
--diff-max, symmetric limits are automatically used (min = -max).'''
)
parser.add_argument(
'--fract-diff-min', type=float, required=False,
help='''Fractional difference plot vmin; if you specify only one of
--fract-diff-min or --fract-diff-max, symmetric limits are
automatically used (min = -max).'''
)
parser.add_argument(
'--fract-diff-max', type=float, required=False,
help='''Fractional difference plot max; if you specify only one of
--fract-diff-min or --fract-diff-max, symmetric limits are
automatically used (min = -max).'''
)
parser.add_argument(
'--asymm-min', type=float, required=False,
help='''Asymmetry plot vmin; if you specify only one of --asymm-min or
--asymm-max, symmetric limits are automatically used (min = -max).'''
)
parser.add_argument(
'--asymm-max', type=float, required=False,
help='''Fractional difference plot max; if you specify only one of
--asymm-min or --asymm-max, symmetric limits are automatically used
(min = -max).'''
)
parser.add_argument(
'-v', action='count',
help='Set verbosity level; repeat -v for higher level.'
)
args = parser.parse_args()
return args
[docs]
def main():
"""Function used from command-line calls (either `python compare.py` or via
installer's console scripts (see `setup.py`)."""
args = vars(parse_args())
set_verbosity(args.pop('v'))
compare(**args)
if __name__ == '__main__':
main()