test: update summary to work for all configurations (use pandas)

This commit is contained in:
Jędrzej Boczar 2020-02-05 13:41:47 +01:00
parent f9f86d507f
commit 1702e2ad7c

import subprocess
from collections import defaultdict, namedtuple
import yaml
import numpy as np
import pandas as pd
import matplotlib
from matplotlib.ticker import FuncFormatter, PercentFormatter, ScalarFormatter
_summary = True
except ImportError as e:
_summary = False
print('[WARNING] Results summary not available:', e, file=sys.stderr)
from litedram.common import Settings as _Settings
from litedram.common import Settings as _Settings
from .benchmark import LiteDRAMBenchmarkSoC, load_access_pattern
# constructs python regex named group
def ng(name, regex):
    """Wrap ``regex`` in a Python named capture group called ``name``."""
    group_template = r'(?P<{}>{})'
    return group_template.format(name, regex)
def center(text, width, fillc=' '):
    """Surround ``text`` with copies of ``fillc`` to centre it within ``width``.

    When the padding cannot be split evenly, the extra fill goes on the right.
    If ``text`` is already ``width`` characters or longer it is returned
    unchanged, because a non-positive repeat count yields an empty string.
    """
    padding = width - len(text)
    lead = padding // 2
    trail = padding - lead
    return ''.join((fillc * lead, text, fillc * trail))
def human_readable(value):
    """Pick a binary prefix for ``value``.

    Returns a ``(mult, prefix)`` tuple such that ``value * mult`` is the
    number to display together with ``prefix`` ('' / k / M / G / T, in steps
    of 1024). Values too large for the biggest prefix are still expressed
    using that prefix rather than being scaled one step too far.
    """
    binary_prefixes = ['', 'k', 'M', 'G', 'T']
    mult = 1.0
    # Only the prefixes that have a larger successor take part in the search;
    # the last one is the fallback, so mult is never divided past it.
    for prefix in binary_prefixes[:-1]:
        if value * mult < 1024:
            return mult, prefix
        mult /= 1024
    return mult, binary_prefixes[-1]
# Benchmark configuration --------------------------------------------------------------------------
class CustomAccess(Settings):
def length(self):
def pattern(self):
# we have to load the file to know pattern length, cache it when requested
if not hasattr(self, '_pattern'):
path = self.pattern_file
benchmark_dir = os.path.dirname(benchmark.__file__)
benchmark_dir = os.path.dirname(benchmark.__file__)
path = os.path.join(benchmark_dir, path)
self._pattern = load_access_pattern(path)
return len(self._pattern)
return self._pattern
def length(self):
return len(self.pattern)
def as_args(self):
return ['--access-pattern=%s' % self.pattern_file]
class BenchmarkConfiguration(Settings):
def length(self):
return self.access_pattern.length
def from_dict(cls, d):
access_cls = CustomAccess if 'pattern_file' in d['access_pattern'] else GeneratedAccess
d['access_pattern'] = access_cls(**d['access_pattern'])
return cls(**d)
def load_yaml(cls, yaml_file):
with open(yaml_file) as f:
description = yaml.safe_load(f)
configs = []
for name, desc in description.items():
if 'access_pattern' in desc:
access = CustomAccess(desc.pop('access_pattern'))
access = GeneratedAccess(desc.pop('bist_length'), desc.pop('bist_random'))
configs.append(cls(name, **desc, access_pattern=access))
desc['name'] = name
return configs
def __repr__(self):
return 'BenchmarkConfiguration(%s)' % self.as_dict()
def soc(self):
if not hasattr(self, '_soc'):
kwargs = dict(
if isinstance(self.access_pattern, GeneratedAccess):
kwargs['bist_length'] = self.access_pattern.bist_length
kwargs['bist_random'] = self.access_pattern.bist_random
elif isinstance(self.access_pattern, CustomAccess):
kwargs['pattern_init'] = self.access_pattern.pattern
raise ValueError(self.access_pattern)
self._soc = LiteDRAMBenchmarkSoC(**kwargs)
return self._soc
# Benchmark results --------------------------------------------------------------------------------
# constructs python regex named group
def ng(name, regex):
    """Build the source text of a named regex group ``name`` matching ``regex``."""
    named_group = r'(?P<{}>{})'.format(name, regex)
    return named_group
def _compiled_pattern(stage, var):
    """Compile a regex matching ``<stage> <var>: <digits>`` in benchmark output.

    The digits are captured in the named group ``value``.
    """
    pattern_fmt = r'{stage}\s+{var}:\s+{value}'
    # All three placeholders of the format string must be supplied.
    pattern = pattern_fmt.format(
        stage=stage,
        var=var,
        value=ng('value', '[0-9]+'),
    )
    return re.compile(pattern)
result = re.search(pattern, benchmark_output)
class BenchmarkResult:
def __init__(self, config, output):
self.config = config
# pre-compiled patterns for all benchmarks
patterns = {
'generator_ticks': _compiled_pattern('BIST-GENERATOR', 'ticks'),
'checker_errors': _compiled_pattern('BIST-CHECKER', 'errors'),
'checker_ticks': _compiled_pattern('BIST-CHECKER', 'ticks'),
def find(pattern, output):
result = pattern.search(output)
assert result is not None, \
'Could not find pattern "%s" in output:\n%s' % (pattern, benchmark_output)
return int(result.group('value'))
def __init__(self, output):
self._output = output
# instantiate the benchmarked soc to check its configuration
self.benchmark_soc = LiteDRAMBenchmarkSoC(**self.config._settings)
for attr, pattern in self.patterns.items():
setattr(self, attr, self.find(pattern, output))
def cmd_count(self):
data_width = self.benchmark_soc.sdram.controller.interface.data_width
return self.config.bist_length / (data_width // 8)
def clk_period(self):
clk_freq = self.benchmark_soc.sdrphy.module.clk_freq
return 1 / clk_freq
def write_bandwidth(self):
return (8 * self.config.bist_length) / (self.generator_ticks * self.clk_period())
def read_bandwidth(self):
return (8 * self.config.bist_length) / (self.checker_ticks * self.clk_period())
def write_efficiency(self):
return self.cmd_count() / self.generator_ticks
def read_efficiency(self):
return self.cmd_count() / self.checker_ticks
def write_latency(self):
assert self.config.bist_length == 1, 'Not a latency benchmark'
return self.generator_ticks
def read_latency(self):
assert self.config.bist_length == 1, 'Not a latency benchmark'
return self.checker_ticks
def parse_output(self, output):
bist_pattern = r'{stage}\s+{var}:\s+{value}'
def find(stage, var):
pattern = bist_pattern.format(
value=ng('value', '[0-9]+'),
result = re.search(pattern, output)
assert result is not None, 'Could not find pattern in output: %s, %s' % (pattern, output)
return int(result.group('value'))
self.generator_ticks = find('BIST-GENERATOR', 'ticks')
self.checker_errors = find('BIST-CHECKER', 'errors')
self.checker_ticks = find('BIST-CHECKER', 'ticks')
def dump_results_json(cls, results, file):
"""Save multiple results in a JSON file.
Only configurations and outputs are saved, as they can be used to reconstruct BenchmarkResult.
# simply use config._settings as it defines the BenchmarkConfiguration
results_raw = [(r.config._settings, r._output) for r in results]
with open(file, 'w') as f:
json.dump(results_raw, f)
def load_results_json(cls, file):
"""Load results from a JSON file."""
with open(file, 'r') as f:
results_raw = json.load(f)
return [cls(BenchmarkConfiguration(**settings), output) for (settings, output) in results_raw]
def __repr__(self):
d = {attr: getattr(self, attr) for attr in self.patterns.keys()}
return 'BenchmarkResult(%s)' % d
# Results summary ----------------------------------------------------------------------------------
def human_readable(value):
    """Choose a binary prefix for displaying ``value``.

    Returns ``(mult, prefix)``: multiply ``value`` by ``mult`` and append
    ``prefix`` (one of '', 'k', 'M', 'G', 'T'; each step is a factor of 1024).
    Values beyond the largest prefix keep that prefix instead of being
    divided one extra time.
    """
    binary_prefixes = ['', 'k', 'M', 'G', 'T']
    mult = 1.0
    # The final prefix is the fallback, so the loop never scales past it.
    for prefix in binary_prefixes[:-1]:
        if value * mult < 1024:
            return mult, prefix
        mult /= 1024
    return mult, binary_prefixes[-1]
def clocks_fmt(clocks):
    """Render a tick count as an integer followed by ``clk``."""
    whole_ticks = int(clocks)
    return '{:d} clk'.format(whole_ticks)
def bandwidth_fmt(bw):
    """Render ``bw`` with one decimal place, a binary prefix and the ``bps`` unit."""
    scale, prefix = human_readable(bw)
    scaled = bw * scale
    return '{:.1f} {}bps'.format(scaled, prefix)
def efficiency_fmt(eff):
    """Render a fractional efficiency (1.0 == 100 %) as a percentage string."""
    percent = eff * 100
    return '{:.1f} %'.format(percent)
class ResultsSummary:
# value_scaling is a function: value -> (multiplier, prefix)
Fmt = namedtuple('MetricFormatting', ['name', 'unit', 'value_scaling'])
metric_formats = {
'write_bandwidth': Fmt('Write bandwidth', 'bps', lambda value: human_readable(value)),
'read_bandwidth': Fmt('Read bandwidth', 'bps', lambda value: human_readable(value)),
'write_efficiency': Fmt('Write efficiency', '', lambda value: (100, '%')),
'read_efficiency': Fmt('Read efficiency', '', lambda value: (100, '%')),
'write_latency': Fmt('Write latency', 'clk', lambda value: (1, '')),
'read_latency': Fmt('Read latency', 'clk', lambda value: (1, '')),
def __init__(self, run_data, plots_dir='plots'):
self.plots_dir = plots_dir
def __init__(self, results):
self.results = results
# gather results into tabular data
column_mappings = {
'name': lambda d: d.config.name,
'sdram_module': lambda d: d.config.sdram_module,
'sdram_data_width': lambda d: d.config.sdram_data_width,
'bist_length': lambda d: getattr(d.config.access_pattern, 'bist_length', None),
'bist_random': lambda d: getattr(d.config.access_pattern, 'bist_random', None),
'pattern_file': lambda d: getattr(d.config.access_pattern, 'pattern_file', None),
'length': lambda d: d.config.length,
'generator_ticks': lambda d: d.result.generator_ticks,
'checker_errors': lambda d: d.result.checker_errors,
'checker_ticks': lambda d: d.result.checker_ticks,
'ctrl_data_width': lambda d: d.config.soc.sdram.controller.interface.data_width,
'clk_freq': lambda d: d.config.soc.sdrphy.module.clk_freq,
columns = {name: [mapping(data) for data in run_data] for name, mapping, in column_mappings.items()}
self.df = df = pd.DataFrame(columns)
def by_metric(self, metric):
"""Returns pairs of value of the given metric and the configuration used for benchmark"""
for result in self.results:
# omit the results that should not be used to calculate given metric
if result.config.bist_length == 1 and metric not in ['read_latency', 'write_latency'] \
or result.config.bist_length != 1 and metric in ['read_latency', 'write_latency']:
value = getattr(result, metric)()
yield value, result.config
# replace None with NaN
df.fillna(value=np.nan, inplace=True)
def print(self):
legend = '(module, datawidth, length, random, result)'
fmt = ' {module:15} {dwidth:2} {length:4} {random:1} {result}'
# compute other metrics based on ticks and configuration parameters
df['clk_period'] = 1 / df['clk_freq']
df['write_bandwidth'] = (8 * df['length']) / (df['generator_ticks'] * df['clk_period'])
df['read_bandwidth'] = (8 * df['length']) / (df['checker_ticks'] * df['clk_period'])
# store formatted lines per metric
metric_lines = defaultdict(list)
for metric, (_, unit, formatter) in self.metric_formats.items():
for value, config in self.by_metric(metric):
mult, prefix = formatter(value)
value_fmt = '{:5.1f} {}{}' if isinstance(value * mult, float) else '{:5d} {}{}'
result = value_fmt.format(value * mult, prefix, unit)
line = fmt.format(module=config.sdram_module,
df['cmd_count'] = df['length'] / (df['ctrl_data_width'] / 8)
df['write_efficiency'] = df['cmd_count'] / df['generator_ticks']
df['read_efficiency'] = df['cmd_count'] / df['checker_ticks']
# find length of the longest line
max_length = max((len(l) for lines in metric_lines.values() for l in lines))
max_length = max(max_length, len(legend) + 2)
width = max_length + 2
df['write_latency'] = df[df['bist_length'] == 1]['generator_ticks']
df['read_latency'] = df[df['bist_length'] == 1]['checker_ticks']
# print the formatted summary
def header(text):
mid = center(text, width - 6, '=')
return center(mid, width, '-')
print(header(' Summary '))
print(center(legend, width))
for metric, lines in metric_lines.items():
print(center(self.metric_formats[metric].name, width))
for line in lines:
# boolean distinction between latency benchmarks and sequence benchmarks,
# as their results differ significantly
df['is_latency'] = ~pd.isna(df['write_latency'])
assert (df['is_latency'] == ~pd.isna(df['read_latency'])).all(), \
'write_latency and read_latency should both have a value or both be NaN'
def plot(self, output_dir, backend='Agg', theme='default', save_format='png', **savefig_kwargs):
"""Create plots with benchmark results summary
# data formatting for text summary
self.text_formatters = {
'write_bandwidth': bandwidth_fmt,
'read_bandwidth': bandwidth_fmt,
'write_efficiency': efficiency_fmt,
'read_efficiency': efficiency_fmt,
'write_latency': clocks_fmt,
'read_latency': clocks_fmt,
Default backend is Agg, which is a non-GUI backend and only allows
saving figures as files. If a GUI backend is passed, plt.show()
will be called at the end.
# import locally here to be able to run benchmarks without installing matplotlib
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter, PercentFormatter, ScalarFormatter
def bandwidth_formatter_func(value, pos):
mult, prefix = human_readable(value)
return '{:.1f}{}bps'.format(value * mult, prefix)
tick_formatters = {
'write_bandwidth': FuncFormatter(bandwidth_formatter_func),
'read_bandwidth': FuncFormatter(bandwidth_formatter_func),
# data formatting for plot summary
self.plot_xticks_formatters = {
'write_bandwidth': FuncFormatter(lambda value, pos: bandwidth_fmt(value)),
'read_bandwidth': FuncFormatter(lambda value, pos: bandwidth_fmt(value)),
'write_efficiency': PercentFormatter(1.0),
'read_efficiency': PercentFormatter(1.0),
'write_latency': ScalarFormatter(),
'read_latency': ScalarFormatter(),
def config_tick_name(config):
return '{}\n{}, {}, {}'.format(config.sdram_module, config.sdram_data_width,
config.bist_length, int(config.bist_random))
def print_df(self, title, df):
# make sure all data will be shown
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None):
print('===> {}:'.format(title))
for metric, (name, unit, _) in self.metric_formats.items():
fig = plt.figure()
axis = plt.gca()
def get_summary(self, mask=None, columns=None, column_formatting=None, sort_kwargs=None):
# work on a copy
df = self.df.copy()
values, configs = zip(*self.by_metric(metric))
ticks = np.arange(len(configs))
if sort_kwargs is not None:
df = df.sort_values(**sort_kwargs)
axis.barh(ticks, values, align='center')
axis.set_yticklabels([config_tick_name(c) for c in configs])
if column_formatting is not None:
for column, mapping in column_formatting.items():
old = '_{}'.format(column)
df[old] = df[column].copy()
df[column] = df[column].map(lambda value: mapping(value) if not pd.isna(value) else value)
# force xmax to 100%
if metric in ['write_efficiency', 'read_efficiency']:
df = df[mask] if mask is not None else df
df = df[columns] if columns is not None else df
title = self.metric_formats[metric].name
axis.set_title(title, fontsize=12)
return df
filename = '{}.{}'.format(metric, save_format)
fig.savefig(os.path.join(output_dir, filename), **savefig_kwargs)
def text_summary(self):
    """Print each grouped results table under its title."""
    groups = self.groupped_results()
    for group in groups:
        heading, table = group
        self.print_df(heading, table)
def groupped_results(self, formatted=True):
df = self.df
formatters = self.text_formatters if formatted else {}
common_columns = ['name', 'sdram_module', 'sdram_data_width']
latency_columns = ['write_latency', 'read_latency']
performance_columns = ['write_bandwidth', 'read_bandwidth', 'write_efficiency', 'read_efficiency']
yield 'Latency', self.get_summary(
mask=df['is_latency'] == True,
columns=common_columns + latency_columns,
# yield 'Any access pattern', self.get_summary(
# mask=(df['is_latency'] == False),
# columns=common_columns + performance_columns + ['length', 'bist_random', 'pattern_file'],
# column_formatting=self.text_formatters,
# **kwargs,
# ),
yield 'Custom access pattern', self.get_summary(
mask=(df['is_latency'] == False) & (~pd.isna(df['pattern_file'])),
columns=common_columns + performance_columns + ['length', 'pattern_file'],
yield 'Sequential access pattern', self.get_summary(
mask=(df['is_latency'] == False) & (pd.isna(df['pattern_file'])) & (df['bist_random'] == False),
columns=common_columns + performance_columns + ['bist_length'], # could be length
yield 'Random access pattern', self.get_summary(
mask=(df['is_latency'] == False) & (pd.isna(df['pattern_file'])) & (df['bist_random'] == True),
columns=common_columns + performance_columns + ['bist_length'],
def plot_summary(self, plots_dir='plots', backend='Agg', theme='default', save_format='png', **savefig_kw):
import matplotlib.pyplot as plt
for title, df in self.groupped_results(formatted=False):
for column in self.plot_xticks_formatters.keys():
if column not in df.columns or df[column].empty:
axis = self.plot_df(title, df, column)
# construct path
def path_name(name):
return name.lower().replace(' ', '_')
filename = '{}.{}'.format(path_name(column), save_format)
path = os.path.join(plots_dir, path_name(title), filename)
os.makedirs(os.path.dirname(path), exist_ok=True)
# save figure
axis.get_figure().savefig(path, **savefig_kw)
if backend != 'Agg':
def plot_df(self, title, df, column, save_format='png', save_filename=None):
if save_filename is None:
save_filename = os.path.join(self.plots_dir, title.lower().replace(' ', '_'))
axis = df.plot(kind='barh', x='name', y=column, title=title, grid=True, legend=False)
if column in self.plot_xticks_formatters:
# # force xmax to 100%
# if column in ['write_efficiency', 'read_efficiency']:
# axis.set_xlim(right=1.0)
return axis
# Run ----------------------------------------------------------------------------------------------
def run_benchmark(cmd_args):
# run as separate process, because else we cannot capture all output from verilator
benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py')
command = ['python3', benchmark_script, *cmd_args]
proc = subprocess.run(command, stdout=subprocess.PIPE)
class RunCache(list):
    """List of benchmark runs that can be saved to and restored from JSON.

    Every element is a ``RunData(config, result)`` pair. Only the
    configuration (``config.as_dict()``) and the raw benchmark output
    (``result._output``) are serialized; the result object is rebuilt from
    the stored output when the cache is loaded.
    """

    RunData = namedtuple('RunData', ['config', 'result'])

    def dump_json(self, filename):
        """Write all runs held by this cache to ``filename`` as JSON."""
        json_data = [
            {'config': data.config.as_dict(), 'output': data.result._output}
            for data in self
        ]
        with open(filename, 'w') as f:
            json.dump(json_data, f)

    # Must be a classmethod: it is invoked on the class itself
    # (``RunCache.load_json(path)``) and uses ``cls.RunData``.
    @classmethod
    def load_json(cls, filename):
        """Read runs previously written by ``dump_json``.

        Returns a list of ``RunData`` tuples whose configuration and result
        are reconstructed from the stored dictionary and output text.
        """
        with open(filename, 'r') as f:
            json_data = json.load(f)
        loaded = []
        for data in json_data:
            config = BenchmarkConfiguration.from_dict(data['config'])
            result = BenchmarkResult(data['output'])
            loaded.append(cls.RunData(config=config, result=result))
        return loaded
def run_python(script, args):
    """Run ``script`` with ``python3`` and return its standard output as text.

    The process is started in the directory containing ``script`` (or the
    current directory when ``script`` has no directory component). Only
    stdout is captured.
    """
    command = ['python3', script, *args]
    # An empty dirname is not a valid cwd; None keeps the current directory.
    workdir = os.path.dirname(script) or None
    proc = subprocess.run(command, stdout=subprocess.PIPE, cwd=workdir)
    # stdout is bytes; str() on bytes would yield the "b'...'" repr with
    # escaped newlines, so decode it instead (tolerating undecodable bytes).
    return proc.stdout.decode(errors='replace')
def run_benchmarks(configurations):
results = []
for name, config in configurations.items():
cmd_args = config.as_args()
print('{}: {}'.format(name, ' '.join(cmd_args)))
output = run_benchmark(cmd_args)
# exit if checker had any read error
result = BenchmarkResult(config, output)
if result.checker_errors != 0:
print('Error during benchmark "{}": checker_errors = {}'.format(
name, result.checker_errors), file=sys.stderr)
return results
def run_benchmark(config):
    """Run the benchmark script for ``config`` and return its parsed result.

    Raises:
        RuntimeError: if the parsed result reports a non-zero
            ``checker_errors`` count.
    """
    benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py')
    cmd_args = config.as_args()
    # Run as a separate process; otherwise not all of verilator's output
    # can be captured.
    output = run_python(benchmark_script, cmd_args)
    result = BenchmarkResult(output)
    # Fail if the checker had any read error. The message reports the
    # arguments actually passed to the script (the bare name ``args`` is not
    # defined in this function).
    if result.checker_errors != 0:
        raise RuntimeError('Error during benchmark: checker_errors = {}, args = {}'.format(
            result.checker_errors, cmd_args))
    return result
def main(argv=None):
def main(argv=None):
parser.add_argument('--plot-transparent', action='store_true', help='Use transparent background when saving plots')
parser.add_argument('--plot-output-dir', default='plots', help='Specify where to save the plots')
parser.add_argument('--plot-theme', default='default', help='Use different matplotlib theme')
parser.add_argument('--ignore-failures', action='store_true', help='Ignore failuers during benchmarking, continue using successful runs only')
parser.add_argument('--results-cache', help="""Use given JSON file as results cache. If the file exists,
it will be loaded instead of running actual benchmarks,
else benchmarks will be run normally, and then saved
@ -370,6 +444,11 @@ def main(argv=None):
to generate different summary without having to rerun benchmarks.""")
args = parser.parse_args(argv)
if not args.results_cache and not _summary:
print('Summary not available and not running with --results-cache - run would not produce any results! Aborting.',
# load and filter configurations
configurations = BenchmarkConfiguration.load_yaml(args.config)
filters = {
filters = {
configurations = filter(f, configurations)
configurations = list(configurations)
cache_exists = args.results_cache and os.path.isfile(args.results_cache)
# load outputs from cache if it exists
cache_exists = args.results_cache and os.path.isfile(args.results_cache)
if args.results_cache and cache_exists:
cached_results = BenchmarkResult.load_results_json(args.results_cache)
cache = RunCache.load_json(args.results_cache)
# take only those that match configurations
results = [r for r in cached_results if r.config in configurations.values()]
names_to_load = [c.name for c in configurations]
run_data = [data for data in cache if data.config.name in names_to_load]
else: # run all the benchmarks normally
results = run_benchmarks(configurations)
run_data = []
for config in configurations:
print(' {}: {}'.format(config.name, ' '.join(config.as_args())))
run_data.append(RunCache.RunData(config, run_benchmark(config)))
if not args.ignore_failures:
# store outputs in cache
if args.results_cache and not cache_exists:
BenchmarkResult.dump_results_json(results, args.results_cache)
cache = RunCache(run_data)
# display the summary
summary = ResultsSummary(results)
if args.plot:
if not os.path.isdir(args.plot_output_dir):
# display summary
if _summary:
summary = ResultsSummary(run_data)
if args.plot:
if __name__ == "__main__":