From 1702e2ad7c40fbc1e7b3fa452037fabba3deff4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C4=99drzej=20Boczar?= Date: Wed, 5 Feb 2020 13:41:47 +0100 Subject: [PATCH] test: update summary to work for all configurations (use pandas) --- test/run_benchmarks.py | 535 ++++++++++++++++++++++++----------------- 1 file changed, 312 insertions(+), 223 deletions(-) diff --git a/test/run_benchmarks.py b/test/run_benchmarks.py index 598e225..7ee3a53 100755 --- a/test/run_benchmarks.py +++ b/test/run_benchmarks.py @@ -12,6 +12,15 @@ import subprocess from collections import defaultdict, namedtuple import yaml +try: + import numpy as np + import pandas as pd + import matplotlib + from matplotlib.ticker import FuncFormatter, PercentFormatter, ScalarFormatter + _summary = True +except ImportError as e: + _summary = False + print('[WARNING] Results summary not available:', e, file=sys.stderr) from litedram.common import Settings as _Settings @@ -19,24 +28,12 @@ from . import benchmark from .benchmark import LiteDRAMBenchmarkSoC, load_access_pattern -# constructs python regex named group -def ng(name, regex): - return r'(?P<{}>{})'.format(name, regex) - def center(text, width, fillc=' '): added = width - len(text) left = added // 2 right = added - left return fillc * left + text + fillc * right -def human_readable(value): - binary_prefixes = ['', 'k', 'M', 'G', 'T'] - mult = 1.0 - for prefix in binary_prefixes: - if value * mult < 1024: - break - mult /= 1024 - return mult, prefix # Benchmark configuration -------------------------------------------------------------------------- @@ -72,7 +69,7 @@ class CustomAccess(Settings): self.set_attributes(locals()) @property - def length(self): + def pattern(self): # we have to load the file to know pattern length, cache it when requested if not hasattr(self, '_pattern'): path = self.pattern_file @@ -80,7 +77,11 @@ class CustomAccess(Settings): benchmark_dir = os.path.dirname(benchmark.__file__) path = os.path.join(benchmark_dir, path) self._pattern = load_access_pattern(path) - return len(self._pattern) + return self._pattern + + @property + def length(self): + return len(self.pattern) def as_args(self): return ['--access-pattern=%s' % self.pattern_file] @@ -107,247 +108,319 @@ class BenchmarkConfiguration(Settings): def length(self): return self.access_pattern.length + @classmethod + def from_dict(cls, d): + access_cls = CustomAccess if 'pattern_file' in d['access_pattern'] else GeneratedAccess + d['access_pattern'] = access_cls(**d['access_pattern']) + return cls(**d) + @classmethod def load_yaml(cls, yaml_file): with open(yaml_file) as f: description = yaml.safe_load(f) configs = [] for name, desc in description.items(): - if 'access_pattern' in desc: - access = CustomAccess(desc.pop('access_pattern')) - else: - access = GeneratedAccess(desc.pop('bist_length'), desc.pop('bist_random')) - configs.append(cls(name, **desc, access_pattern=access)) + desc['name'] = name + configs.append(cls.from_dict(desc)) return configs def __repr__(self): return 'BenchmarkConfiguration(%s)' % self.as_dict() + @property + def soc(self): + if not hasattr(self, '_soc'): + kwargs = dict( + sdram_module=self.sdram_module, + sdram_data_width=self.sdram_data_width, + ) + if isinstance(self.access_pattern, GeneratedAccess): + kwargs['bist_length'] = self.access_pattern.bist_length + kwargs['bist_random'] = self.access_pattern.bist_random + elif isinstance(self.access_pattern, CustomAccess): + kwargs['pattern_init'] = self.access_pattern.pattern + else: + raise ValueError(self.access_pattern) + self._soc = LiteDRAMBenchmarkSoC(**kwargs) + return self._soc + # Benchmark results -------------------------------------------------------------------------------- +# constructs python regex named group +def ng(name, regex): + return r'(?P<{}>{})'.format(name, regex) + + +def _compiled_pattern(stage, var): + pattern_fmt = r'{stage}\s+{var}:\s+{value}' + pattern = pattern_fmt.format( + stage=stage, + var=var, + value=ng('value', '[0-9]+'), + ) + return re.compile(pattern) + result = re.search(pattern, benchmark_output) + + class BenchmarkResult: - def __init__(self, config, output): - self.config = config + # pre-compiled patterns for all benchmarks + patterns = { + 'generator_ticks': _compiled_pattern('BIST-GENERATOR', 'ticks'), + 'checker_errors': _compiled_pattern('BIST-CHECKER', 'errors'), + 'checker_ticks': _compiled_pattern('BIST-CHECKER', 'ticks'), + } + + @staticmethod + def find(pattern, output): + result = pattern.search(output) + assert result is not None, \ + 'Could not find pattern "%s" in output:\n%s' % (pattern, benchmark_output) + return int(result.group('value')) + + def __init__(self, output): self._output = output - self.parse_output(output) - # instantiate the benchmarked soc to check its configuration - self.benchmark_soc = LiteDRAMBenchmarkSoC(**self.config._settings) + for attr, pattern in self.patterns.items(): + setattr(self, attr, self.find(pattern, output)) - def cmd_count(self): - data_width = self.benchmark_soc.sdram.controller.interface.data_width - return self.config.bist_length / (data_width // 8) - - def clk_period(self): - clk_freq = self.benchmark_soc.sdrphy.module.clk_freq - return 1 / clk_freq - - def write_bandwidth(self): - return (8 * self.config.bist_length) / (self.generator_ticks * self.clk_period()) - - def read_bandwidth(self): - return (8 * self.config.bist_length) / (self.checker_ticks * self.clk_period()) - - def write_efficiency(self): - return self.cmd_count() / self.generator_ticks - - def read_efficiency(self): - return self.cmd_count() / self.checker_ticks - - def write_latency(self): - assert self.config.bist_length == 1, 'Not a latency benchmark' - return self.generator_ticks - - def read_latency(self): - assert self.config.bist_length == 1, 'Not a latency benchmark' - return self.checker_ticks - - def parse_output(self, output): - bist_pattern = r'{stage}\s+{var}:\s+{value}' - - def find(stage, var): - pattern = bist_pattern.format( - stage=stage, - var=var, - value=ng('value', '[0-9]+'), - ) - result = re.search(pattern, output) - assert result is not None, 'Could not find pattern in output: %s, %s' % (pattern, output) - return int(result.group('value')) - - self.generator_ticks = find('BIST-GENERATOR', 'ticks') - self.checker_errors = find('BIST-CHECKER', 'errors') - self.checker_ticks = find('BIST-CHECKER', 'ticks') - - @classmethod - def dump_results_json(cls, results, file): - """Save multiple results in a JSON file. - - Only configurations and outpits are saved, as they can be used to reconstruct BenchmarkResult. - """ - # simply use config._settings as it defines the BenchmarkConfiguration - results_raw = [(r.config._settings, r._output) for r in results] - with open(file, 'w') as f: - json.dump(results_raw, f) - - @classmethod - def load_results_json(cls, file): - """Load results from a JSON file.""" - with open(file, 'r') as f: - results_raw = json.load(f) - return [cls(BenchmarkConfiguration(**settings), output) for (settings, output) in results_raw] + def __repr__(self): + d = {attr: getattr(self, attr) for attr in self.patterns.keys()} + return 'BenchmarkResult(%s)' % d # Results summary ---------------------------------------------------------------------------------- +def human_readable(value): + binary_prefixes = ['', 'k', 'M', 'G', 'T'] + mult = 1.0 + for prefix in binary_prefixes: + if value * mult < 1024: + break + mult /= 1024 + return mult, prefix + + +def clocks_fmt(clocks): + return '{:d} clk'.format(int(clocks)) + + +def bandwidth_fmt(bw): + mult, prefix = human_readable(bw) + return '{:.1f} {}bps'.format(bw * mult, prefix) + + +def efficiency_fmt(eff): + return '{:.1f} %'.format(eff * 100) + + class ResultsSummary: - # value_scaling is a function: value -> (multiplier, prefix) - Fmt = namedtuple('MetricFormatting', ['name', 'unit', 'value_scaling']) - metric_formats = { - 'write_bandwidth': Fmt('Write bandwidth', 'bps', lambda value: human_readable(value)), - 'read_bandwidth': Fmt('Read bandwidth', 'bps', lambda value: human_readable(value)), - 'write_efficiency': Fmt('Write efficiency', '', lambda value: (100, '%')), - 'read_efficiency': Fmt('Read efficiency', '', lambda value: (100, '%')), - 'write_latency': Fmt('Write latency', 'clk', lambda value: (1, '')), - 'read_latency': Fmt('Read latency', 'clk', lambda value: (1, '')), - } + def __init__(self, run_data, plots_dir='plots'): + self.plots_dir = plots_dir - def __init__(self, results): - self.results = results + # gather results into tabular data + column_mappings = { + 'name': lambda d: d.config.name, + 'sdram_module': lambda d: d.config.sdram_module, + 'sdram_data_width': lambda d: d.config.sdram_data_width, + 'bist_length': lambda d: getattr(d.config.access_pattern, 'bist_length', None), + 'bist_random': lambda d: getattr(d.config.access_pattern, 'bist_random', None), + 'pattern_file': lambda d: getattr(d.config.access_pattern, 'pattern_file', None), + 'length': lambda d: d.config.length, + 'generator_ticks': lambda d: d.result.generator_ticks, + 'checker_errors': lambda d: d.result.checker_errors, + 'checker_ticks': lambda d: d.result.checker_ticks, + 'ctrl_data_width': lambda d: d.config.soc.sdram.controller.interface.data_width, + 'clk_freq': lambda d: d.config.soc.sdrphy.module.clk_freq, + } + columns = {name: [mapping(data) for data in run_data] for name, mapping, in column_mappings.items()} + self.df = df = pd.DataFrame(columns) - def by_metric(self, metric): - """Returns pairs of value of the given metric and the configuration used for benchmark""" - for result in self.results: - # omit the results that should not be used to calculate given metric - if result.config.bist_length == 1 and metric not in ['read_latency', 'write_latency'] \ - or result.config.bist_length != 1 and metric in ['read_latency', 'write_latency']: - continue - value = getattr(result, metric)() - yield value, result.config + # replace None with NaN + df.fillna(value=np.nan, inplace=True) - def print(self): - legend = '(module, datawidth, length, random, result)' - fmt = ' {module:15} {dwidth:2} {length:4} {random:1} {result}' + # compute other metrics based on ticks and configuration parameters + df['clk_period'] = 1 / df['clk_freq'] + df['write_bandwidth'] = (8 * df['length']) / (df['generator_ticks'] * df['clk_period']) + df['read_bandwidth'] = (8 * df['length']) / (df['checker_ticks'] * df['clk_period']) - # store formatted lines per metric - metric_lines = defaultdict(list) - for metric, (_, unit, formatter) in self.metric_formats.items(): - for value, config in self.by_metric(metric): - mult, prefix = formatter(value) - value_fmt = '{:5.1f} {}{}' if isinstance(value * mult, float) else '{:5d} {}{}' - result = value_fmt.format(value * mult, prefix, unit) - line = fmt.format(module=config.sdram_module, - dwidth=config.sdram_data_width, - length=config.bist_length, - random=int(config.bist_random), - result=result) - metric_lines[metric].append(line) + df['cmd_count'] = df['length'] / (df['ctrl_data_width'] / 8) + df['write_efficiency'] = df['cmd_count'] / df['generator_ticks'] + df['read_efficiency'] = df['cmd_count'] / df['checker_ticks'] - # find length of the longest line - max_length = max((len(l) for lines in metric_lines.values() for l in lines)) - max_length = max(max_length, len(legend) + 2) - width = max_length + 2 + df['write_latency'] = df[df['bist_length'] == 1]['generator_ticks'] + df['read_latency'] = df[df['bist_length'] == 1]['checker_ticks'] - # print the formatted summary - def header(text): - mid = center(text, width - 6, '=') - return center(mid, width, '-') - print(header(' Summary ')) - print(center(legend, width)) - for metric, lines in metric_lines.items(): - print(center(self.metric_formats[metric].name, width)) - for line in lines: - print(line) - print(header('')) + # boolean distinction between latency benchmarks and sequence benchmarks, + # as thier results differ significanly + df['is_latency'] = ~pd.isna(df['write_latency']) + assert (df['is_latency'] == ~pd.isna(df['read_latency'])).all(), \ + 'write_latency and read_latency should both have a value or both be NaN' - def plot(self, output_dir, backend='Agg', theme='default', save_format='png', **savefig_kwargs): - """Create plots with benchmark results summary + # data formatting for text summary + self.text_formatters = { + 'write_bandwidth': bandwidth_fmt, + 'read_bandwidth': bandwidth_fmt, + 'write_efficiency': efficiency_fmt, + 'read_efficiency': efficiency_fmt, + 'write_latency': clocks_fmt, + 'read_latency': clocks_fmt, + } - Default backend is Agg, which is non-GUI backed and only allows - to save figures as files. If a GUI backed is passed, plt.show() - will be called at the end. - """ - # import locally here to be able to run benchmarks without installing matplotlib - import matplotlib - matplotlib.use(backend) - - import matplotlib.pyplot as plt - import numpy as np - from matplotlib.ticker import FuncFormatter, PercentFormatter, ScalarFormatter - - plt.style.use(theme) - - def bandwidth_formatter_func(value, pos): - mult, prefix = human_readable(value) - return '{:.1f}{}bps'.format(value * mult, prefix) - - tick_formatters = { - 'write_bandwidth': FuncFormatter(bandwidth_formatter_func), - 'read_bandwidth': FuncFormatter(bandwidth_formatter_func), + # data formatting for plot summary + self.plot_xticks_formatters = { + 'write_bandwidth': FuncFormatter(lambda value, pos: bandwidth_fmt(value)), + 'read_bandwidth': FuncFormatter(lambda value, pos: bandwidth_fmt(value)), 'write_efficiency': PercentFormatter(1.0), 'read_efficiency': PercentFormatter(1.0), 'write_latency': ScalarFormatter(), 'read_latency': ScalarFormatter(), } - def config_tick_name(config): - return '{}\n{}, {}, {}'.format(config.sdram_module, config.sdram_data_width, - config.bist_length, int(config.bist_random)) + def print_df(self, title, df): + # make sure all data will be shown + with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None): + print('===> {}:'.format(title)) + print(df) - for metric, (name, unit, _) in self.metric_formats.items(): - fig = plt.figure() - axis = plt.gca() + def get_summary(self, mask=None, columns=None, column_formatting=None, sort_kwargs=None): + # work on a copy + df = self.df.copy() - values, configs = zip(*self.by_metric(metric)) - ticks = np.arange(len(configs)) + if sort_kwargs is not None: + df = df.sort_values(**sort_kwargs) - axis.barh(ticks, values, align='center') - axis.set_yticks(ticks) - axis.set_yticklabels([config_tick_name(c) for c in configs]) - axis.invert_yaxis() - axis.xaxis.set_major_formatter(tick_formatters[metric]) - axis.xaxis.set_tick_params(rotation=30) - axis.grid(True) - axis.spines['top'].set_visible(False) - axis.spines['right'].set_visible(False) - axis.set_axisbelow(True) + if column_formatting is not None: + for column, mapping in column_formatting.items(): + old = '_{}'.format(column) + df[old] = df[column].copy() + df[column] = df[column].map(lambda value: mapping(value) if not pd.isna(value) else value) - # force xmax to 100% - if metric in ['write_efficiency', 'read_efficiency']: - axis.set_xlim(right=1.0) + df = df[mask] if mask is not None else df + df = df[columns] if columns is not None else df - title = self.metric_formats[metric].name - axis.set_title(title, fontsize=12) + return df - plt.tight_layout() - filename = '{}.{}'.format(metric, save_format) - fig.savefig(os.path.join(output_dir, filename), **savefig_kwargs) + def text_summary(self): + for title, df in self.groupped_results(): + self.print_df(title, df) + print() + + def groupped_results(self, formatted=True): + df = self.df + + formatters = self.text_formatters if formatted else {} + + common_columns = ['name', 'sdram_module', 'sdram_data_width'] + latency_columns = ['write_latency', 'read_latency'] + performance_columns = ['write_bandwidth', 'read_bandwidth', 'write_efficiency', 'read_efficiency'] + + yield 'Latency', self.get_summary( + mask=df['is_latency'] == True, + columns=common_columns + latency_columns, + column_formatting=formatters, + ) + # yield 'Any access pattern', self.get_summary( + # mask=(df['is_latency'] == False), + # columns=common_columns + performance_columns + ['length', 'bist_random', 'pattern_file'], + # column_formatting=self.text_formatters, + # **kwargs, + # ), + yield 'Custom access pattern', self.get_summary( + mask=(df['is_latency'] == False) & (~pd.isna(df['pattern_file'])), + columns=common_columns + performance_columns + ['length', 'pattern_file'], + column_formatting=formatters, + ), + yield 'Sequential access pattern', self.get_summary( + mask=(df['is_latency'] == False) & (pd.isna(df['pattern_file'])) & (df['bist_random'] == False), + columns=common_columns + performance_columns + ['bist_length'], # could be length + column_formatting=formatters, + ), + yield 'Random access pattern', self.get_summary( + mask=(df['is_latency'] == False) & (pd.isna(df['pattern_file'])) & (df['bist_random'] == True), + columns=common_columns + performance_columns + ['bist_length'], + column_formatting=formatters, + ), + + def plot_summary(self, plots_dir='plots', backend='Agg', theme='default', save_format='png', **savefig_kw): + matplotlib.use(backend) + import matplotlib.pyplot as plt + plt.style.use(theme) + + for title, df in self.groupped_results(formatted=False): + for column in self.plot_xticks_formatters.keys(): + if column not in df.columns or df[column].empty: + continue + axis = self.plot_df(title, df, column) + + # construct path + def path_name(name): + return name.lower().replace(' ', '_') + + filename = '{}.{}'.format(path_name(column), save_format) + path = os.path.join(plots_dir, path_name(title), filename) + os.makedirs(os.path.dirname(path), exist_ok=True) + + # save figure + axis.get_figure().savefig(path, **savefig_kw) if backend != 'Agg': plt.show() + def plot_df(self, title, df, column, save_format='png', save_filename=None): + if save_filename is None: + save_filename = os.path.join(self.plots_dir, title.lower().replace(' ', '_')) + + axis = df.plot(kind='barh', x='name', y=column, title=title, grid=True, legend=False) + if column in self.plot_xticks_formatters: + axis.xaxis.set_major_formatter(self.plot_xticks_formatters[column]) + axis.xaxis.set_tick_params(rotation=15) + axis.spines['top'].set_visible(False) + axis.spines['right'].set_visible(False) + axis.set_axisbelow(True) + + # # force xmax to 100% + # if column in ['write_efficiency', 'read_efficiency']: + # axis.set_xlim(right=1.0) + + return axis + # Run ---------------------------------------------------------------------------------------------- -def run_benchmark(cmd_args): - # run as separate process, because else we cannot capture all output from verilator - benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py') - command = ['python3', benchmark_script, *cmd_args] - proc = subprocess.run(command, stdout=subprocess.PIPE) +class RunCache(list): + RunData = namedtuple('RunData', ['config', 'result']) + + def dump_json(self, filename): + json_data = [{'config': data.config.as_dict(), 'output': data.result._output} for data in self] + with open(filename, 'w') as f: + json.dump(json_data, f) + + @classmethod + def load_json(cls, filename): + with open(filename, 'r') as f: + json_data = json.load(f) + loaded = [] + for data in json_data: + config = BenchmarkConfiguration.from_dict(data['config']) + result = BenchmarkResult(data['output']) + loaded.append(cls.RunData(config=config, result=result)) + return loaded + + +def run_python(script, args): + command = ['python3', script, *args] + proc = subprocess.run(command, stdout=subprocess.PIPE, cwd=os.path.dirname(script)) return str(proc.stdout) -def run_benchmarks(configurations): - results = [] - for name, config in configurations.items(): - cmd_args = config.as_args() - print('{}: {}'.format(name, ' '.join(cmd_args))) - output = run_benchmark(cmd_args) - # exit if checker had any read error - result = BenchmarkResult(config, output) - if result.checker_errors != 0: - print('Error during benchmark "{}": checker_errors = {}'.format( - name, result.checker_errors), file=sys.stderr) - sys.exit(1) - results.append(result) - return results +def run_benchmark(config): + benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py') + # run as separate process, because else we cannot capture all output from verilator + output = run_python(benchmark_script, config.as_args()) + result = BenchmarkResult(output) + # exit if checker had any read error + if result.checker_errors != 0: + raise RuntimeError('Error during benchmark: checker_errors = {}, args = {}'.format( + result.checker_errors, args + )) + return result def main(argv=None): @@ -363,6 +436,7 @@ def main(argv=None): parser.add_argument('--plot-transparent', action='store_true', help='Use transparent background when saving plots') parser.add_argument('--plot-output-dir', default='plots', help='Specify where to save the plots') parser.add_argument('--plot-theme', default='default', help='Use different matplotlib theme') + parser.add_argument('--ignore-failures', action='store_true', help='Ignore failuers during benchmarking, continue using successful runs only') parser.add_argument('--results-cache', help="""Use given JSON file as results cache. If the file exists, it will be loaded instead of running actual benchmarks, else benchmarks will be run normally, and then saved @@ -370,6 +444,11 @@ def main(argv=None): to generate different summary without having to rerun benchmarks.""") args = parser.parse_args(argv) + if not args.results_cache and not _summary: + print('Summary not available and not running with --results-cache - run would not produce any results! Aborting.', + file=sys.stderr) + sys.exit(1) + # load and filter configurations configurations = BenchmarkConfiguration.load_yaml(args.config) filters = { @@ -382,31 +461,41 @@ def main(argv=None): configurations = filter(f, configurations) configurations = list(configurations) - cache_exists = args.results_cache and os.path.isfile(args.results_cache) - # load outputs from cache if it exsits + cache_exists = args.results_cache and os.path.isfile(args.results_cache) if args.results_cache and cache_exists: - cached_results = BenchmarkResult.load_results_json(args.results_cache) + cache = RunCache.load_json(args.results_cache) + # take only those that match configurations - results = [r for r in cached_results if r.config in configurations.values()] + names_to_load = [c.name for c in configurations] + run_data = [data for data in cache if data.config.name in names_to_load] else: # run all the benchmarks normally - results = run_benchmarks(configurations) + run_data = [] + for config in configurations: + print(' {}: {}'.format(config.name, ' '.join(config.as_args()))) + try: + run_data.append(RunCache.RunData(config, run_benchmark(config))) + except: + if not args.ignore_failures: + raise # store outputs in cache if args.results_cache and not cache_exists: - BenchmarkResult.dump_results_json(results, args.results_cache) + cache = RunCache(run_data) + cache.dump_json(args.results_cache) - # display the summary - summary = ResultsSummary(results) - summary.print() - if args.plot: - if not os.path.isdir(args.plot_output_dir): - os.makedirs(args.plot_output_dir) - summary.plot(args.plot_output_dir, - backend=args.plot_backend, - theme=args.plot_theme, - save_format=args.plot_format, - transparent=args.plot_transparent) + # display summary + if _summary: + summary = ResultsSummary(run_data) + summary.text_summary() + if args.plot: + summary.plot_summary( + plots_dir=args.plot_output_dir, + backend=args.plot_backend, + theme=args.plot_theme, + save_format=args.plot_format, + transparent=args.plot_transparent, + ) if __name__ == "__main__":