From a40817f3a80d7c3c7ce725857e9a935e6f86aa01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C4=99drzej=20Boczar?= <jboczar@antmicro.com>
Date: Fri, 31 Jan 2020 12:57:22 +0100
Subject: [PATCH 1/3] test: add plotting of benchmark results

---
 test/run_benchmarks.py | 209 +++++++++++++++++++++++++++++------------
 1 file changed, 149 insertions(+), 60 deletions(-)

diff --git a/test/run_benchmarks.py b/test/run_benchmarks.py
index 0eb7440..81afe84 100755
--- a/test/run_benchmarks.py
+++ b/test/run_benchmarks.py
@@ -5,9 +5,11 @@
 
 import os
 import re
-import yaml
 import argparse
 import subprocess
+from collections import defaultdict, namedtuple
+
+import yaml
 
 from litedram.common import Settings
 
@@ -18,14 +20,20 @@ from .benchmark import LiteDRAMBenchmarkSoC
 def ng(name, regex):
     return r'(?P<{}>{})'.format(name, regex)
 
+def center(text, width, fillc=' '):
+    added = width - len(text)
+    left = added // 2
+    right = added - left
+    return fillc * left + text + fillc * right
 
 def human_readable(value):
     binary_prefixes = ['', 'k', 'M', 'G', 'T']
+    mult = 1.0
     for prefix in binary_prefixes:
-        if value < 1024:
+        if value * mult < 1024:
             break
-        value /= 1024
-    return value, prefix
+        mult /= 1024
+    return mult, prefix
 
 # Benchmark configuration --------------------------------------------------------------------------
 
@@ -101,61 +109,146 @@ class BenchmarkResult:
 # Results summary ----------------------------------------------------------------------------------
 
 class ResultsSummary:
+    # value_scaling is a function: value -> (multiplier, prefix)
+    Fmt = namedtuple('MetricFormatting', ['name', 'unit', 'value_scaling'])
+    metric_formats = {
+        'write_bandwidth':  Fmt('Write bandwidth',  'bps', lambda value: human_readable(value)),
+        'read_bandwidth':   Fmt('Read bandwidth',   'bps', lambda value: human_readable(value)),
+        'write_efficiency': Fmt('Write efficiency', '',    lambda value: (100, '%')),
+        'read_efficiency':  Fmt('Read efficiency',  '',    lambda value: (100, '%')),
+    }
+
     def __init__(self, results):
         self.results = results
-        # convert results, which map config->metrics to a mapping metric->(config->result)
-        self.write_bandwidth = self.collect('write_bandwidth')
-        self.read_bandwidth = self.collect('read_bandwidth')
-        self.write_efficiency = self.collect('write_efficiency')
-        self.read_efficiency = self.collect('read_efficiency')
 
-    def create_name(self, config):
-        return '{}:{}:{}:{}'.format(
-            config.sdram_module, config.sdram_data_width,
-            config.bist_length, config.bist_random)
-
-    def collect(self, attribute):
-        by_case = {}
+    def by_metric(self, metric):
+        """Returns pairs of value of the given metric and the configuration used for benchmark"""
         for result in self.results:
-            value = getattr(result, attribute)()
-            by_case[self.create_name(result.config)] = value
-        return by_case
-
-    def value_string(self, metric, value):
-        if metric in ['write_bandwidth', 'read_bandwidth']:
-            return '{:6.3f} {}bps'.format(*human_readable(value))
-        elif ['write_efficiency', 'read_efficiency']:
-            return '{:5.1f} %'.format(100 * value)
-        else:
-            raise ValueError()
+            value = getattr(result, metric)()
+            yield value, result.config
 
     def print(self):
-        print('\n---====== Summary ======---')
-        for metric in ['write_bandwidth', 'read_bandwidth', 'write_efficiency', 'read_efficiency']:
-            print(metric)
-            for case, value in getattr(self, metric).items():
-                print('  {:30}  {}'.format(case, self.value_string(metric, value)))
+        legend = '(module, datawidth, length, random, result)'
+        fmt = '   {module:15}  {dwidth:2}  {length:4}  {random:1}    {result}'
 
-    def plot(self):
-        raise NotImplementedError()
+        # store formatted lines per metric
+        metric_lines = defaultdict(list)
+        for metric, (_, unit, formatter) in self.metric_formats.items():
+            for value, config in self.by_metric(metric):
+                mult, prefix = formatter(value)
+                result = '{:5.1f} {}{}'.format(value * mult, prefix, unit)
+                line = fmt.format(module=config.sdram_module,
+                                  dwidth=config.sdram_data_width,
+                                  length=config.bist_length,
+                                  random=int(config.bist_random),
+                                  result=result)
+                metric_lines[metric].append(line)
+
+        # find length of the longest line
+        max_length = max((len(l) for lines in metric_lines.values() for l in lines))
+        max_length = max(max_length, len(legend) + 2)
+        width = max_length + 2
+
+        # print the formatted summary
+        def header(text):
+            mid = center(text, width - 6, '=')
+            return center(mid, width, '-')
+        print(header(' Summary '))
+        print(center(legend, width))
+        for metric, lines in metric_lines.items():
+            print(center(self.metric_formats[metric].name, width))
+            for line in lines:
+                print(line)
+        print(header(''))
+
+    def plot(self, output_dir, backend='Agg', theme='default', save_format='png', **savefig_kwargs):
+        """Create plots with benchmark results summary
+
+        Default backend is Agg, which is a non-GUI backend and only allows
+        saving figures as files. If a GUI backend is passed, plt.show()
+        will be called at the end.
+        """
+        # import locally here to be able to run benchmarks without installing matplotlib
+        import matplotlib
+        matplotlib.use(backend)
+
+        import matplotlib.pyplot as plt
+        import numpy as np
+        from matplotlib.ticker import FuncFormatter, PercentFormatter
+
+        plt.style.use(theme)
+
+        def bandwidth_formatter_func(value, pos):
+            mult, prefix = human_readable(value)
+            return '{:.1f}{}bps'.format(value * mult, prefix)
+
+        tick_formatters = {
+            'write_bandwidth':  FuncFormatter(bandwidth_formatter_func),
+            'read_bandwidth':   FuncFormatter(bandwidth_formatter_func),
+            'write_efficiency': PercentFormatter(1.0),
+            'read_efficiency':  PercentFormatter(1.0),
+        }
+
+        def config_tick_name(config):
+            return '{}\n{}, {}, {}'.format(config.sdram_module, config.sdram_data_width,
+                                         config.bist_length, int(config.bist_random))
+
+        for metric, (name, unit, _) in self.metric_formats.items():
+            fig = plt.figure()
+            axis = plt.gca()
+
+            values, configs = zip(*self.by_metric(metric))
+            ticks = np.arange(len(configs))
+
+            axis.barh(ticks, values, align='center')
+            axis.set_yticks(ticks)
+            axis.set_yticklabels([config_tick_name(c) for c in configs])
+            axis.invert_yaxis()
+            axis.xaxis.set_major_formatter(tick_formatters[metric])
+            axis.xaxis.set_tick_params(rotation=30)
+            axis.grid(True)
+            axis.spines['top'].set_visible(False)
+            axis.spines['right'].set_visible(False)
+            axis.set_axisbelow(True)
+
+            # force xmax to 100%
+            if metric in ['write_efficiency', 'read_efficiency']:
+                axis.set_xlim(right=1.0)
+
+            title = self.metric_formats[metric].name
+            axis.set_title(title, fontsize=12)
+
+            plt.tight_layout()
+            filename = '{}.{}'.format(metric, save_format)
+            fig.savefig(os.path.join(output_dir, filename), **savefig_kwargs)
+
+        if backend != 'Agg':
+            plt.show()
 
 # Run ----------------------------------------------------------------------------------------------
 
-def run_benchmark(args):
+def run_benchmark(cmd_args):
+    # run as a separate process, because otherwise we cannot capture all output from verilator
     benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py')
-    command = ['python3', benchmark_script, *args]
+    command = ['python3', benchmark_script, *cmd_args]
     proc = subprocess.run(command, capture_output=True, text=True, check=True)
     return proc.stdout
 
 
-def main():
+def main(argv=None):
     parser = argparse.ArgumentParser(
-        description='Run LiteDRAM benchmarks and collect the results')
-    parser.add_argument('--yaml', required=True, help='Load benchmark configurations from YAML file')
-    parser.add_argument('--names', nargs='*', help='Limit benchmarks to given names')
-    parser.add_argument('--regex', help='Limit benchmarks to names matching the regex')
-    parser.add_argument('--not-regex', help='Limit benchmarks to names not matching the regex')
-    args = parser.parse_args()
+        description='Run LiteDRAM benchmarks and collect the results.')
+    parser.add_argument('--yaml',             required=True,       help='Load benchmark configurations from YAML file')
+    parser.add_argument('--names',            nargs='*',           help='Limit benchmarks to given names')
+    parser.add_argument('--regex',                                 help='Limit benchmarks to names matching the regex')
+    parser.add_argument('--not-regex',                             help='Limit benchmarks to names not matching the regex')
+    parser.add_argument('--plot',             action='store_true', help='Generate plots with results summary')
+    parser.add_argument('--plot-format',      default='png',       help='Specify plots file format (default=png)')
+    parser.add_argument('--plot-backend',     default='Agg',       help='Optionally specify matplotlib GUI backend')
+    parser.add_argument('--plot-transparent', action='store_true', help='Use transparent background when saving plots')
+    parser.add_argument('--plot-output-dir',  default='plots',     help='Specify where to save the plots')
+    parser.add_argument('--plot-theme',       default='default',   help='Use different matplotlib theme')
+    args = parser.parse_args(argv)
 
     # load and filter configurations
     configurations = BenchmarkConfiguration.load_yaml(args.yaml)
@@ -172,26 +265,22 @@ def main():
     # run the benchmarks
     results = []
     for name, config in configurations.items():
-        args = config.as_args()
-        print('{}: {}'.format(name, ' '.join(args)))
-
-        result = BenchmarkResult(config, run_benchmark(args))
-        results.append(result)
-
-        print("""\
-  write_bandwidth  = {:6.3f} {}bps
-  read_bandwidth   = {:6.3f} {}bps
-  write_efficiency = {:6.2f} %
-  read_efficiency  = {:6.2f} %
-        """.rstrip().format(
-            *human_readable(result.write_bandwidth()),
-            *human_readable(result.read_bandwidth()),
-            result.write_efficiency() * 100,
-            result.read_efficiency() * 100,
-        ))
+        cmd_args = config.as_args()
+        print('{}: {}'.format(name, ' '.join(cmd_args)))
+        results.append(BenchmarkResult(config, run_benchmark(cmd_args)))
 
+    # display the summary
     summary = ResultsSummary(results)
     summary.print()
+    if args.plot:
+        if not os.path.isdir(args.plot_output_dir):
+            os.makedirs(args.plot_output_dir)
+        summary.plot(args.plot_output_dir,
+                     backend=args.plot_backend,
+                     theme=args.plot_theme,
+                     save_format=args.plot_format,
+                     transparent=args.plot_transparent)
+
 
 if __name__ == "__main__":
     main()

From 1a517a308d019379a3c40347d424fc0e1feb06af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C4=99drzej=20Boczar?= <jboczar@antmicro.com>
Date: Fri, 31 Jan 2020 14:16:39 +0100
Subject: [PATCH 2/3] test: cache benchmark results to be able to produce
 multiple summaries

---
 test/run_benchmarks.py | 43 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/test/run_benchmarks.py b/test/run_benchmarks.py
index 81afe84..bfc6d04 100755
--- a/test/run_benchmarks.py
+++ b/test/run_benchmarks.py
@@ -40,7 +40,7 @@ def human_readable(value):
 class BenchmarkConfiguration(Settings):
     def __init__(self, sdram_module, sdram_data_width, bist_length, bist_random):
         self.set_attributes(locals())
-        self._settings = {k: v for k, v in locals().items() if v != self}
+        self._settings = {k: v for k, v in locals().items() if k != 'self'}
 
     def as_args(self):
         args = []
@@ -53,6 +53,12 @@ class BenchmarkConfiguration(Settings):
                 args.extend([arg_string, str(value)])
         return args
 
+    def __eq__(self, other):
+        if not isinstance(other, BenchmarkConfiguration):
+            return NotImplemented
+        return all((getattr(self, setting) == getattr(other, setting)
+                    for setting in self._settings.keys()))
+
     @classmethod
     def load_yaml(cls, yaml_file):
         with open(yaml_file) as f:
@@ -235,6 +241,16 @@ def run_benchmark(cmd_args):
     return proc.stdout
 
 
+def run_benchmarks(configurations):
+    benchmarks = []
+    for name, config in configurations.items():
+        cmd_args = config.as_args()
+        print('{}: {}'.format(name, ' '.join(cmd_args)))
+        output = run_benchmark(cmd_args)
+        benchmarks.append((config, output))
+    return benchmarks
+
+
 def main(argv=None):
     parser = argparse.ArgumentParser(
         description='Run LiteDRAM benchmarks and collect the results.')
@@ -248,6 +264,7 @@ def main(argv=None):
     parser.add_argument('--plot-transparent', action='store_true', help='Use transparent background when saving plots')
     parser.add_argument('--plot-output-dir',  default='plots',     help='Specify where to save the plots')
     parser.add_argument('--plot-theme',       default='default',   help='Use different matplotlib theme')
+    parser.add_argument('--output-cache',                          help='Cache benchmark outputs to given file if it exists, else load them from the file without running benchmarks. This allows to run the script multiple times to produce different outputs from the same run')
     args = parser.parse_args(argv)
 
     # load and filter configurations
@@ -262,14 +279,26 @@ def main(argv=None):
     for f in filters:
         configurations = dict(filter(f, configurations.items()))
 
-    # run the benchmarks
-    results = []
-    for name, config in configurations.items():
-        cmd_args = config.as_args()
-        print('{}: {}'.format(name, ' '.join(cmd_args)))
-        results.append(BenchmarkResult(config, run_benchmark(cmd_args)))
+    cache_exists = args.output_cache and os.path.isfile(args.output_cache)
+
+    # load outputs from cache if it exists
+    if args.output_cache and cache_exists:
+        import pickle
+        with open(args.output_cache, 'rb') as f:
+            cached_benchmarks = pickle.load(f)
+        # take only those that match configurations
+        benchmarks = [(c, o) for c, o in cached_benchmarks if c in configurations.values()]
+    else:  # run all the benchmarks normally
+        benchmarks = run_benchmarks(configurations)
+
+    # store outputs in cache
+    if args.output_cache and not cache_exists:
+        import pickle
+        with open(args.output_cache, 'wb') as f:
+            pickle.dump(benchmarks, f, pickle.HIGHEST_PROTOCOL)
 
     # display the summary
+    results = [BenchmarkResult(config, output) for config, output in benchmarks]
     summary = ResultsSummary(results)
     summary.print()
     if args.plot:

From 811c73254b6bac0df780fd29cd265b2dcbdccc2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C4=99drzej=20Boczar?= <jboczar@antmicro.com>
Date: Fri, 31 Jan 2020 15:16:37 +0100
Subject: [PATCH 3/3] test: benchmark script exits with error on any checker
 error

---
 test/run_benchmarks.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/run_benchmarks.py b/test/run_benchmarks.py
index bfc6d04..da8a6cd 100755
--- a/test/run_benchmarks.py
+++ b/test/run_benchmarks.py
@@ -5,6 +5,7 @@
 
 import os
 import re
+import sys
 import argparse
 import subprocess
 from collections import defaultdict, namedtuple
@@ -247,7 +248,16 @@ def run_benchmarks(configurations):
         cmd_args = config.as_args()
         print('{}: {}'.format(name, ' '.join(cmd_args)))
         output = run_benchmark(cmd_args)
+
+        # return raw outputs, not BenchmarkResult so that we can store them in a file
         benchmarks.append((config, output))
+
+        # exit if checker had any read error
+        result = BenchmarkResult(config, output)
+        if result.checker_errors != 0:
+            print('Error during benchmark "{}": checker_errors = {}'.format(
+                name, result.checker_errors), file=sys.stderr)
+            sys.exit(1)
     return benchmarks