diff --git a/test/run_benchmarks.py b/test/run_benchmarks.py
index 598e225..7ee3a53 100755
--- a/test/run_benchmarks.py
+++ b/test/run_benchmarks.py
@@ -12,6 +12,15 @@
 import subprocess
 from collections import defaultdict, namedtuple
 import yaml
+try:
+    import numpy as np
+    import pandas as pd
+    import matplotlib
+    from matplotlib.ticker import FuncFormatter, PercentFormatter, ScalarFormatter
+    _summary = True
+except ImportError as e:
+    _summary = False
+    print('[WARNING] Results summary not available:', e, file=sys.stderr)
 
 from litedram.common import Settings as _Settings
 
@@ -19,24 +28,12 @@
 from . import benchmark
 from .benchmark import LiteDRAMBenchmarkSoC, load_access_pattern
 
-# constructs python regex named group
-def ng(name, regex):
-    return r'(?P<{}>{})'.format(name, regex)
-
 def center(text, width, fillc=' '):
     added = width - len(text)
     left = added // 2
     right = added - left
     return fillc * left + text + fillc * right
 
-def human_readable(value):
-    binary_prefixes = ['', 'k', 'M', 'G', 'T']
-    mult = 1.0
-    for prefix in binary_prefixes:
-        if value * mult < 1024:
-            break
-        mult /= 1024
-    return mult, prefix
 
 # Benchmark configuration --------------------------------------------------------------------------
 
@@ -72,7 +69,7 @@ class CustomAccess(Settings):
         self.set_attributes(locals())
 
     @property
-    def length(self):
+    def pattern(self):
         # we have to load the file to know pattern length, cache it when requested
         if not hasattr(self, '_pattern'):
             path = self.pattern_file
@@ -80,7 +77,11 @@ class CustomAccess(Settings):
                 benchmark_dir = os.path.dirname(benchmark.__file__)
                 path = os.path.join(benchmark_dir, path)
             self._pattern = load_access_pattern(path)
-        return len(self._pattern)
+        return self._pattern
+
+    @property
+    def length(self):
+        return len(self.pattern)
 
     def as_args(self):
         return ['--access-pattern=%s' % self.pattern_file]
@@ -107,247 +108,319 @@ class BenchmarkConfiguration(Settings):
     def length(self):
         return self.access_pattern.length
 
+    @classmethod
+    def from_dict(cls, d):
+        access_cls = CustomAccess if 'pattern_file' in d['access_pattern'] else GeneratedAccess
+        d['access_pattern'] = access_cls(**d['access_pattern'])
+        return cls(**d)
+
     @classmethod
     def load_yaml(cls, yaml_file):
         with open(yaml_file) as f:
             description = yaml.safe_load(f)
         configs = []
         for name, desc in description.items():
-            if 'access_pattern' in desc:
-                access = CustomAccess(desc.pop('access_pattern'))
-            else:
-                access = GeneratedAccess(desc.pop('bist_length'), desc.pop('bist_random'))
-            configs.append(cls(name, **desc, access_pattern=access))
+            desc['name'] = name
+            configs.append(cls.from_dict(desc))
         return configs
 
     def __repr__(self):
         return 'BenchmarkConfiguration(%s)' % self.as_dict()
 
+    @property
+    def soc(self):
+        if not hasattr(self, '_soc'):
+            kwargs = dict(
+                sdram_module=self.sdram_module,
+                sdram_data_width=self.sdram_data_width,
+            )
+            if isinstance(self.access_pattern, GeneratedAccess):
+                kwargs['bist_length'] = self.access_pattern.bist_length
+                kwargs['bist_random'] = self.access_pattern.bist_random
+            elif isinstance(self.access_pattern, CustomAccess):
+                kwargs['pattern_init'] = self.access_pattern.pattern
+            else:
+                raise ValueError(self.access_pattern)
+            self._soc = LiteDRAMBenchmarkSoC(**kwargs)
+        return self._soc
+
 
 # Benchmark results --------------------------------------------------------------------------------
 
+# constructs python regex named group
+def ng(name, regex):
+    return r'(?P<{}>{})'.format(name, regex)
+
+
+def _compiled_pattern(stage, var):
+    pattern_fmt = r'{stage}\s+{var}:\s+{value}'
+    pattern = pattern_fmt.format(
+        stage=stage,
+        var=var,
+        value=ng('value', '[0-9]+'),
+    )
+    return re.compile(pattern)
+
+
 class BenchmarkResult:
-    def __init__(self, config, output):
-        self.config = config
+    # pre-compiled patterns for all benchmarks
+    patterns = {
+        'generator_ticks': _compiled_pattern('BIST-GENERATOR', 'ticks'),
+        'checker_errors': _compiled_pattern('BIST-CHECKER', 'errors'),
+        'checker_ticks': _compiled_pattern('BIST-CHECKER', 'ticks'),
+    }
+
+    @staticmethod
+    def find(pattern, output):
+        result = pattern.search(output)
+        assert result is not None, \
+            'Could not find pattern "%s" in output:\n%s' % (pattern, output)
+        return int(result.group('value'))
+
+    def __init__(self, output):
         self._output = output
-        self.parse_output(output)
-        # instantiate the benchmarked soc to check its configuration
-        self.benchmark_soc = LiteDRAMBenchmarkSoC(**self.config._settings)
+        for attr, pattern in self.patterns.items():
+            setattr(self, attr, self.find(pattern, output))
 
-    def cmd_count(self):
-        data_width = self.benchmark_soc.sdram.controller.interface.data_width
-        return self.config.bist_length / (data_width // 8)
-
-    def clk_period(self):
-        clk_freq = self.benchmark_soc.sdrphy.module.clk_freq
-        return 1 / clk_freq
-
-    def write_bandwidth(self):
-        return (8 * self.config.bist_length) / (self.generator_ticks * self.clk_period())
-
-    def read_bandwidth(self):
-        return (8 * self.config.bist_length) / (self.checker_ticks * self.clk_period())
-
-    def write_efficiency(self):
-        return self.cmd_count() / self.generator_ticks
-
-    def read_efficiency(self):
-        return self.cmd_count() / self.checker_ticks
-
-    def write_latency(self):
-        assert self.config.bist_length == 1, 'Not a latency benchmark'
-        return self.generator_ticks
-
-    def read_latency(self):
-        assert self.config.bist_length == 1, 'Not a latency benchmark'
-        return self.checker_ticks
-
-    def parse_output(self, output):
-        bist_pattern = r'{stage}\s+{var}:\s+{value}'
-
-        def find(stage, var):
-            pattern = bist_pattern.format(
-                stage=stage,
-                var=var,
-                value=ng('value', '[0-9]+'),
-            )
-            result = re.search(pattern, output)
-            assert result is not None, 'Could not find pattern in output: %s, %s' % (pattern, output)
-            return int(result.group('value'))
-
-        self.generator_ticks = find('BIST-GENERATOR', 'ticks')
-        self.checker_errors = find('BIST-CHECKER', 'errors')
-        self.checker_ticks = find('BIST-CHECKER', 'ticks')
-
-    @classmethod
-    def dump_results_json(cls, results, file):
-        """Save multiple results in a JSON file.
-
-        Only configurations and outpits are saved, as they can be used to reconstruct BenchmarkResult.
- """ - # simply use config._settings as it defines the BenchmarkConfiguration - results_raw = [(r.config._settings, r._output) for r in results] - with open(file, 'w') as f: - json.dump(results_raw, f) - - @classmethod - def load_results_json(cls, file): - """Load results from a JSON file.""" - with open(file, 'r') as f: - results_raw = json.load(f) - return [cls(BenchmarkConfiguration(**settings), output) for (settings, output) in results_raw] + def __repr__(self): + d = {attr: getattr(self, attr) for attr in self.patterns.keys()} + return 'BenchmarkResult(%s)' % d # Results summary ---------------------------------------------------------------------------------- +def human_readable(value): + binary_prefixes = ['', 'k', 'M', 'G', 'T'] + mult = 1.0 + for prefix in binary_prefixes: + if value * mult < 1024: + break + mult /= 1024 + return mult, prefix + + +def clocks_fmt(clocks): + return '{:d} clk'.format(int(clocks)) + + +def bandwidth_fmt(bw): + mult, prefix = human_readable(bw) + return '{:.1f} {}bps'.format(bw * mult, prefix) + + +def efficiency_fmt(eff): + return '{:.1f} %'.format(eff * 100) + + class ResultsSummary: - # value_scaling is a function: value -> (multiplier, prefix) - Fmt = namedtuple('MetricFormatting', ['name', 'unit', 'value_scaling']) - metric_formats = { - 'write_bandwidth': Fmt('Write bandwidth', 'bps', lambda value: human_readable(value)), - 'read_bandwidth': Fmt('Read bandwidth', 'bps', lambda value: human_readable(value)), - 'write_efficiency': Fmt('Write efficiency', '', lambda value: (100, '%')), - 'read_efficiency': Fmt('Read efficiency', '', lambda value: (100, '%')), - 'write_latency': Fmt('Write latency', 'clk', lambda value: (1, '')), - 'read_latency': Fmt('Read latency', 'clk', lambda value: (1, '')), - } + def __init__(self, run_data, plots_dir='plots'): + self.plots_dir = plots_dir - def __init__(self, results): - self.results = results + # gather results into tabular data + column_mappings = { + 'name': lambda d: d.config.name, + 'sdram_module': lambda d: d.config.sdram_module, + 'sdram_data_width': lambda d: d.config.sdram_data_width, + 'bist_length': lambda d: getattr(d.config.access_pattern, 'bist_length', None), + 'bist_random': lambda d: getattr(d.config.access_pattern, 'bist_random', None), + 'pattern_file': lambda d: getattr(d.config.access_pattern, 'pattern_file', None), + 'length': lambda d: d.config.length, + 'generator_ticks': lambda d: d.result.generator_ticks, + 'checker_errors': lambda d: d.result.checker_errors, + 'checker_ticks': lambda d: d.result.checker_ticks, + 'ctrl_data_width': lambda d: d.config.soc.sdram.controller.interface.data_width, + 'clk_freq': lambda d: d.config.soc.sdrphy.module.clk_freq, + } + columns = {name: [mapping(data) for data in run_data] for name, mapping, in column_mappings.items()} + self.df = df = pd.DataFrame(columns) - def by_metric(self, metric): - """Returns pairs of value of the given metric and the configuration used for benchmark""" - for result in self.results: - # omit the results that should not be used to calculate given metric - if result.config.bist_length == 1 and metric not in ['read_latency', 'write_latency'] \ - or result.config.bist_length != 1 and metric in ['read_latency', 'write_latency']: - continue - value = getattr(result, metric)() - yield value, result.config + # replace None with NaN + df.fillna(value=np.nan, inplace=True) - def print(self): - legend = '(module, datawidth, length, random, result)' - fmt = ' {module:15} {dwidth:2} {length:4} {random:1} {result}' + # compute other 
+        df['clk_period'] = 1 / df['clk_freq']
+        df['write_bandwidth'] = (8 * df['length']) / (df['generator_ticks'] * df['clk_period'])
+        df['read_bandwidth'] = (8 * df['length']) / (df['checker_ticks'] * df['clk_period'])
 
-        # store formatted lines per metric
-        metric_lines = defaultdict(list)
-        for metric, (_, unit, formatter) in self.metric_formats.items():
-            for value, config in self.by_metric(metric):
-                mult, prefix = formatter(value)
-                value_fmt = '{:5.1f} {}{}' if isinstance(value * mult, float) else '{:5d} {}{}'
-                result = value_fmt.format(value * mult, prefix, unit)
-                line = fmt.format(module=config.sdram_module,
-                                  dwidth=config.sdram_data_width,
-                                  length=config.bist_length,
-                                  random=int(config.bist_random),
-                                  result=result)
-                metric_lines[metric].append(line)
+        df['cmd_count'] = df['length'] / (df['ctrl_data_width'] / 8)
+        df['write_efficiency'] = df['cmd_count'] / df['generator_ticks']
+        df['read_efficiency'] = df['cmd_count'] / df['checker_ticks']
 
-        # find length of the longest line
-        max_length = max((len(l) for lines in metric_lines.values() for l in lines))
-        max_length = max(max_length, len(legend) + 2)
-        width = max_length + 2
+        df['write_latency'] = df[df['bist_length'] == 1]['generator_ticks']
+        df['read_latency'] = df[df['bist_length'] == 1]['checker_ticks']
 
-        # print the formatted summary
-        def header(text):
-            mid = center(text, width - 6, '=')
-            return center(mid, width, '-')
-        print(header(' Summary '))
-        print(center(legend, width))
-        for metric, lines in metric_lines.items():
-            print(center(self.metric_formats[metric].name, width))
-            for line in lines:
-                print(line)
-        print(header(''))
+        # boolean distinction between latency benchmarks and sequence benchmarks,
+        # as their results differ significantly
+        df['is_latency'] = ~pd.isna(df['write_latency'])
+        assert (df['is_latency'] == ~pd.isna(df['read_latency'])).all(), \
+            'write_latency and read_latency should both have a value or both be NaN'
 
-    def plot(self, output_dir, backend='Agg', theme='default', save_format='png', **savefig_kwargs):
-        """Create plots with benchmark results summary
+        # data formatting for text summary
+        self.text_formatters = {
+            'write_bandwidth': bandwidth_fmt,
+            'read_bandwidth': bandwidth_fmt,
+            'write_efficiency': efficiency_fmt,
+            'read_efficiency': efficiency_fmt,
+            'write_latency': clocks_fmt,
+            'read_latency': clocks_fmt,
+        }
 
-        Default backend is Agg, which is non-GUI backed and only allows
-        to save figures as files. If a GUI backed is passed, plt.show()
-        will be called at the end.
- """ - # import locally here to be able to run benchmarks without installing matplotlib - import matplotlib - matplotlib.use(backend) - - import matplotlib.pyplot as plt - import numpy as np - from matplotlib.ticker import FuncFormatter, PercentFormatter, ScalarFormatter - - plt.style.use(theme) - - def bandwidth_formatter_func(value, pos): - mult, prefix = human_readable(value) - return '{:.1f}{}bps'.format(value * mult, prefix) - - tick_formatters = { - 'write_bandwidth': FuncFormatter(bandwidth_formatter_func), - 'read_bandwidth': FuncFormatter(bandwidth_formatter_func), + # data formatting for plot summary + self.plot_xticks_formatters = { + 'write_bandwidth': FuncFormatter(lambda value, pos: bandwidth_fmt(value)), + 'read_bandwidth': FuncFormatter(lambda value, pos: bandwidth_fmt(value)), 'write_efficiency': PercentFormatter(1.0), 'read_efficiency': PercentFormatter(1.0), 'write_latency': ScalarFormatter(), 'read_latency': ScalarFormatter(), } - def config_tick_name(config): - return '{}\n{}, {}, {}'.format(config.sdram_module, config.sdram_data_width, - config.bist_length, int(config.bist_random)) + def print_df(self, title, df): + # make sure all data will be shown + with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None): + print('===> {}:'.format(title)) + print(df) - for metric, (name, unit, _) in self.metric_formats.items(): - fig = plt.figure() - axis = plt.gca() + def get_summary(self, mask=None, columns=None, column_formatting=None, sort_kwargs=None): + # work on a copy + df = self.df.copy() - values, configs = zip(*self.by_metric(metric)) - ticks = np.arange(len(configs)) + if sort_kwargs is not None: + df = df.sort_values(**sort_kwargs) - axis.barh(ticks, values, align='center') - axis.set_yticks(ticks) - axis.set_yticklabels([config_tick_name(c) for c in configs]) - axis.invert_yaxis() - axis.xaxis.set_major_formatter(tick_formatters[metric]) - axis.xaxis.set_tick_params(rotation=30) - axis.grid(True) - axis.spines['top'].set_visible(False) - axis.spines['right'].set_visible(False) - axis.set_axisbelow(True) + if column_formatting is not None: + for column, mapping in column_formatting.items(): + old = '_{}'.format(column) + df[old] = df[column].copy() + df[column] = df[column].map(lambda value: mapping(value) if not pd.isna(value) else value) - # force xmax to 100% - if metric in ['write_efficiency', 'read_efficiency']: - axis.set_xlim(right=1.0) + df = df[mask] if mask is not None else df + df = df[columns] if columns is not None else df - title = self.metric_formats[metric].name - axis.set_title(title, fontsize=12) + return df - plt.tight_layout() - filename = '{}.{}'.format(metric, save_format) - fig.savefig(os.path.join(output_dir, filename), **savefig_kwargs) + def text_summary(self): + for title, df in self.groupped_results(): + self.print_df(title, df) + print() + + def groupped_results(self, formatted=True): + df = self.df + + formatters = self.text_formatters if formatted else {} + + common_columns = ['name', 'sdram_module', 'sdram_data_width'] + latency_columns = ['write_latency', 'read_latency'] + performance_columns = ['write_bandwidth', 'read_bandwidth', 'write_efficiency', 'read_efficiency'] + + yield 'Latency', self.get_summary( + mask=df['is_latency'] == True, + columns=common_columns + latency_columns, + column_formatting=formatters, + ) + # yield 'Any access pattern', self.get_summary( + # mask=(df['is_latency'] == False), + # columns=common_columns + performance_columns + ['length', 'bist_random', 
+        #     column_formatting=self.text_formatters,
+        #     **kwargs,
+        # ),
+        yield 'Custom access pattern', self.get_summary(
+            mask=(df['is_latency'] == False) & (~pd.isna(df['pattern_file'])),
+            columns=common_columns + performance_columns + ['length', 'pattern_file'],
+            column_formatting=formatters,
+        ),
+        yield 'Sequential access pattern', self.get_summary(
+            mask=(df['is_latency'] == False) & (pd.isna(df['pattern_file'])) & (df['bist_random'] == False),
+            columns=common_columns + performance_columns + ['bist_length'],  # could be length
+            column_formatting=formatters,
+        ),
+        yield 'Random access pattern', self.get_summary(
+            mask=(df['is_latency'] == False) & (pd.isna(df['pattern_file'])) & (df['bist_random'] == True),
+            columns=common_columns + performance_columns + ['bist_length'],
+            column_formatting=formatters,
+        ),
+
+    def plot_summary(self, plots_dir='plots', backend='Agg', theme='default', save_format='png', **savefig_kw):
+        matplotlib.use(backend)
+        import matplotlib.pyplot as plt
+        plt.style.use(theme)
+
+        for title, df in self.groupped_results(formatted=False):
+            for column in self.plot_xticks_formatters.keys():
+                if column not in df.columns or df[column].empty:
+                    continue
+                axis = self.plot_df(title, df, column)
+
+                # construct path
+                def path_name(name):
+                    return name.lower().replace(' ', '_')
+
+                filename = '{}.{}'.format(path_name(column), save_format)
+                path = os.path.join(plots_dir, path_name(title), filename)
+                os.makedirs(os.path.dirname(path), exist_ok=True)
+
+                # save figure
+                axis.get_figure().savefig(path, **savefig_kw)
 
         if backend != 'Agg':
             plt.show()
 
+    def plot_df(self, title, df, column, save_format='png', save_filename=None):
+        if save_filename is None:
+            save_filename = os.path.join(self.plots_dir, title.lower().replace(' ', '_'))
+
+        axis = df.plot(kind='barh', x='name', y=column, title=title, grid=True, legend=False)
+        if column in self.plot_xticks_formatters:
+            axis.xaxis.set_major_formatter(self.plot_xticks_formatters[column])
+        axis.xaxis.set_tick_params(rotation=15)
+        axis.spines['top'].set_visible(False)
+        axis.spines['right'].set_visible(False)
+        axis.set_axisbelow(True)
+
+        # # force xmax to 100%
+        # if column in ['write_efficiency', 'read_efficiency']:
+        #     axis.set_xlim(right=1.0)
+
+        return axis
+
 
 # Run ----------------------------------------------------------------------------------------------
 
-def run_benchmark(cmd_args):
-    # run as separate process, because else we cannot capture all output from verilator
-    benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py')
-    command = ['python3', benchmark_script, *cmd_args]
-    proc = subprocess.run(command, stdout=subprocess.PIPE)
+class RunCache(list):
+    RunData = namedtuple('RunData', ['config', 'result'])
+
+    def dump_json(self, filename):
+        json_data = [{'config': data.config.as_dict(), 'output': data.result._output} for data in self]
+        with open(filename, 'w') as f:
+            json.dump(json_data, f)
+
+    @classmethod
+    def load_json(cls, filename):
+        with open(filename, 'r') as f:
+            json_data = json.load(f)
+        loaded = []
+        for data in json_data:
+            config = BenchmarkConfiguration.from_dict(data['config'])
+            result = BenchmarkResult(data['output'])
+            loaded.append(cls.RunData(config=config, result=result))
+        return loaded
+
+
+def run_python(script, args):
+    command = ['python3', script, *args]
+    proc = subprocess.run(command, stdout=subprocess.PIPE, cwd=os.path.dirname(script))
     return str(proc.stdout)
 
-def run_benchmarks(configurations):
-    results = []
-    for name, config in configurations.items():
-        cmd_args = config.as_args()
-        print('{}: {}'.format(name, ' '.join(cmd_args)))
-        output = run_benchmark(cmd_args)
-        # exit if checker had any read error
-        result = BenchmarkResult(config, output)
-        if result.checker_errors != 0:
-            print('Error during benchmark "{}": checker_errors = {}'.format(
-                name, result.checker_errors), file=sys.stderr)
-            sys.exit(1)
-        results.append(result)
-    return results
+def run_benchmark(config):
+    benchmark_script = os.path.join(os.path.dirname(__file__), 'benchmark.py')
+    # run as separate process, because else we cannot capture all output from verilator
+    output = run_python(benchmark_script, config.as_args())
+    result = BenchmarkResult(output)
+    # fail if checker had any read error
+    if result.checker_errors != 0:
+        raise RuntimeError('Error during benchmark: checker_errors = {}, args = {}'.format(
+            result.checker_errors, config.as_args()
+        ))
+    return result
 
 
 def main(argv=None):
@@ -363,6 +436,7 @@ def main(argv=None):
     parser.add_argument('--plot-transparent', action='store_true', help='Use transparent background when saving plots')
     parser.add_argument('--plot-output-dir', default='plots', help='Specify where to save the plots')
     parser.add_argument('--plot-theme', default='default', help='Use different matplotlib theme')
+    parser.add_argument('--ignore-failures', action='store_true', help='Ignore failures during benchmarking, continue using only the successful runs')
     parser.add_argument('--results-cache', help="""Use given JSON file as results cache. If the file exists,
                         it will be loaded instead of running actual benchmarks,
                         else benchmarks will be run normally, and then saved
@@ -370,6 +444,11 @@ def main(argv=None):
                         to generate different summary without having to rerun benchmarks.""")
     args = parser.parse_args(argv)
 
+    if not args.results_cache and not _summary:
+        print('Summary not available and not running with --results-cache - run would not produce any results! Aborting.',
+              file=sys.stderr)
+        sys.exit(1)
+
     # load and filter configurations
     configurations = BenchmarkConfiguration.load_yaml(args.config)
     filters = {
@@ -382,31 +461,41 @@ def main(argv=None):
         configurations = filter(f, configurations)
     configurations = list(configurations)
 
-    cache_exists = args.results_cache and os.path.isfile(args.results_cache)
-    # load outputs from cache if it exsits
+    cache_exists = args.results_cache and os.path.isfile(args.results_cache)
     if args.results_cache and cache_exists:
-        cached_results = BenchmarkResult.load_results_json(args.results_cache)
+        cache = RunCache.load_json(args.results_cache)
+
         # take only those that match configurations
-        results = [r for r in cached_results if r.config in configurations.values()]
+        names_to_load = [c.name for c in configurations]
+        run_data = [data for data in cache if data.config.name in names_to_load]
     else:
         # run all the benchmarks normally
-        results = run_benchmarks(configurations)
+        run_data = []
+        for config in configurations:
+            print(' {}: {}'.format(config.name, ' '.join(config.as_args())))
+            try:
+                run_data.append(RunCache.RunData(config, run_benchmark(config)))
+            except:
+                if not args.ignore_failures:
+                    raise
 
     # store outputs in cache
     if args.results_cache and not cache_exists:
-        BenchmarkResult.dump_results_json(results, args.results_cache)
+        cache = RunCache(run_data)
+        cache.dump_json(args.results_cache)
 
-    # display the summary
-    summary = ResultsSummary(results)
-    summary.print()
-    if args.plot:
-        if not os.path.isdir(args.plot_output_dir):
-            os.makedirs(args.plot_output_dir)
-        summary.plot(args.plot_output_dir,
-                     backend=args.plot_backend,
-                     theme=args.plot_theme,
-                     save_format=args.plot_format,
-                     transparent=args.plot_transparent)
+    # display summary
+    if _summary:
+        summary = ResultsSummary(run_data)
+        summary.text_summary()
+        if args.plot:
+            summary.plot_summary(
+                plots_dir=args.plot_output_dir,
+                backend=args.plot_backend,
+                theme=args.plot_theme,
+                save_format=args.plot_format,
+                transparent=args.plot_transparent,
+            )
 
 
 if __name__ == "__main__":
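For reference, a minimal usage sketch of the cached-results flow introduced above (illustration only, not part of the patch). It assumes the module is importable as test.run_benchmarks, that a results.json cache was produced by an earlier run with --results-cache=results.json, and that numpy/pandas/matplotlib are installed so that _summary is True:

    # Hypothetical example: rebuild the text and plot summaries from a cache file
    # without rerunning any Verilator simulations (the cache file name is an assumption).
    from test.run_benchmarks import RunCache, ResultsSummary

    run_data = RunCache.load_json('results.json')   # list of RunData(config, result)
    summary = ResultsSummary(run_data)
    summary.text_summary()                          # grouped pandas tables on stdout
    summary.plot_summary(plots_dir='plots')         # one bar chart per group/metric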