From 9a3ceba5c15fa232f73468c6f471923c2a390b0b Mon Sep 17 00:00:00 2001 From: Krzysztof Boronski Date: Thu, 26 May 2022 13:32:11 -0500 Subject: [PATCH 1/3] Fix symbicache Signed-off-by: Krzysztof Boronski --- f4pga/__init__.py | 8 +++----- f4pga/cache.py | 31 +++++++++++++++++-------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/f4pga/__init__.py b/f4pga/__init__.py index 530df05..ae795a6 100755 --- a/f4pga/__init__.py +++ b/f4pga/__init__.py @@ -82,7 +82,7 @@ def req_exists(r): """ Checks whether a dependency exists on a drive. """ if type(r) is str: - if not Path(r).is_file() and not Path(r).is_symlink() and not Path(r).is_dir(): + if not Path(r).exists(): return False elif type(r) is list: return not (False in map(req_exists, r)) @@ -146,7 +146,7 @@ def prepare_stage_input(stage: Stage, values: dict, dep_paths: 'dict[str, ]', def update_dep_statuses(paths, consumer: str, symbicache: SymbiCache): if type(paths) is str: - return symbicache.update(paths, consumer) + return symbicache.update(Path(paths), consumer) elif type(paths) is list: for p in paths: return update_dep_statuses(p, consumer, symbicache) @@ -163,8 +163,6 @@ def dep_differ(paths, consumer: str, symbicache: SymbiCache): if type(paths) is str: s = symbicache.get_status(paths, consumer) - if s == 'untracked': - symbicache.update(paths, consumer) return symbicache.get_status(paths, consumer) != 'same' elif type(paths) is list: return True in [dep_differ(p, consumer, symbicache) for p in paths] @@ -361,7 +359,7 @@ class Flow: assert (p_dep.spec != 'req') continue - if self.symbicache: + if self.symbicache is not None: any_dep_differ |= \ update_dep_statuses(self.dep_paths[p_dep.name], provider.name, self.symbicache) diff --git a/f4pga/cache.py b/f4pga/cache.py index 78e4a7b..75a5d41 100755 --- a/f4pga/cache.py +++ b/f4pga/cache.py @@ -2,6 +2,7 @@ from pathlib import Path from zlib import adler32 as zlib_adler32 from json import dump as json_dump, load as json_load, JSONDecodeError +from f4pga.common import sfprint class SymbiCache: """ @@ -42,7 +43,7 @@ class SymbiCache: self.status[path] = {} self.status[path][consumer] = status - def update(self, path: str, consumer: str): + def update(self, path: Path, consumer: str): """ Add/remove a file to.from the tracked files, update checksum if necessary and calculate status. Multiple hashes are stored per file, one for each consumer module. @@ -50,23 +51,25 @@ class SymbiCache: by a module within the active flow. """ - isdir = Path(path).is_dir() - if not (Path(path).is_file() or Path(path).is_symlink() or isdir): - self._try_pop_consumer(path, consumer) + exists = path.exists() + + isdir = path.is_dir() + if not exists: + self._try_pop_consumer(path.as_posix(), consumer) return True hash = 0 # Directories always get '0' hash. 
- if not isdir: - with Path(path).open('rb') as rfptr: + if (not isdir) and exists: + with path.open('rb') as rfptr: hash = str(zlib_adler32(rfptr.read())) - last_hashes = self.hashes.get(path) + last_hashes = self.hashes.get(path.as_posix()) last_hash = None if last_hashes is None else last_hashes.get(consumer) if hash != last_hash: - self._try_push_consumer_status(path, consumer, 'changed') - self._try_push_consumer_hash(path, consumer, hash) + self._try_push_consumer_status(path.as_posix(), consumer, 'changed') + self._try_push_consumer_hash(path.as_posix(), consumer, hash) return True - self._try_push_consumer_status(path, consumer, 'same') + self._try_push_consumer_status(path.as_posix(), consumer, 'same') return False def get_status(self, path: str, consumer: str): @@ -89,12 +92,12 @@ class SymbiCache: with Path(self.cachefile_path).open('r') as rfptr: self.hashes = json_load(rfptr) except JSONDecodeError as jerr: - print("""WARNING: .symbicache is corrupted! -This will cause flow to re-execute from the beggining.""") + sfprint(0, 'WARNING: .symbicache is corrupted!\n' + 'This will cause flow to re-execute from the beginning.') self.hashes = {} except FileNotFoundError: - print("""Couldn\'t open .symbicache cache file. -This will cause flow to re-execute from the beggining.""") + sfprint(0, 'Couldn\'t open .symbicache cache file.\n' + 'This will cause flow to re-execute from the beginning.') self.hashes = {} def save(self): From 47733138a34a5f123cdfcaee55ccbb62ad18c5cc Mon Sep 17 00:00:00 2001 From: Krzysztof Boronski Date: Fri, 27 May 2022 10:15:12 -0500 Subject: [PATCH 2/3] Rename symbicache to f4cache Signed-off-by: Krzysztof Boronski --- docs/f4pga/DevNotes.md | 4 +-- docs/f4pga/Usage.md | 8 +++--- f4pga/__init__.py | 59 +++++++++++++++++++++--------------------- f4pga/cache.py | 14 +++++----- 4 files changed, 42 insertions(+), 43 deletions(-) diff --git a/docs/f4pga/DevNotes.md b/docs/f4pga/DevNotes.md index de2ea74..982a928 100644 --- a/docs/f4pga/DevNotes.md +++ b/docs/f4pga/DevNotes.md @@ -58,8 +58,8 @@ particular consumer. This is necessary, because the system tries to avoid rebuil when possible and status of each file (modified/unmodified) may differ in regards to individual stages. -Keeping track of status of each file is done using `SymbiCache` class, which is -defined in `sf_cache.py` file. `SymbiCache` is used mostly inside `Flow`'s methods. +Keeping track of status of each file is done using `F4Cache` class, which is +defined in `cache.py` file. `F4Cache` is used mostly inside `Flow`'s methods. ### Internal environmental variable system diff --git a/docs/f4pga/Usage.md b/docs/f4pga/Usage.md index e4affd2..75b7a8e 100644 --- a/docs/f4pga/Usage.md +++ b/docs/f4pga/Usage.md @@ -71,21 +71,21 @@ file or derived from the paths of the dependencies taken by the module. A *flow* is set of *modules* executed in a right order to produce a *target*. -### .symbicache +### .f4cache All *dependencies* are tracked by a modification tracking system which stores hashes of the files -(directories get always `'0'` hash) in `.symbicache` file in the root of the project. +(directories get always `'0'` hash) in `.f4cache` file in the root of the project. When F4PGA constructs a *flow*, it will try to omit execution of modules which would receive the same data on their input. There is a strong _assumption_ there that a *module*'s output remains unchanged if the input configuration isn't changed, ie. *modules* are deterministic. 
This is might be not true for some tools and in case you really want to re-run -a stage, there's a `--nocache` option that treats the `.symbicache` file as if it was empty. +a stage, there's a `--nocache` option that treats the `.f4cache` file as if it was empty. ### Resolution A *dependency* is said to be *resolved* if it meets one of the following criteria: -* it exists on persistent storage and its hash matches the one stored in .symbicache +* it exists on persistent storage and its hash matches the one stored in .f4cache * there exists such *flow* that all of the dependencies of its modules are *resolved* and it produces the *dependency* in question. diff --git a/f4pga/__init__.py b/f4pga/__init__.py index ae795a6..4e12d5f 100755 --- a/f4pga/__init__.py +++ b/f4pga/__init__.py @@ -35,7 +35,7 @@ from f4pga.common import ( sub as common_sub ) from f4pga.module import * -from f4pga.cache import SymbiCache +from f4pga.cache import F4Cache from f4pga.flow_config import ( ProjectFlowConfig, FlowConfig, @@ -48,7 +48,7 @@ from f4pga.module_inspector import get_module_info from f4pga.stage import Stage from f4pga.argparser import setup_argparser, get_cli_flow_config -SYMBICACHEPATH = '.symbicache' +F4CACHEPATH = '.f4cache' binpath = str(Path(sys_argv[0]).resolve().parent.parent) mypath = str(Path(__file__).resolve().parent) @@ -109,9 +109,9 @@ def map_outputs_to_stages(stages: 'list[Stage]'): 'provider at most.') return os_map -def filter_existing_deps(deps: 'dict[str, ]', symbicache): +def filter_existing_deps(deps: 'dict[str, ]', f4cache): return [(n, p) for n, p in deps.items() \ - if req_exists(p)] # and not dep_differ(p, symbicache)] + if req_exists(p)] # and not dep_differ(p, f4cache)] def get_stage_values_override(og_values: dict, stage: Stage): values = og_values.copy() @@ -144,35 +144,34 @@ def prepare_stage_input(stage: Stage, values: dict, dep_paths: 'dict[str, ]', return stage_mod_cfg -def update_dep_statuses(paths, consumer: str, symbicache: SymbiCache): +def update_dep_statuses(paths, consumer: str, f4cache: F4Cache): if type(paths) is str: - return symbicache.update(Path(paths), consumer) + return f4cache.update(Path(paths), consumer) elif type(paths) is list: for p in paths: - return update_dep_statuses(p, consumer, symbicache) + return update_dep_statuses(p, consumer, f4cache) elif type(paths) is dict: for _, p in paths.items(): - return update_dep_statuses(p, consumer, symbicache) + return update_dep_statuses(p, consumer, f4cache) fatal(-1, 'WRONG PATHS TYPE') -def dep_differ(paths, consumer: str, symbicache: SymbiCache): +def dep_differ(paths, consumer: str, f4cache: F4Cache): """ Check if a dependency differs from its last version, lack of dependency is treated as "differs" """ if type(paths) is str: - s = symbicache.get_status(paths, consumer) - return symbicache.get_status(paths, consumer) != 'same' + return f4cache.get_status(paths, consumer) != 'same' elif type(paths) is list: - return True in [dep_differ(p, consumer, symbicache) for p in paths] + return True in [dep_differ(p, consumer, f4cache) for p in paths] elif type(paths) is dict: - return True in [dep_differ(p, consumer, symbicache) \ + return True in [dep_differ(p, consumer, f4cache) \ for _, p in paths.items()] return False def dep_will_differ(target: str, paths, consumer: str, os_map: 'dict[str, Stage]', run_stages: 'set[str]', - symbicache: SymbiCache): + f4cache: F4Cache): """ Check if a dependency or any of the dependencies it depends on differ from their last versions. 
@@ -181,8 +180,8 @@ def dep_will_differ(target: str, paths, consumer: str, provider = os_map.get(target) if provider: return (provider.name in run_stages) or \ - dep_differ(paths, consumer, symbicache) - return dep_differ(paths, consumer, symbicache) + dep_differ(paths, consumer, f4cache) + return dep_differ(paths, consumer, f4cache) def _print_unreachable_stage_message(provider: Stage, take: str): sfprint(0, ' Stage ' @@ -214,31 +213,31 @@ class Flow: run_stages: 'set[str]' # Number of stages that relied on outdated version of a (checked) dependency deps_rebuilds: 'dict[str, int]' - symbicache: 'SymbiCache | None' + f4cache: 'F4Cache | None' flow_cfg: FlowConfig def __init__(self, target: str, cfg: FlowConfig, - symbicache: 'SymbiCache | None'): + f4cache: 'F4Cache | None'): self.target = target self.os_map = map_outputs_to_stages(cfg.stages.values()) explicit_deps = cfg.get_dependency_overrides() # print(explicit_deps) - self.dep_paths = dict(filter_existing_deps(explicit_deps, symbicache)) + self.dep_paths = dict(filter_existing_deps(explicit_deps, f4cache)) self.run_stages = set() - self.symbicache = symbicache + self.f4cache = f4cache self.cfg = cfg self.deps_rebuilds = {} self._resolve_dependencies(self.target, set()) def _dep_will_differ(self, dep: str, paths, consumer: str): - if not self.symbicache: # Handle --nocache mode + if not self.f4cache: # Handle --nocache mode return True return dep_will_differ(dep, paths, consumer, self.os_map, self.run_stages, - self.symbicache) + self.f4cache) def _resolve_dependencies(self, dep: str, stages_checked: 'set[str]'): # Initialize the dependency status if necessary @@ -353,16 +352,16 @@ class Flow: else: assert(provider) - any_dep_differ = False if (self.symbicache is not None) else True + any_dep_differ = False if (self.f4cache is not None) else True for p_dep in provider.takes: if not self._build_dep(p_dep.name): assert (p_dep.spec != 'req') continue - if self.symbicache is not None: + if self.f4cache is not None: any_dep_differ |= \ update_dep_statuses(self.dep_paths[p_dep.name], - provider.name, self.symbicache) + provider.name, self.f4cache) # If dependencies remained the same, consider the dep as up-to date # For example, when changing a comment in Verilog source code, @@ -390,9 +389,9 @@ class Flow: def execute(self): self._build_dep(self.target) - if self.symbicache: + if self.f4cache: update_dep_statuses(self.dep_paths[self.target], '__target', - self.symbicache) + self.f4cache) sfprint(0, f'Target `{Style.BRIGHT + self.target + Style.RESET_ALL}` ' f'-> {self.dep_paths[self.target]}') @@ -583,7 +582,7 @@ def cmd_build(args: Namespace): flow = Flow( target=target, cfg=flow_cfg, - symbicache=SymbiCache(SYMBICACHEPATH) if not args.nocache else None + f4cache=F4Cache(F4CACHEPATH) if not args.nocache else None ) dep_print_verbosity = 0 if args.pretend else 2 @@ -600,8 +599,8 @@ def cmd_build(args: Namespace): sfprint(0, e) sfbuild_fail() - if flow.symbicache: - flow.symbicache.save() + if flow.f4cache: + flow.f4cache.save() def cmd_show_dependencies(args: Namespace): """ sfbuild's `showd` command implementation """ diff --git a/f4pga/cache.py b/f4pga/cache.py index 75a5d41..5b60e17 100755 --- a/f4pga/cache.py +++ b/f4pga/cache.py @@ -4,9 +4,9 @@ from json import dump as json_dump, load as json_load, JSONDecodeError from f4pga.common import sfprint -class SymbiCache: +class F4Cache: """ - `SymbiCache` is used to track changes among dependencies and keep the status of the files on a persistent storage. 
+ `F4Cache` is used to track changes among dependencies and keep the status of the files on a persistent storage. Files which are tracked get their checksums calculated and stored in a file. If file's checksum differs from the one saved in a file, that means, the file has changed. """ @@ -91,13 +91,13 @@ class SymbiCache: try: with Path(self.cachefile_path).open('r') as rfptr: self.hashes = json_load(rfptr) - except JSONDecodeError as jerr: - sfprint(0, 'WARNING: .symbicache is corrupted!\n' - 'This will cause flow to re-execute from the beginning.') + except JSONDecodeError: + sfprint(0, f'WARNING: `{self.cachefile_path}` f4cache is corrupted!\n' + 'This will cause flow to re-execute from the beginning.') self.hashes = {} except FileNotFoundError: - sfprint(0, 'Couldn\'t open .symbicache cache file.\n' - 'This will cause flow to re-execute from the beginning.') + sfprint(0, f'Couldn\'t open `{self.cachefile_path}` cache file.\n' + 'This will cause flow to re-execute from the beginning.') self.hashes = {} def save(self): From 607e303e93b4c59e3c4db7876e4e5cf0cd64a7d8 Mon Sep 17 00:00:00 2001 From: Krzysztof Boronski Date: Fri, 27 May 2022 13:36:41 -0500 Subject: [PATCH 3/3] f4cache: Handle status checks without updates. Fix and improve project status reporting Signed-off-by: Krzysztof Boronski --- f4pga/__init__.py | 58 +++++++++++++++++++++++++++++++++++++---------- f4pga/cache.py | 49 +++++++++++++++++++++++++++------------ f4pga/common.py | 8 ++++--- 3 files changed, 86 insertions(+), 29 deletions(-) diff --git a/f4pga/__init__.py b/f4pga/__init__.py index 4e12d5f..acf8fd7 100755 --- a/f4pga/__init__.py +++ b/f4pga/__init__.py @@ -162,6 +162,8 @@ def dep_differ(paths, consumer: str, f4cache: F4Cache): """ if type(paths) is str: + if not Path(paths).exists(): + return True return f4cache.get_status(paths, consumer) != 'same' elif type(paths) is list: return True in [dep_differ(p, consumer, f4cache) for p in paths] @@ -196,6 +198,10 @@ def config_mod_runctx(stage: Stage, values: 'dict[str, ]', dep_paths, config_paths) return ModRunCtx(share_dir_path, binpath, config) +def _process_dep_path(path: str, f4cache: F4Cache): + f4cache.process_file(Path(path)) +_cache_deps = deep(_process_dep_path) + class Flow: """ Describes a complete, configured flow, ready for execution. """ @@ -222,9 +228,11 @@ class Flow: self.os_map = map_outputs_to_stages(cfg.stages.values()) explicit_deps = cfg.get_dependency_overrides() - # print(explicit_deps) - self.dep_paths = dict(filter_existing_deps(explicit_deps, f4cache)) + if f4cache is not None: + for dep in self.dep_paths.values(): + _cache_deps(dep, f4cache) + self.run_stages = set() self.f4cache = f4cache self.cfg = cfg @@ -239,7 +247,11 @@ class Flow: self.os_map, self.run_stages, self.f4cache) - def _resolve_dependencies(self, dep: str, stages_checked: 'set[str]'): + def _resolve_dependencies(self, dep: str, stages_checked: 'set[str]', + skip_dep_warnings: 'set[str]' = None): + if skip_dep_warnings is None: + skip_dep_warnings = set() + # Initialize the dependency status if necessary if self.deps_rebuilds.get(dep) is None: self.deps_rebuilds[dep] = 0 @@ -256,7 +268,7 @@ class Flow: # config if it is. 
for take in provider.takes: - self._resolve_dependencies(take.name, stages_checked) + self._resolve_dependencies(take.name, stages_checked, skip_dep_warnings) # If any of the required dependencies is unavailable, then the # provider stage cannot be run take_paths = self.dep_paths.get(take.name) @@ -266,9 +278,22 @@ class Flow: if not take_paths and take.spec == 'req': _print_unreachable_stage_message(provider, take) return + + will_differ = False + if take_paths is None: + # TODO: This won't trigger rebuild if an optional dependency got removed + will_differ = False + elif req_exists(take_paths): + will_differ = self._dep_will_differ(take.name, take_paths, provider.name) + else: + will_differ = True - if self._dep_will_differ(take.name, take_paths, provider.name): - sfprint(2, f'{take.name} is causing rebuild for {provider.name}') + + if will_differ: + if take.name not in skip_dep_warnings: + sfprint(2, f'{Style.BRIGHT}{take.name}{Style.RESET_ALL} is causing ' + f'rebuild for `{Style.BRIGHT}{provider.name}{Style.RESET_ALL}`') + skip_dep_warnings.add(take.name) self.run_stages.add(provider.name) self.deps_rebuilds[take.name] += 1 @@ -277,6 +302,9 @@ class Flow: self.cfg.get_dependency_overrides()) outputs = module_map(provider.module, modrunctx) + for output_paths in outputs.values(): + if req_exists(output_paths) and self.f4cache: + _cache_deps(output_paths, self.f4cache) stages_checked.add(provider.name) self.dep_paths.update(outputs) @@ -287,7 +315,6 @@ class Flow: # Verify module's outputs and add paths as values. outs = outputs.keys() - # print(outs) for o in provider.produces: if o.name not in outs: if o.spec == 'req' or (o.spec == 'demand' and \ @@ -350,7 +377,7 @@ class Flow: if req_exists(paths) and not run: return True else: - assert(provider) + assert provider any_dep_differ = False if (self.f4cache is not None) else True for p_dep in provider.takes: @@ -382,17 +409,22 @@ class Flow: self.run_stages.discard(provider.name) - if not req_exists(paths): - raise DependencyNotProducedException(dep, provider.name) + for product in provider.produces: + exists = req_exists(paths) + if (product.spec == 'req') and not exists: + raise DependencyNotProducedException(dep, provider.name) + if exists and self.f4cache: + _cache_deps(self.dep_paths[product.name], self.f4cache) return True def execute(self): self._build_dep(self.target) if self.f4cache: + _cache_deps(self.dep_paths[self.target], self.f4cache) update_dep_statuses(self.dep_paths[self.target], '__target', self.f4cache) - sfprint(0, f'Target `{Style.BRIGHT + self.target + Style.RESET_ALL}` ' + sfprint(0, f'Target {Style.BRIGHT + self.target + Style.RESET_ALL} ' f'-> {self.dep_paths[self.target]}') def display_dep_info(stages: 'Iterable[Stage]'): @@ -595,8 +627,10 @@ def cmd_build(args: Namespace): try: flow.execute() + except AssertionError as e: + raise e except Exception as e: - sfprint(0, e) + sfprint(0, f'{e}') sfbuild_fail() if flow.f4cache: diff --git a/f4pga/cache.py b/f4pga/cache.py index 5b60e17..498ea11 100755 --- a/f4pga/cache.py +++ b/f4pga/cache.py @@ -4,6 +4,12 @@ from json import dump as json_dump, load as json_load, JSONDecodeError from f4pga.common import sfprint +def _get_hash(path: Path): + if not path.is_dir(): + with path.open('rb') as rfptr: + return zlib_adler32(rfptr.read()) + return 0 # Directories always get '0' hash. + class F4Cache: """ `F4Cache` is used to track changes among dependencies and keep the status of the files on a persistent storage. 
@@ -12,6 +18,7 @@ class F4Cache: """ hashes: 'dict[str, dict[str, str]]' + current_hashes: 'dict[str, str]' status: 'dict[str, str]' cachefile_path: str @@ -21,6 +28,7 @@ class F4Cache: """ self.status = {} + self.current_hashes = {} self.cachefile_path = cachefile_path self.load() @@ -42,6 +50,12 @@ class F4Cache: if not self.status.get(path): self.status[path] = {} self.status[path][consumer] = status + + def process_file(self, path: Path): + """ Process file for tracking with f4cache. """ + + hash = _get_hash(path) + self.current_hashes[path.as_posix()] = hash def update(self, path: Path, consumer: str): """ Add/remove a file to.from the tracked files, update checksum if necessary and calculate status. @@ -51,34 +65,41 @@ class F4Cache: by a module within the active flow. """ - exists = path.exists() + posix_path = path.as_posix() - isdir = path.is_dir() - if not exists: - self._try_pop_consumer(path.as_posix(), consumer) + assert self.current_hashes.get(posix_path) is not None + + if not path.exists(): + self._try_pop_consumer(posix_path, consumer) return True - hash = 0 # Directories always get '0' hash. - if (not isdir) and exists: - with path.open('rb') as rfptr: - hash = str(zlib_adler32(rfptr.read())) - last_hashes = self.hashes.get(path.as_posix()) + hash = self.current_hashes[posix_path] + last_hashes = self.hashes.get(posix_path) last_hash = None if last_hashes is None else last_hashes.get(consumer) if hash != last_hash: - self._try_push_consumer_status(path.as_posix(), consumer, 'changed') - self._try_push_consumer_hash(path.as_posix(), consumer, hash) + self._try_push_consumer_status(posix_path, consumer, 'changed') + self._try_push_consumer_hash(posix_path, consumer, hash) return True - self._try_push_consumer_status(path.as_posix(), consumer, 'same') + self._try_push_consumer_status(posix_path, consumer, 'same') return False def get_status(self, path: str, consumer: str): """ Get status for a file with a given path. - returns 'untracked' if the file is not tracked or hasn't been treated with `update` procedure before calling - `get_status`. + returns 'untracked' if the file is not tracked. """ + + assert self.current_hashes.get(path) is not None + statuses = self.status.get(path) if not statuses: + hashes = self.hashes.get(path) + if hashes is not None: + last_hash = hashes.get(consumer) + if last_hash is not None: + if self.current_hashes[path] != last_hash: + return 'changed' + return 'same' return 'untracked' status = statuses.get(consumer) if not status: diff --git a/f4pga/common.py b/f4pga/common.py index 4510026..b4a5506 100644 --- a/f4pga/common.py +++ b/f4pga/common.py @@ -71,11 +71,13 @@ def deep(fun): """ def d(paths, *args, **kwargs): if type(paths) is str: - return fun(paths) + return fun(paths, *args, **kwargs) elif type(paths) is list: - return [d(p) for p in paths]; + return [d(p, *args, **kwargs) for p in paths]; elif type(paths) is dict: - return dict([(k, d(p)) for k, p in paths.items()]) + return dict([(k, d(p, *args, **kwargs)) for k, p in paths.items()]) + else: + raise RuntimeError(f'paths is of type {type(paths)}') return d