From 7ede8f2cd4c1ce79896f79afc1320d3284a64549 Mon Sep 17 00:00:00 2001 From: mathiasg Date: Mon, 18 Mar 2024 14:11:53 -0400 Subject: [PATCH 1/6] FIX: Avoid directory clobber during zip extraction --- templateflow/conf/_s3.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/templateflow/conf/_s3.py b/templateflow/conf/_s3.py index 57b8f7d1..0d7d300a 100644 --- a/templateflow/conf/_s3.py +++ b/templateflow/conf/_s3.py @@ -80,23 +80,28 @@ def _update_skeleton(skel_file, dest, overwrite=True, silent=False): dest = Path(dest) dest.mkdir(exist_ok=True, parents=True) with ZipFile(skel_file, 'r') as zipref: + allfiles = sorted(zipref.namelist()) + if overwrite: - zipref.extractall(str(dest)) - return True + newfiles = allfiles + else: + current_files = [s.relative_to(dest) for s in dest.glob('**/*')] + existing = sorted({'%s/' % s.parent for s in current_files}) + [ + str(s) for s in current_files + ] + newfiles = sorted(set(allfiles) - set(existing)) - allfiles = zipref.namelist() - current_files = [s.relative_to(dest) for s in dest.glob('**/*')] - existing = sorted({'%s/' % s.parent for s in current_files}) + [ - str(s) for s in current_files - ] - newfiles = sorted(set(allfiles) - set(existing)) if newfiles: if not silent: print( 'Updating TEMPLATEFLOW_HOME using S3. Adding:\n%s' % '\n'.join(newfiles) ) - zipref.extractall(str(dest), members=newfiles) + for fl in newfiles: + localpath = dest / fl + if localpath.is_dir(): # avoid extracting existing directories + continue + zipref.extract(fl, path=dest) return True if not silent: print('TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.') From d225254bbd1845ad8da30ff7f9e1fa7eac94a65b Mon Sep 17 00:00:00 2001 From: mathiasg Date: Mon, 18 Mar 2024 21:22:04 -0400 Subject: [PATCH 2/6] RF: Move autoupdate check to configuration module + Adds function to handle envvar parsing --- templateflow/__init__.py | 14 +------------- templateflow/conf/__init__.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/templateflow/__init__.py b/templateflow/__init__.py index 2bbe5e68..25b8a2c0 100644 --- a/templateflow/__init__.py +++ b/templateflow/__init__.py @@ -37,20 +37,8 @@ del version del PackageNotFoundError -import os - from . import api -from .conf import TF_USE_DATALAD, update - -if not TF_USE_DATALAD and os.getenv('TEMPLATEFLOW_AUTOUPDATE', '1') not in ( - 'false', - 'off', - '0', - 'no', - 'n', -): - # trigger skeleton autoupdate - update(local=True, overwrite=False, silent=True) +from .conf import update __all__ = [ '__copyright__', diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index a95e457e..2ea601f5 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -10,17 +10,35 @@ load_data = Loader(__package__) +def _env_to_bool(envvar: str, default: bool) -> bool: + """Check for environment variable switches and convert to booleans.""" + switches = { + 'on': {'true', 'on', '1', 'yes', 'y'}, + 'off': {'false', 'off', '0', 'no', 'n'}, + } + + val = getenv(envvar, default) + if isinstance(val, str): + if val.lower() in switches['on']: + return True + elif val.lower() in switches['off']: + return False + else: + # TODO: Create templateflow logger + print( + f'{envvar} is set to unknown value <{val}>. ' + f'Falling back to default value <{default}>' + ) + return default + return val + + TF_DEFAULT_HOME = Path.home() / '.cache' / 'templateflow' TF_HOME = Path(getenv('TEMPLATEFLOW_HOME', str(TF_DEFAULT_HOME))) TF_GITHUB_SOURCE = 'https://github.com/templateflow/templateflow.git' TF_S3_ROOT = 'https://templateflow.s3.amazonaws.com' -TF_USE_DATALAD = getenv('TEMPLATEFLOW_USE_DATALAD', 'false').lower() in ( - 'true', - 'on', - '1', - 'yes', - 'y', -) +TF_USE_DATALAD = _env_to_bool('TEMPLATEFLOW_USE_DATALAD', False) +TF_AUTOUPDATE = _env_to_bool('TEMPLATEFLOW_AUTOUPDATE', True) TF_CACHED = True TF_GET_TIMEOUT = 10 @@ -50,7 +68,7 @@ def _init_cache(): if not TF_USE_DATALAD: from ._s3 import update as _update_s3 - _update_s3(TF_HOME, local=True, overwrite=True) + _update_s3(TF_HOME, local=True, overwrite=TF_AUTOUPDATE, silent=True) _init_cache() From f9774ae6c894bab8bac653122e7033459f5ff20c Mon Sep 17 00:00:00 2001 From: mathiasg Date: Wed, 20 Mar 2024 11:30:09 -0400 Subject: [PATCH 3/6] ENH: Display autoupdate in `templateflow config` --- templateflow/cli.py | 3 ++- templateflow/conf/__init__.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/templateflow/cli.py b/templateflow/cli.py index 5e590a29..22b06f14 100644 --- a/templateflow/cli.py +++ b/templateflow/cli.py @@ -31,7 +31,7 @@ from templateflow import __package__, api from templateflow._loader import Loader as _Loader -from templateflow.conf import TF_HOME, TF_USE_DATALAD +from templateflow.conf import TF_HOME, TF_USE_DATALAD, TF_AUTOUPDATE load_data = _Loader(__package__) @@ -91,6 +91,7 @@ def config(): TEMPLATEFLOW_HOME={TF_HOME} TEMPLATEFLOW_USE_DATALAD={'on' if TF_USE_DATALAD else 'off'} + TEMPLATEFLOW_AUTOUPDATE={'on' if TF_AUTOUPDATE else 'off'} """) diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index 2ea601f5..e15bcd6f 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -10,6 +10,7 @@ load_data = Loader(__package__) + def _env_to_bool(envvar: str, default: bool) -> bool: """Check for environment variable switches and convert to booleans.""" switches = { From b3b0846106e1801c33777036eccfa18ab575f90f Mon Sep 17 00:00:00 2001 From: mathiasg Date: Wed, 3 Apr 2024 12:00:25 -0400 Subject: [PATCH 4/6] ENH: Safeguard zip extraction in try/except --- templateflow/conf/_s3.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/templateflow/conf/_s3.py b/templateflow/conf/_s3.py index 0d7d300a..d70c3cb3 100644 --- a/templateflow/conf/_s3.py +++ b/templateflow/conf/_s3.py @@ -99,9 +99,13 @@ def _update_skeleton(skel_file, dest, overwrite=True, silent=False): ) for fl in newfiles: localpath = dest / fl - if localpath.is_dir(): # avoid extracting existing directories + if localpath.exists(): continue - zipref.extract(fl, path=dest) + try: + zipref.extract(fl, path=dest) + except FileExistsError: + # If there is a conflict, do not clobber + pass return True if not silent: print('TEMPLATEFLOW_HOME directory (S3 type) was up-to-date.') From 4b39d29f879c16ebdfe90b290ba5ab447a94914a Mon Sep 17 00:00:00 2001 From: mathiasg Date: Thu, 4 Apr 2024 12:03:13 -0400 Subject: [PATCH 5/6] TST: Add test to simulate running `update()` simultaneously --- templateflow/tests/test_multiproc.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 templateflow/tests/test_multiproc.py diff --git a/templateflow/tests/test_multiproc.py b/templateflow/tests/test_multiproc.py new file mode 100644 index 00000000..0adbfddb --- /dev/null +++ b/templateflow/tests/test_multiproc.py @@ -0,0 +1,27 @@ +import os +from concurrent.futures import ProcessPoolExecutor + +import pytest + +CPUs = os.cpu_count() or 1 + + +def _update(): + from templateflow.conf import update + + update(local=False, overwrite=True, silent=True) + return True + + +@pytest.mark.skipif(CPUs < 2, reason='At least 2 CPUs are required') +def test_multi_proc_update(tmp_path, monkeypatch): + tf_home = tmp_path / 'tf_home' + monkeypatch.setenv('TEMPLATEFLOW_HOME', str(tf_home)) + + futs = [] + with ProcessPoolExecutor(max_workers=2) as executor: + for _ in range(2): + futs.append(executor.submit(_update)) + + for fut in futs: + assert fut.result() From a9f7f5bb729cc92c4e8ee22fadb224fac9ebe885 Mon Sep 17 00:00:00 2001 From: Mathias Goncalves Date: Thu, 4 Apr 2024 21:03:51 -0400 Subject: [PATCH 6/6] Apply suggestions from code review Co-authored-by: Oscar Esteban --- templateflow/__init__.py | 4 ++-- templateflow/conf/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/templateflow/__init__.py b/templateflow/__init__.py index 25b8a2c0..2d2a5ce5 100644 --- a/templateflow/__init__.py +++ b/templateflow/__init__.py @@ -37,8 +37,8 @@ del version del PackageNotFoundError -from . import api -from .conf import update +from templateflow import api +from templateflow.conf import update __all__ = [ '__copyright__', diff --git a/templateflow/conf/__init__.py b/templateflow/conf/__init__.py index e15bcd6f..a75bfd85 100644 --- a/templateflow/conf/__init__.py +++ b/templateflow/conf/__init__.py @@ -31,7 +31,7 @@ def _env_to_bool(envvar: str, default: bool) -> bool: f'Falling back to default value <{default}>' ) return default - return val + return bool(val) TF_DEFAULT_HOME = Path.home() / '.cache' / 'templateflow'