diff --git a/easybuild/framework/easyconfig/format/one.py b/easybuild/framework/easyconfig/format/one.py index acd1af6f6c..fe37fa3f7c 100644 --- a/easybuild/framework/easyconfig/format/one.py +++ b/easybuild/framework/easyconfig/format/one.py @@ -39,6 +39,7 @@ import tempfile from easybuild.base import fancylogger +from easybuild.os_hook import OSProxy from easybuild.framework.easyconfig.format.format import DEPENDENCY_PARAMETERS, EXCLUDED_KEYS_REPLACE_TEMPLATES from easybuild.framework.easyconfig.format.format import FORMAT_DEFAULT_VERSION, GROUPED_PARAMS, LAST_PARAMS from easybuild.framework.easyconfig.format.format import SANITY_CHECK_PATHS_DIRS, SANITY_CHECK_PATHS_FILES @@ -140,7 +141,7 @@ def get_config_dict(self): cfg_copy = {} for key in cfg: # skip special variables like __builtins__, and imported modules (like 'os') - if key != '__builtins__' and "'module'" not in str(type(cfg[key])): + if key != '__builtins__' and "'module'" not in str(type(cfg[key])) and not isinstance(cfg[key], OSProxy): try: cfg_copy[key] = copy.deepcopy(cfg[key]) except Exception as err: diff --git a/easybuild/main.py b/easybuild/main.py index 3108275b01..268d6d840c 100755 --- a/easybuild/main.py +++ b/easybuild/main.py @@ -39,6 +39,7 @@ * Maxime Boissonneault (Compute Canada) * Bart Oldeman (McGill University, Calcul Quebec, Digital Research Alliance of Canada) """ +# flake8: noqa: E402 import copy import os import stat @@ -47,9 +48,13 @@ import traceback from datetime import datetime +# if os.environ.get('EB_ISOLATED_CONTEXTS', '').lower() in ('1', 'true', 'yes'): # IMPORTANT this has to be the first easybuild import as it customises the logging # expect missing log output when this not the case! 
-from easybuild.tools.build_log import EasyBuildError, print_error_and_exit, print_msg, print_warning, stop_logging +from easybuild import os_hook # Imported to inject hook that replaces system os with our wrapped version +os_hook.install_os_hook() + +from easybuild.tools.build_log import EasyBuildError, print_error_and_exit, print_msg, print_warning, stop_logging # noqa: E402 from easybuild.tools.build_log import EasyBuildExit from easybuild.framework.easyblock import build_and_install_one, inject_checksums, inject_checksums_to_json diff --git a/easybuild/os_hook.py b/easybuild/os_hook.py new file mode 100644 index 0000000000..1724deaa5d --- /dev/null +++ b/easybuild/os_hook.py @@ -0,0 +1,136 @@ +import importlib +import importlib._bootstrap_external +import importlib.abc +import importlib.util +import sys +import types + + +class ProxyLoader(importlib.abc.Loader): + """Loader to create our proxy instead of the real module.""" + proxy_cls = None # To be defined in subclasses + + def create_module(self, spec): + # Import real module safely + sys.meta_path = [f for f in sys.meta_path if not isinstance(f, HookFinder)] + real_module = importlib.import_module(spec.name) + sys.meta_path.insert(0, HookFinder()) + + # Return proxy instead of real module + return self.proxy_cls(real_module) + + def exec_module(self, module): + """Needs to be defined, can be used to alter the module after creation if needed.""" + + +class ModuleProxy(types.ModuleType): + """Generic proxy module to intercept attribute access.""" + overrides = None + module_name = None + + def __init__(self, real): + super().__init__(self.module_name) + self._real = real + # self._not_found = set() + + def __getattr__(self, name): + # Overrides take precedence; the real module is only consulted as a lazy fallback + # if name in self.overrides: + # # print(f"Intercepted access to {self.module_name}.{name}, returning override value.") + # pass + # else: + # self._not_found.add(name) + # print("NOTFOUND", self.module_name, sorted(self._not_found)) + return 
self.overrides[name] if name in self.overrides else getattr(self._real, name) + + def __dir__(self): + return dir(self._real) + + @classmethod + def register_override(cls, name, value): + cls.overrides[name] = value + + @classmethod + def loader(cls): + class Loader(ProxyLoader): + proxy_cls = cls + return Loader() + + +class SubprocessProxy(ModuleProxy): + """Proxy module to intercept subprocess attribute access.""" + overrides = {} + module_name = "subprocess" + + +class OSProxy(ModuleProxy): + """Proxy module to intercept os attribute access.""" + overrides = {} + module_name = "os" + + +class PosixProxy(ModuleProxy): + """Proxy module to intercept posix attribute access.""" + overrides = {} + module_name = "posix" + + +class PosixpathProxy(ModuleProxy): + """Proxy module to intercept posixpath attribute access.""" + overrides = {} + module_name = "posixpath" + + +proxy_map = { + "os": OSProxy, + "subprocess": SubprocessProxy, + "posix": PosixProxy, + "posixpath": PosixpathProxy, + # "builtins": BuiltinProxy, +} + + +class HookFinder(importlib.abc.MetaPathFinder): + """Meta path finder to intercept imports of 'os' and return our proxy.""" + def find_spec(self, fullname, path, target=None): + if fullname in proxy_map: + return importlib.util.spec_from_loader(fullname, proxy_map[fullname].loader()) + return None + + +def install_os_hook(): + """Install the os hooking mechanism to intercept imports of 'os' and return our proxy.""" + if not any(isinstance(f, HookFinder) for f in sys.meta_path): + sys.meta_path.insert(0, HookFinder()) + + # If already imported, replace in place + for name, proxy in proxy_map.items(): + if name in sys.modules and not isinstance(sys.modules[name], proxy): + real_module = sys.modules[name] + sys.modules[name] = proxy(real_module) + + # https://stackoverflow.com/questions/79420610/undertanding-python-import-process-importing-custom-os-module + # Reload system modules that might have already imported os with a different name, at python initialization + # - 
tempfile imports os as _os and this is happening before we have a chance to install our hook. + # - os.path is a separate module (eg posixpath) that imports os into itself and needs to be reloaded to import + # our hook for eg `os.path.expanduser` to work with `os.environ['HOME'] = '...'` + # - shutil is used in CUDA sanity check with `shutil.which` to find `cuobjdump` + system_modules = [ + "os", "sys", "tempfile", "posixpath", "shutil", "importlib", "io", "glob", + ] + for name in system_modules: + if name in sys.modules: + # print(f"Reloading system module {name} to ensure it imports our os hook.") + importlib.reload(sys.modules[name]) + + # Needed to override how import paths are resolved in case '' is in sys.path indicating the CWD. + # Cannot be reloaded without breaking stuff + importlib._bootstrap_external._os = sys.modules["posix"] + + # To ensure we have the contextes module loaded to set all the function overrides + importlib.import_module("easybuild.tools.contextes") + + sys.modules["posixpath"].os = sys.modules["os"] + sys.modules["posixpath"]._real.os = sys.modules["os"] + sys.modules["io"].os = sys.modules["os"] + # sys.modules["io"]._real.os = sys.modules["os"] diff --git a/easybuild/tools/contextes.py b/easybuild/tools/contextes.py new file mode 100644 index 0000000000..1da166448a --- /dev/null +++ b/easybuild/tools/contextes.py @@ -0,0 +1,260 @@ +import builtins +import copy +import os +import posixpath +import subprocess + +from functools import wraps + +from easybuild import os_hook + + +# take copy of original environment, so we can restore (parts of) it later +ORIG_OS_ENVIRON = copy.deepcopy(os.environ) +ORIG_CWD = os.getcwd() + + +class EnvironmentContext(dict): + """Environment context manager to track changes to the environment in a specific context.""" + def __init__(self, copy_from=None): + super().__init__() + if copy_from is None: + copy_from = ORIG_OS_ENVIRON + self.update(copy_from.copy()) + self._changes = {} + self._cwd = ORIG_CWD + + 
@property + def changes(self): + return self._changes + + def clear_changes(self): + """Clear the tracked changes, but keep the current environment state.""" + self._changes.clear() + + def get_context_path(self, path): + """Get the absolute path for a given path in the context of this environment.""" + # print(str(path)) + # if not isinstance(path, str): + # print(f'GET_CONTEXT_PATH: type(path)={type(path)} path={path}, cwd={self._cwd}') + # # print(path.__dict__) + if isinstance(path, int): + return path + _path = path + if path and not os.path.isabs(path): + _path = os.path.normpath(os.path.join(self._cwd, path)) + return _path + + def getcwd(self): + """Get the current working directory in this context.""" + if not os.path.exists(self._cwd): + raise FileNotFoundError("Current working directory '%s' does not exist in this context" % self._cwd) + return self._cwd + + def chdir(self, path): + """Change the current working directory in this context.""" + path = self.get_context_path(path) + if not os.path.exists(path): + raise OSError("Cannot change directory to '%s': No such file or directory" % path) + self._cwd = path + + +_curr_context: EnvironmentContext = EnvironmentContext() + + +def get_context() -> EnvironmentContext: + """ + Return current context for tracking environment changes. + """ + # TODO: Make this function thread-aware so that different threads can have their own context if needed. 
+ return _curr_context + + +class EnvironProxy(): + """Hook into os.environ and replace it with calls from this module to track changes to the environment.""" + def __getattribute__(self, name): + return get_context().__getattribute__(name) + + # These methods do not go through the instance __getattribute__ + def __getitem__(self, key): + return get_context().__getitem__(key) + + def __setitem__(self, key, value): + get_context().__setitem__(key, value) + + def __delitem__(self, key): + get_context().__delitem__(key) + + def __iter__(self): + return get_context().__iter__() + + def __contains__(self, key): + return get_context().__contains__(key) + + def __len__(self): + return get_context().__len__() + + +################################################################################ +# os environment specific overrides +os_hook.OSProxy.register_override('environ', EnvironProxy()) +os_hook.OSProxy.register_override('getenv', lambda key, default=None: get_context().get(key, default)) +os_hook.OSProxy.register_override('unsetenv', lambda key: get_context().pop(key, None)) +os_hook.OSProxy.register_override('putenv', lambda key, value: get_context().__setitem__(key, value)) + + +################################################################################ +# os CWD specific overrides +def _gcp(path): + """Utility function to get the context path for a given path.""" + return get_context().get_context_path(path) + + +def _gcp_one(func): + """Utility function to wrap a function that takes a single path argument, that can be relative to a directory + file descriptor""" + @wraps(func) + def wrapped(path, *args, **kwargs): + # Exception specific for behavior of pathlib in python<3.11 where the first argument passed can be a + # _NormalAccessor object + # print(f'_gcp_one: path={path} args={args} kwargs={kwargs}') + if path.__class__.__name__ == '_NormalAccessor': + args = list(args) + path = args.pop(0) + + # If dir_fd is specified, the path is relative to that 
directory and not to the context's CWD, + # to preserve the expected behavior of dir_fd. EG: when calling shutil.rmtree, it can internally use + # os.scandir and recursively delete relative paths, w.r.t the directory file descriptor. + if kwargs.get('dir_fd') is None: + path = _gcp(path) + return func(path, *args, **kwargs) + return wrapped + + +def _gcp_two(func): + """Utility function to wrap a function that takes two path arguments, that can be relative to a directory + file descriptor""" + @wraps(func) + def wrapped(src, dst, *args, **kwargs): + if kwargs.get('src_dir_fd') is None: + src = _gcp(src) + if kwargs.get('dst_dir_fd') is None: + dst = _gcp(dst) + return func(src, dst, *args, **kwargs) + return wrapped + + +_os = os._real +one_path_funcs = [ + 'open', 'listdir', 'mkdir', 'remove', 'rmdir', 'chmod', 'stat', 'lstat', 'chown', + 'access', 'walk', 'readlink', 'unlink', 'utime', 'chroot', + 'makedirs', 'removedirs', 'rmdir', 'statvfs', 'link', 'readlink', + 'mkfifo', 'mknod', 'pathconf', + 'getxattr', 'setxattr', 'listxattr', 'removexattr', + 'scandir', +] +two_path_funcs_dirfd = [ + 'rename', 'link', 'replace' +] + +for proxy in [os_hook.OSProxy, os_hook.PosixProxy]: + proxy.register_override('chdir', lambda path: get_context().chdir(path)) + proxy.register_override('getcwd', lambda: get_context().getcwd()) + for func_name in one_path_funcs: + orig = getattr(_os, func_name) + proxy.register_override(func_name, _gcp_one(orig)) + + for func_name in two_path_funcs_dirfd: + orig = getattr(_os, func_name) + proxy.register_override(func_name, _gcp_two(orig)) + + +@wraps(_os.symlink) +def _wrapped_symlink(src, dst, *args, **kwargs): + """Dedicated wrapper for os.symlink. + The behavior of symlink is a bit special, as the src is not interpreted. 
+ Similar to doing ln -s SRC DST, SRC is not relative to the CWD but will be evaluated when accessing the symlink.""" + + if kwargs.get('dir_fd') is None: + dst = _gcp(dst) + + return _os.symlink(src, dst, *args, **kwargs) + + +os_hook.OSProxy.register_override('symlink', _wrapped_symlink) + +################################################################################ +# posixpath overrides +_posixpath = posixpath._real +os_hook.OSProxy.register_override('path', posixpath) +for func_name in [ + 'abspath', 'exists', + # 'expanduser', + # 'expandvars', + 'getatime', 'getctime', 'getmtime', 'getsize', + 'isfile', 'isdir', 'islink', 'ismount', + 'realpath', +]: + orig = getattr(_posixpath, func_name) + os_hook.PosixpathProxy.register_override(func_name, _gcp_one(orig)) + +for func_name in ['samefile', ]: + orig = getattr(_posixpath, func_name) + os_hook.PosixpathProxy.register_override(func_name, _gcp_two(orig)) + + +def my_relpath(path, start=os.curdir, *args): + return _posixpath.relpath(_gcp(path), _gcp(start), *args) + + +os_hook.PosixpathProxy.register_override( + 'relpath', my_relpath +) + + +################################################################################ +# subprocess.Popen override +class ContextPopen(subprocess._real.Popen): + """Custom Popen class to apply the current context's environment changes when spawning subprocesses.""" + def __init__(self, *args, **kwargs): + context = get_context() + if kwargs.get('env', None) is None: + kwargs['env'] = context + # an explicit cwd=None must resolve to the context's CWD too, not to the real process CWD + kwargs['cwd'] = context.get_context_path('.' if kwargs.get('cwd') is None else kwargs['cwd']) + + super().__init__(*args, **kwargs) + + +os_hook.SubprocessProxy.register_override('Popen', ContextPopen) + +################################################################################ +# open() overrides +# os_hook.BuiltinProxy.register_override('open', _gcp_one(open)) + +original_open = builtins.open +# open called as is calls builtins.open under the hood, but proxying builtin itself does not work so we directly 
+# override builtins.open here to replace `open` calls across the code. +builtins.open = _gcp_one(original_open) +# io.open = context_open(original_open) + + +# import io +# print(os.open) +# print(os._real.open) +# print(io.open) +# import importlib +# importlib.invalidate_caches() +# importlib.reload(io) +# print(io.open) +# exit(0) + +# Needed for python <= 3.7. EG `shutil.copytree` -> `copystat` will behave differently depending on whether `stat` is in +# `supports_follow_symlinks` or not. Since the code tests for `function in os.supports_follow_symlinks` and not for +# `function.__name__ in os.supports_follow_symlinks`, we have to replace the functions in `os.supports_follow_symlinks` +# with the wrapped versions. +if hasattr(os, 'supports_follow_symlinks'): + new_follow_symlinks = set() + for func in os.supports_follow_symlinks: + new_follow_symlinks.add(getattr(os, func.__name__)) + os.supports_follow_symlinks = new_follow_symlinks diff --git a/easybuild/tools/environment.py b/easybuild/tools/environment.py index 86e79c9830..d90e3caa94 100644 --- a/easybuild/tools/environment.py +++ b/easybuild/tools/environment.py @@ -30,23 +30,17 @@ * Toon Willems (Ghent University) * Ward Poelmans (Ghent University) """ -import copy import os from easybuild.base import fancylogger from easybuild.tools.build_log import EasyBuildError, dry_run_msg from easybuild.tools.config import build_option from easybuild.tools.utilities import shell_quote - - -# take copy of original environemt, so we can restore (parts of) it later -ORIG_OS_ENVIRON = copy.deepcopy(os.environ) +from easybuild.tools.contextes import get_context, ORIG_OS_ENVIRON # noqa(F401) _log = fancylogger.getLogger('environment', fname=False) -_changes = {} - def write_changes(filename): """ @@ -54,7 +48,7 @@ def write_changes(filename): """ try: with open(filename, 'w') as script: - for key, changed_value in _changes.items(): + for key, changed_value in get_changes().items(): script.write('export %s=%s\n' % (key, 
shell_quote(changed_value))) except IOError as err: raise EasyBuildError("Failed to write to %s: %s", filename, err) @@ -65,15 +59,14 @@ def reset_changes(): """ Reset the changes tracked by this module """ - global _changes - _changes = {} + get_context().clear_changes() -def get_changes(): +def get_changes(show_unset=False) -> dict: """ Return tracked changes made in environment. """ - return _changes + return get_context().changes.copy() def setvar(key, value, verbose=True, log_changes=True): @@ -88,9 +81,9 @@ def setvar(key, value, verbose=True, log_changes=True): oldval_info = "previous value: '%s'" % os.environ[key] except KeyError: oldval_info = "previously undefined" - # os.putenv() is not necessary. os.environ will call this. os.environ[key] = value - _changes[key] = value + # record the change explicitly: the context does not track item assignments on its own + get_context().changes[key] = value if log_changes: _log.info("Environment variable %s set to %s (%s)", key, value, oldval_info) @@ -171,7 +164,6 @@ def modify_env(old, new, verbose=True, log_changes=True): for key in old_keys: if key not in new_keys: _log.debug("Key in old environment found that is not in new one: %s (%s)", key, old[key]) - os.unsetenv(key) del os.environ[key] diff --git a/test/framework/suite.py b/test/framework/suite.py index acb481bdf3..3e135b50a8 100755 --- a/test/framework/suite.py +++ b/test/framework/suite.py @@ -30,6 +30,11 @@ @author: Toon Willems (Ghent University) @author: Kenneth Hoste (Ghent University) """ +# flake8: noqa: E402 +from easybuild.os_hook import install_os_hook +install_os_hook() + + import glob import os import sys