-rwxr-xr-x | bin/just-lock.py | 361
1 file changed, 359 insertions(+), 2 deletions(-)
diff --git a/bin/just-lock.py b/bin/just-lock.py
index f0206e47..d1895b6e 100755
--- a/bin/just-lock.py
+++ b/bin/just-lock.py
@@ -14,6 +14,7 @@
 # limitations under the License.

 import fcntl
+import hashlib
 import json
 import os
 import shutil
@@ -324,6 +325,17 @@ def rewrite_file_repo(repo: Json, remote_type: str,
         if not Path(path).is_absolute():
             changes["path"] = str(Path(root) / path)
         return dict(repo, **changes)
+    elif remote_type in ["archive", "zip"]:
+        # for imports from archives, file repos become archive type with subdir;
+        # any path is prepended by the subdir provided in the input file, if any
+        changes = {}
+        subdir = repo.get("path", ".")
+        if subdir not in ["", "."]:
+            existing = remote_stub.get("subdir", ".")
+            if existing not in ["", "."]:
+                subdir = os.path.join(existing, subdir)
+            changes["subdir"] = subdir
+        return dict(remote_stub, **changes)
     fail("Unsupported remote type!")


@@ -552,6 +564,68 @@ def git_fetch(*, from_repo: Optional[str], to_repo: Optional[str],


 ###
+# CAS utils
+##
+
+
+def gc_storage_lock_acquire(is_shared: bool = False) -> TextIO:
+    """Acquire garbage collector file lock for the local storage."""
+    # use same naming scheme as in Just
+    return lock_acquire(os.path.join(g_ROOT, "protocol-dependent", "gc.lock"),
+                        is_shared)
+
+
+def git_hash(content: bytes, type: str = "blob") -> Tuple[str, bytes]:
+    """Hash content as a Git object. Returns the hash, as well as the header to
+    be stored."""
+    header = "{} {}\0".format(type, len(content)).encode('utf-8')
+    h = hashlib.sha1()
+    h.update(header)
+    h.update(content)
+    return h.hexdigest(), header
+
+
+def add_to_cas(data: Union[str, bytes]) -> Tuple[str, str]:
+    """Add content to local file CAS and return its CAS location and hash."""
+    try:
+        if isinstance(data, str):
+            data = data.encode('utf-8')
+        h, _ = git_hash(data)
+        cas_root = os.path.join(
+            g_ROOT, f"protocol-dependent/generation-0/git-sha1/casf/{h[0:2]}")
+        basename = h[2:]
+        target = os.path.join(cas_root, basename)
+        tempname = os.path.join(cas_root, "%s.%d" % (basename, os.getpid()))
+
+        if os.path.exists(target):
+            return target, h
+
+        os.makedirs(cas_root, exist_ok=True)
+        with open(tempname, "wb") as f:
+            f.write(data)
+            f.flush()
+            os.chmod(f.fileno(), 0o444)
+            os.fsync(f.fileno())
+        os.utime(tempname, (0, 0))
+        os.rename(tempname, target)
+        return target, h
+    except Exception as ex:
+        fail("Adding content to CAS failed with:\n%r" % (ex, ))
+
+
+def cas_path(h: str) -> str:
+    """Get path to local file CAS."""
+    return os.path.join(
+        g_ROOT, f"protocol-dependent/generation-0/git-sha1/casf/{h[0:2]}",
+        h[2:])
+
+
+def is_in_cas(h: str) -> bool:
+    """Check if content is in local file CAS."""
+    return os.path.exists(cas_path(h))
+
+
+###
 # Import from Git
 ##

@@ -751,6 +825,11 @@ def import_from_git(core_repos: Json, imports_entry: Json) -> Json:
     return core_repos


+###
+# Import from file
+##
+
+
 def import_from_file(core_repos: Json, imports_entry: Json) -> Json:
     """Handles imports from a local checkout."""
     # Set granular logging message
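The CAS helpers added above content-address data exactly the way Git hashes a blob: SHA-1 over a "blob <size>\0" header followed by the raw bytes, stored under a fan-out directory named by the first two hex digits of the digest. A minimal self-contained sketch of that scheme, outside of just-lock (the payload and the storage root are invented for illustration):

    import hashlib
    import os

    def git_blob_hash(content: bytes) -> str:
        # same scheme as git_hash() in the patch: "blob <size>\0" + payload
        header = "blob {}\0".format(len(content)).encode('utf-8')
        return hashlib.sha1(header + content).hexdigest()

    blob_id = git_blob_hash(b"hello archive")  # invented sample payload
    # fan-out layout used by cas_path(): first two hex chars name the directory
    print(os.path.join("ROOT/protocol-dependent/generation-0/git-sha1/casf",
                       blob_id[:2], blob_id[2:]))

The resulting digest can be cross-checked against `git hash-object --stdin` on the same bytes.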
@@ -836,6 +915,282 @@


 ###
+# Import from archive
+##
+
+
+def archive_fetch(locations: List[str],
+                  *,
+                  content: Optional[str],
+                  sha256: Optional[str] = None,
+                  sha512: Optional[str] = None,
+                  fail_context: str) -> str:
+    """Make sure an archive is available in local CAS. Try all the remote
+    locations given. Return the content hash on success."""
+    if content is None or not is_in_cas(content):
+        fetched: bool = False
+        for source in locations:
+            data, err_code = run_cmd(g_LAUNCHER + ["wget", "-O", "-", source],
+                                     stdout=subprocess.PIPE,
+                                     cwd=os.getcwd())
+            if err_code == 0:
+                # Compare with checksums, if given
+                if sha256 is not None:
+                    actual_hash = hashlib.sha256(data).hexdigest()
+                    if sha256 != actual_hash:
+                        continue
+                if sha512 is not None:
+                    actual_hash = hashlib.sha512(data).hexdigest()
+                    if sha512 != actual_hash:
+                        continue
+                # Add to CAS and compare with expected content, if given
+                _, computed_hash = add_to_cas(data)
+                if content is not None:
+                    if content != computed_hash:
+                        continue
+                    fetched = True
+                    break
+                else:
+                    content = computed_hash
+                    fetched = True
+                    break
+
+        if not fetched:
+            fail(fail_context +
+                 "Failed to fetch archive.\nTried locations:\n%s" %
+                 ("\n".join(["\t%s" % (x, ) for x in locations]), ))
+
+    return cast(str, content)
+
+
+def unpack_archive(content_id: str, *, archive_type: str, unpack_to: str,
+                   fail_context: str) -> None:
+    """Unpack archive stored as a local CAS blob into a given directory."""
+    fail_context += "While unpacking archive %s:\n" % (cas_path(content_id), )
+    # ensure destination path is valid
+    if os.path.exists(unpack_to):
+        if not os.path.isdir(unpack_to):
+            fail(fail_context +
+                 "Unpack location %s exists and is not a directory!" %
+                 (unpack_to, ))
+        if os.listdir(unpack_to):
+            fail(fail_context + "Cannot unpack to nonempty directory %s" %
+                 (unpack_to, ))
+    else:
+        os.makedirs(unpack_to, exist_ok=True)
+    # unpack based on archive type
+    if archive_type == "zip":
+        # try as zip and 7z archives
+        if run_cmd(g_LAUNCHER + ["unzip", "-d", ".",
+                                 cas_path(content_id)],
+                   cwd=unpack_to,
+                   fail_context=None)[1] != 0 and run_cmd(
+                       g_LAUNCHER + ["7z", "x", cas_path(content_id)],
+                       cwd=unpack_to,
+                       fail_context=None)[1] != 0:
+            fail(fail_context + "Failed to extract zip-like archive %s" %
+                 (cas_path(content_id), ))
+    else:
+        # try as tarball
+        if run_cmd(g_LAUNCHER + ["tar", "xf", cas_path(content_id)],
+                   cwd=unpack_to,
+                   fail_context=None)[1] != 0:
+            fail(fail_context + "Failed to extract tarball %s" %
+                 (cas_path(content_id), ))
+    return
+
+
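Note how archive_fetch tries each location in turn and accepts the first download that passes every check: the optional SHA-256/SHA-512 checksums first, then the expected git-blob content identifier. A stripped-down sketch of that per-location acceptance test (pure hashlib; the sample bytes are invented):

    import hashlib
    from typing import Optional

    def acceptable(data: bytes, *, sha256: Optional[str],
                   expected_blob: Optional[str]) -> bool:
        # mirrors the per-location checks inside archive_fetch()
        if sha256 is not None and hashlib.sha256(data).hexdigest() != sha256:
            return False  # checksum mismatch: skip to the next location
        header = "blob {}\0".format(len(data)).encode('utf-8')
        blob_id = hashlib.sha1(header + data).hexdigest()
        return expected_blob is None or blob_id == expected_blob

    payload = b"invented archive bytes"
    print(acceptable(payload,
                     sha256=hashlib.sha256(payload).hexdigest(),
                     expected_blob=None))  # True: digest matches, no pinned id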
+def archive_checkout(fetch: str, *, archive_type: str, content: Optional[str],
+                     mirrors: List[str], sha256: Optional[str],
+                     sha512: Optional[str], subdir: Optional[str],
+                     fail_context: str) -> Tuple[str, Dict[str, Any], str]:
+    """Fetch a given remote archive to local CAS, unpack it, check content,
+    and return the checkout location."""
+    fail_context += "While checking out archive %r:\n" % (fetch, )
+    if content is None:
+        # If content is not known, get it from the definitive source location
+        content = archive_fetch([fetch],
+                                content=None,
+                                sha256=sha256,
+                                sha512=sha512,
+                                fail_context=fail_context)
+    else:
+        # If content known, try the mirrors first, as they are closer
+        archive_fetch(mirrors + [fetch],
+                      content=content,
+                      sha256=sha256,
+                      sha512=sha512,
+                      fail_context=fail_context)
+
+    workdir: str = tempfile.mkdtemp()
+    unpack_archive(content,
+                   archive_type=archive_type,
+                   unpack_to=workdir,
+                   fail_context=fail_context)
+    srcdir = (workdir if subdir is None else os.path.join(workdir, subdir))
+
+    # Prepare the description stub used to rewrite "file"-type dependencies
+    repo_stub: Dict[str, Any] = {
+        "type": "zip" if archive_type == "zip" else "archive",
+        "fetch": fetch,
+        "content": content,
+    }
+    if mirrors:
+        repo_stub = dict(repo_stub, **{"mirrors": mirrors})
+    if sha256 is not None:
+        repo_stub = dict(repo_stub, **{"sha256": sha256})
+    if sha512 is not None:
+        repo_stub = dict(repo_stub, **{"sha512": sha512})
+    if subdir is not None:
+        repo_stub = dict(repo_stub, **{"subdir": subdir})
+    return srcdir, repo_stub, workdir
+
+
+def import_from_archive(core_repos: Json, imports_entry: Json) -> Json:
+    """Handles imports from archive-type repositories."""
+    # Set granular logging message
+    fail_context: str = "While importing from source \"archive\":\n"
+
+    # Get the repositories list
+    repos: List[Any] = imports_entry.get("repos", [])
+    if not isinstance(repos, list):
+        fail(fail_context +
+             "Expected field \"repos\" to be a list, but found:\n%r" %
+             (json.dumps(repos, indent=2), ))
+
+    # Check if anything is to be done
+    if not repos:  # empty
+        return core_repos
+
+    # Parse source config fields
+    fetch: str = imports_entry.get("fetch", None)
+    if not isinstance(fetch, str):
+        fail(fail_context +
+             "Expected field \"fetch\" to be a string, but found:\n%r" %
+             (json.dumps(fetch, indent=2), ))
+
+    archive_type: str = "archive"  # type according to 'just-mr'
+    tmp_type: Optional[str] = imports_entry.get("type", None)
+    if tmp_type is not None:
+        if not isinstance(tmp_type, str):
+            fail(fail_context +
+                 "Expected field \"type\" to be a string, but found:\n%r" %
+                 (json.dumps(tmp_type, indent=2), ))
+        if tmp_type not in ["tar", "zip"]:  # values expected in input file
+            warn(
+                fail_context +
+                "Field \"type\" has unsupported value %r\nTrying with default value 'tar'"
+                % (json.dumps(tmp_type), ))
+        else:
+            archive_type = "zip" if tmp_type == "zip" else "archive"
+
+    content: Optional[str] = imports_entry.get("content", None)
+    if content is not None and not isinstance(content, str):
+        fail(fail_context +
+             "Expected field \"content\" to be a string, but found:\n%r" %
+             (json.dumps(content, indent=2), ))
+
+    mirrors: Optional[List[str]] = imports_entry.get("mirrors", [])
+    if mirrors is not None and not isinstance(mirrors, list):
+        fail(fail_context +
+             "Expected field \"mirrors\" to be a list, but found:\n%r" %
+             (json.dumps(mirrors, indent=2), ))
+
+    sha256: Optional[str] = imports_entry.get("sha256", None)
+    if sha256 is not None and not isinstance(sha256, str):
+        fail(fail_context +
+             "Expected field \"sha256\" to be a string, but found:\n%r" %
+             (json.dumps(sha256, indent=2), ))
+
+    sha512: Optional[str] = imports_entry.get("sha512", None)
+    if sha512 is not None and not isinstance(sha512, str):
+        fail(fail_context +
+             "Expected field \"sha512\" to be a string, but found:\n%r" %
+             (json.dumps(sha512, indent=2), ))
+
+    subdir: Optional[str] = imports_entry.get("subdir", None)
+    if subdir is not None:
+        if not isinstance(subdir, str):
+            fail(fail_context +
+                 "Expected field \"subdir\" to be a string, but found:\n%r" %
+                 (json.dumps(subdir, indent=2), ))
+        if subdir in ["", "."]:
+            subdir = None  # treat as if missing
+        elif os.path.isabs(subdir):
+            fail(
+                fail_context +
+                "Expected field \"subdir\" to be a relative path, but found:\n%r"
+                % (json.dumps(subdir, indent=2), ))
+
+    as_plain: Optional[bool] = imports_entry.get("as_plain", False)
+    if as_plain is not None and not isinstance(as_plain, bool):
+        fail(fail_context +
+             "Expected field \"as_plain\" to be a bool, but found:\n%r" %
+             (json.dumps(as_plain, indent=2), ))
+
+    foreign_config_file: Optional[str] = imports_entry.get("config", None)
+    if foreign_config_file is not None and not isinstance(
+            foreign_config_file, str):
+        fail(fail_context +
+             "Expected field \"config\" to be a string, but found:\n%r" %
+             (json.dumps(foreign_config_file, indent=2), ))
+
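The fields parsed above fully describe one archive source. A hypothetical imports entry exercising all of them could look as follows; the field names follow the parsing code, while the URL, digest placeholder, and repository name are invented for illustration:

    { "source": "archive"
    , "fetch": "https://example.org/releases/foo-1.2.3.tar.gz"
    , "type": "tar"
    , "mirrors": ["https://mirror.example.org/foo-1.2.3.tar.gz"]
    , "sha256": "<hex digest of the tarball>"
    , "subdir": "foo-1.2.3"
    , "as_plain": false
    , "config": "repos.json"
    , "repos": [{"repo": "foo"}]
    }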
+    # Fetch archive to local CAS and unpack
+    srcdir, remote_stub, to_clean_up = archive_checkout(
+        fetch,
+        archive_type=archive_type,
+        content=content,
+        mirrors=mirrors,
+        sha256=sha256,
+        sha512=sha512,
+        subdir=subdir,
+        fail_context=fail_context)
+
+    # Read in the foreign config file
+    if foreign_config_file:
+        foreign_config_file = os.path.join(srcdir, foreign_config_file)
+    else:
+        foreign_config_file = get_repository_config_file(
+            DEFAULT_JUSTMR_CONFIG_NAME, srcdir)
+    foreign_config: Json = {}
+    if as_plain:
+        foreign_config = {"main": "", "repositories": DEFAULT_REPO}
+    else:
+        if (foreign_config_file):
+            try:
+                with open(foreign_config_file) as f:
+                    foreign_config = json.load(f)
+            except OSError:
+                fail(fail_context + "Failed to open foreign config file %s" %
+                     (foreign_config_file, ))
+            except Exception as ex:
+                fail(fail_context +
+                     "Reading foreign config file failed with:\n%r" % (ex, ))
+        else:
+            fail(fail_context +
+                 "Failed to find the repository configuration file!")
+
+    # Process the imported repositories, in order
+    for repo_entry in repos:
+        if not isinstance(repo_entry, dict):
+            fail(fail_context +
+                 "Expected \"repos\" entries to be objects, but found:\n%r" %
+                 (json.dumps(repo_entry, indent=2), ))
+        repo_entry = cast(Json, repo_entry)
+
+        core_repos = handle_import(archive_type,
+                                   remote_stub,
+                                   repo_entry,
+                                   core_repos,
+                                   foreign_config,
+                                   fail_context=fail_context)
+
+    # Clean up local fetch
+    try_rmtree(to_clean_up)
+    return core_repos
+
+
+###
 # Deduplication logic
 ##

@@ -1140,6 +1495,7 @@ def lock_config(input_file: str) -> Json:

     # Acquire garbage collector locks
     git_gc_lock = gc_repo_lock_acquire(is_shared=True)
+    storage_gc_lock = gc_storage_lock_acquire(is_shared=True)

     # Handle imports
     for entry in imports:
@@ -1160,8 +1516,8 @@ def lock_config(input_file: str) -> Json:
             core_config["repositories"] = import_from_file(
                 core_config["repositories"], entry)
         elif source == "archive":
-            # TODO(psarbu): Implement source "archive"
-            warn("Import from source \"archive\" not yet implemented!")
+            core_config["repositories"] = import_from_archive(
+                core_config["repositories"], entry)
         elif source == "git-tree":
             # TODO(psarbu): Implement source "git-tree"
             warn("Import from source \"git-tree\" not yet implemented!")
@@ -1173,6 +1529,7 @@ def lock_config(input_file: str) -> Json:
             (json.dumps(entry, indent=2), ))

     # Release garbage collector locks
+    lock_release(storage_gc_lock)
     lock_release(git_gc_lock)

     # Deduplicate output config
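The net effect of the new "archive"/"zip" branch of rewrite_file_repo() (first hunk above) is that a "file" dependency of an imported repository becomes a subdirectory of the fetched archive. A standalone sketch of that rewrite, with an invented repository description and remote stub:

    import os

    def rewrite_as_archive_subdir(repo: dict, remote_stub: dict) -> dict:
        # condensed from the new elif branch in rewrite_file_repo()
        changes = {}
        subdir = repo.get("path", ".")
        if subdir not in ["", "."]:
            existing = remote_stub.get("subdir", ".")
            if existing not in ["", "."]:
                subdir = os.path.join(existing, subdir)
            changes["subdir"] = subdir
        return dict(remote_stub, **changes)

    stub = {"type": "archive",
            "fetch": "https://example.org/foo-1.2.3.tar.gz",  # invented
            "content": "<git blob id>",
            "subdir": "foo-1.2.3"}
    print(rewrite_as_archive_subdir({"type": "file", "path": "src/lib"}, stub))
    # -> the stub, with "subdir" combined to "foo-1.2.3/src/lib"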