diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2025-01-22 13:07:11 +0100 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2025-02-20 15:33:53 +0100 |
commit | 1b49795bc13b30882ba8c0886602927095fdba9e (patch) | |
tree | 2a0de7f153d8d4168e16d993a600b49ed50addd6 | |
parent | 781055d845f0aaf3fef579d30eb33b52aacda700 (diff) | |
download | justbuild-1b49795bc13b30882ba8c0886602927095fdba9e.tar.gz |
just-lock: Add support for cloning 'git' repositories
-rwxr-xr-x | bin/just-lock.py | 237 |
1 files changed, 230 insertions, 7 deletions
diff --git a/bin/just-lock.py b/bin/just-lock.py index a17a11c5..3cc6659d 100755 --- a/bin/just-lock.py +++ b/bin/just-lock.py @@ -18,6 +18,7 @@ import hashlib import json import os import shutil +import stat import subprocess import sys import tempfile @@ -77,6 +78,12 @@ class ObjectType(Enum): DIR = 4 +SUPPORTED_BLOB_TYPES: List[ObjectType] = [ + ObjectType.FILE, ObjectType.EXEC, ObjectType.LINK +] + +SHA1_SIZE_BYTES: int = 20 + ### # Global vars ## @@ -339,6 +346,19 @@ def type_to_perm(obj_type: ObjectType) -> str: fail("Unexpected object type %r" % (obj_type, )) +def type_to_string(obj_type: ObjectType) -> str: + """Mapping from Git object type to human-readable string.""" + if obj_type == ObjectType.DIR: + return "DIR" + elif obj_type == ObjectType.LINK: + return "LINK" + elif obj_type == ObjectType.EXEC: + return "EXEC" + elif obj_type == ObjectType.FILE: + return "FILE" + fail("Unexpected object type %r" % (obj_type, )) + + def write_data_to_repo(repo_root: str, data: bytes, *, as_type: str) -> bytes: """Write content of an object of certain type into given repository. Returns the raw id of the written object.""" @@ -390,6 +410,20 @@ def path_to_type(fpath: str) -> ObjectType: fail("Found unsupported filesystem entry %s" % (fpath, )) +def git_to_type(perm: str) -> ObjectType: + """Get type of given Git entry from permission mode.""" + if perm == "40000": + return ObjectType.DIR + elif perm == "120000": + return ObjectType.LINK + elif perm == "100755": + return ObjectType.EXEC + elif perm == "100644": + return ObjectType.FILE + fail("Cannot assign object type for entry with permission code %s" % + (perm, )) + + def get_tree_raw_id(source_dir: str, repo_root: str) -> bytes: """Write the content of the directory recursively to the given repository and return its SHA1 hash and its raw bytes representation.""" @@ -465,10 +499,26 @@ def import_to_git(target: str, *, repo_type: str, content_id: str, return tree_id +def git_tree(*, commit: str, subdir: str, upstream: Optional[str], + fail_context: str) -> str: + """Get Git-tree identifier based on commit. Fails if the commit is not part + of the repository. It is a user error if the referenced Git repository does + not exist.""" + tree = run_cmd(["git", "log", "-n", "1", "--format=%T", commit], + stdout=subprocess.PIPE, + cwd=git_root(upstream=upstream), + fail_context=fail_context)[0].decode('utf-8').strip() + return git_subtree(tree=tree, + subdir=subdir, + upstream=upstream, + fail_context=fail_context) + + def git_subtree(*, tree: str, subdir: str, upstream: Optional[str], fail_context: str) -> str: - """Get Git-tree identifier in a Git tree by subdirectory path. It is a user - error if the referenced Git repository does not exist.""" + """Get Git-tree identifier in a Git tree by subdirectory path. Fails if the + tree is not part of the repository. It is a user error if the referenced Git + repository does not exist.""" if os.path.normpath(subdir) == ".": return tree return run_cmd( @@ -497,6 +547,44 @@ def try_read_object_from_repo(obj_id: str, obj_type: str, *, return result[0] if result[1] == 0 else None +def read_git_tree(tree_id: str, *, upstream: Optional[str], + fail_context: str) -> Dict[str, Tuple[bytes, ObjectType]]: + """Reads a Git tree and returns a list of its entries. Tree entries have as + key their filename and as value a tuple of raw id and object type. Method + fails if the given tree is not part of the repository. It is a user error if + the referenced Git repository does not exist.""" + raw_tree_content = try_read_object_from_repo(tree_id, + "tree", + upstream=upstream) + if raw_tree_content is None: + fail(fail_context + "Failed to read Git tree %s from %s" % + (tree_id, git_root(upstream=upstream))) + + # Parse the raw content; the Git tree format is: + # "<perm> <filename>\0<binary_hash>[next entries...]" + # The hash size for SHA1 is 20 bytes + entries: Dict[str, Tuple[bytes, ObjectType]] = {} + curr_index = 0 + while curr_index < len(raw_tree_content): + # get permission + perm_step = raw_tree_content[curr_index:].find(b' ') + perm: str = raw_tree_content[curr_index:curr_index + + perm_step].decode('utf-8') + curr_index += perm_step + 1 + # get filename + name_step = raw_tree_content[curr_index:].find(b'\0') + filename: str = raw_tree_content[curr_index:curr_index + + name_step].decode('utf-8') + curr_index += name_step + 1 + # get raw id + raw_id: bytes = raw_tree_content[curr_index:curr_index + + SHA1_SIZE_BYTES].hex().encode('utf-8') + curr_index += SHA1_SIZE_BYTES + # store current entry + entries[filename] = (raw_id, git_to_type(perm)) + return entries + + ### # CAS utils ## @@ -560,6 +648,80 @@ def is_in_cas(h: str) -> bool: ### +# Staging utils +## + + +def stage_git_entry(*, fpath: str, obj_id: str, obj_type: ObjectType, + upstream: Optional[str], fail_context: str) -> None: + """Stage specified Git entry, identified by id and object type, to a given + location. It is a user error if the referenced Git repository does not + exist.""" + curr_fail_context = fail_context + "While staging entry %r:\n" % ( + json.dumps({fpath: (obj_id, type_to_string(obj_type))}), ) + # Trees need to get traversed + if obj_type == ObjectType.DIR: + os.makedirs(fpath) + entries = read_git_tree(obj_id, + upstream=upstream, + fail_context=curr_fail_context) + for key, val in entries.items(): + stage_git_entry( + fpath=os.path.join(fpath, key), + obj_id=val[0].decode('utf-8'), + obj_type=val[1], + upstream=upstream, + fail_context=fail_context, # limit log verbosity + ) + # Blobs are read as-is; only do work for supported blob types + elif obj_type in SUPPORTED_BLOB_TYPES: + content = try_read_object_from_repo(obj_id, "blob", upstream=upstream) + if content is None: + fail(curr_fail_context + "Failed to read Git entry!") + try: + if obj_type == ObjectType.LINK: + os.symlink(src=content.decode('utf-8'), dst=fpath) + else: + with open(fpath, "wb") as f: + fstat = os.stat(f.fileno()) + f.write(content) + f.flush() + if obj_type == ObjectType.EXEC: + os.chmod(f.fileno(), fstat.st_mode | stat.S_IEXEC) + os.fsync(f.fileno()) + except OSError: + fail(curr_fail_context + "Failed to write entry") + except Exception as ex: + fail(curr_fail_context + "Writing entry failed with:\n%r" % (ex, )) + else: + # Warn if any unsupported entries were found + warn(curr_fail_context + + "Skipped staging of entry with unsupported type") + return + + +def stage_git_commit(commit: str, *, upstream: Optional[str], stage_to: str, + fail_context: str) -> None: + """Stage into a given directory the tree of a commit from given repository. + Fails if the commit is not part of the repository. It is a user error if the + referenced Git repository does not exist.""" + fail_context += "While trying to stage commit %s\n" % (commit, ) + # Stage underlying Git tree + tree = git_tree(commit=commit, + subdir=".", + upstream=upstream, + fail_context=fail_context) + entries = read_git_tree(tree, upstream=upstream, fail_context=fail_context) + os.makedirs(stage_to, exist_ok=True) # root dir can already exist + for key, val in entries.items(): + stage_git_entry(fpath=os.path.join(stage_to, key), + obj_id=val[0].decode('utf-8'), + obj_type=val[1], + upstream=upstream, + fail_context=fail_context) + + +### # Imports utils ## @@ -1777,6 +1939,15 @@ def import_from_git_tree(core_repos: Json, imports_entry: Json) -> Json: def rewrite_cloned_repo(repos: Json, *, clone_to: str, target_repo: str, ws_root_repo: str) -> Json: """Rewrite description of a locally-cloned repository.""" + # For Git repositories, the clone always contains the whole root tree, + # so the path will need to point to any given subdir; no validation of + # fields is needed, as it was already done pre-cloning + ws_root_desc: Json = repos[ws_root_repo] + if ws_root_desc["repository"]["type"] == "git": + subdir: Optional[str] = ws_root_desc["repository"].get("subdir", None) + if subdir is not None and os.path.normpath(subdir) != ".": + clone_to = os.path.join(clone_to, subdir) + # Set workspace root description new_spec: Json = { "repository": { @@ -1850,6 +2021,59 @@ def clone_repo(repos: Json, known_repo: str, deps_chain: List[str], (fpath, ex)) return fpath + def process_git(repository: Json, *, clone_to: str, + fail_context: str) -> str: + """Process a Git repository and return its commit id.""" + # Parse fields + commit: str = repository.get("commit", None) + if not isinstance(commit, str): + fail(fail_context + + "Expected field \"commit\" to be a string, but found:\n%r" % + (json.dumps(commit, indent=2), )) + # If commit in Git cache, stage it and return + if git_commit_present(commit, upstream=None): + stage_git_commit(commit, + upstream=None, + stage_to=clone_to, + fail_context=fail_context) + return commit + # Parse fields needed for + url: str = repository.get("repository", None) + if not isinstance(url, str): + fail(fail_context + + "Expected field \"url\" to be a string, but found:\n%r" % + (json.dumps(url, indent=2), )) + branch: str = repository.get("branch", None) + if not isinstance(branch, str): + fail(fail_context + + "Expected field \"branch\" to be a string, but found:\n%r" % + (json.dumps(branch, indent=2), )) + mirrors: Optional[List[str]] = repository.get("mirrors", []) + if mirrors is not None and not isinstance(mirrors, list): + fail(fail_context + + "Expected field \"mirrors\" to be a list, but found:\n%r" % + (json.dumps(mirrors, indent=2), )) + # Clone the branch fully and reset to specific commit; in this case, we + # are interested also in the Git history, so a simple commit checkout is + # not enough; try mirrors first, as they are closer + cloned: bool = False + sources = [url] if mirrors is None else mirrors + [url] + for source in sources: + if (run_cmd(g_LAUNCHER + + [g_GIT, "clone", "-b", branch, source, clone_to], + cwd=os.getcwd(), + fail_context=None)[1] == 0 + and run_cmd(g_LAUNCHER + [g_GIT, "reset", "--hard", commit], + cwd=clone_to, + fail_context=None)[1] == 0): + cloned = True + break + if not cloned: + fail(fail_context + + "Failed to clone Git repository.\nTried locations:\n%s" % + ("\n".join(["\t%s" % (x, ) for x in sources]), )) + return commit + def follow_binding(repos: Json, *, repo_name: str, dep_name: str, fail_context: str) -> str: """Follow a named binding.""" @@ -1906,11 +2130,10 @@ def clone_repo(repos: Json, known_repo: str, deps_chain: List[str], report("\tCloned file path %s to %s" % (fpath, clone_to)) elif repo_type == "git": - #TODO(psarbu): Implement git cloning - warn(fail_context + - "Cloning from \"%s\" repositories not yet implemented!" % - (repo_type, )) - result = None + commit = process_git(repository, + clone_to=clone_to, + fail_context=fail_context) + report("\tCloned Git commit %s to %s" % (commit, clone_to)) elif repo_type in ["archive", "zip"]: #TODO(psarbu): Implement archives cloning |