diff options
Diffstat (limited to 'bin/just-mr.py')
-rwxr-xr-x | bin/just-mr.py | 523 |
1 files changed, 523 insertions, 0 deletions
diff --git a/bin/just-mr.py b/bin/just-mr.py new file mode 100755 index 00000000..f71bbefe --- /dev/null +++ b/bin/just-mr.py @@ -0,0 +1,523 @@ +#!/usr/bin/env python3 + +import hashlib +import json +import os +import shutil +import subprocess +import sys +import tempfile + +from optparse import OptionParser +from pathlib import Path + +JUST="just" +ROOT="/justroot" +DISTDIR=[] + +ALWAYS_FILE=False + +GIT_CHECKOUT_LOCATIONS={} + +TAKE_OVER= [ + "bindings", + "target_file_name", + "index_file_name", + "rule_file_name", + "expression_file_name", +] +ALT_DIRS=[ + "target_root", + "rule_root", + "expression_root", + "index_root", +] + +GIT_NOBODY_ENV ={ + "GIT_AUTHOR_DATE": "1970-01-01T00:00Z", + "GIT_AUTHOR_NAME": "Nobody", + "GIT_AUTHOR_EMAIL": "nobody@example.org", + "GIT_COMMITTER_DATE": "1970-01-01T00:00Z", + "GIT_COMMITTER_NAME": "Nobody", + "GIT_COMMITTER_EMAIL": "nobody@example.org", +} + + +def log(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + +def fail(s): + log(s) + sys.exit(1) + +def run_cmd(cmd, *, env=None, cwd): + result = subprocess.run( + cmd, cwd=cwd, env=env, + stdout=subprocess.DEVNULL) + if result.returncode != 0: + fail("Command %s in %s failed" + % (cmd, cwd)) + +def read_config(configfile): + if configfile: + with open(configfile) as f: + return json.load(f) + default_config = os.path.join(Path.home(), ".just-repos.json") + + if os.path.isfile(default_config): + with open(default_config) as f: + return json.load(f) + + return {} + +def git_root(*, upstream): + if upstream in GIT_CHECKOUT_LOCATIONS: + return GIT_CHECKOUT_LOCATIONS[upstream] + else: + return os.path.join(ROOT, "git") + +def git_keep(commit, *, upstream): + if upstream in GIT_CHECKOUT_LOCATIONS: + # for those, we assume the referenced commit is kept by + # some branch anyway + return + run_cmd( + ["git", "tag", "-f", "-m", "Keep referenced tree alive", + "keep-%s" % (commit,), commit], + cwd=git_root(upstream=upstream), + env = dict(os.environ, **GIT_NOBODY_ENV), + ) + +def git_init_options(*, upstream): + if upstream in GIT_CHECKOUT_LOCATIONS: + return [] + else: + return ["--bare"] + +def ensure_git(*, upstream): + root = git_root(upstream=upstream) + if os.path.exists(root): + return + os.makedirs(root) + run_cmd(["git", "init"] + git_init_options(upstream=upstream), + cwd=root) + +def git_commit_present(commit, *, upstream): + result = subprocess.run(["git", "show", "--oneline", commit], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + cwd=git_root(upstream=upstream)) + return result.returncode == 0 + +def git_url_is_path(url): + for prefix in ["ssh://", "http://", "https://"]: + if url.startswith(prefix): + return False + return True + +def git_fetch(*, repo, branch): + if git_url_is_path(repo): + repo = os.path.abspath(repo) + run_cmd(["git", "fetch", repo, branch], cwd=git_root(upstream=repo)) + +def subdir_path(checkout, desc): + return os.path.normpath(os.path.join(checkout, desc.get("subdir", "."))) + +def git_tree(*, commit, subdir, upstream): + tree = subprocess.run(["git", "log", "-n", "1", "--format=%T", commit], + stdout=subprocess.PIPE, + cwd=git_root(upstream=upstream)).stdout.decode('utf-8').strip() + return git_subtree(tree=tree, subdir=subdir, upstream=upstream) + +def git_subtree(*, tree, subdir, upstream): + if subdir == ".": + return tree + return subprocess.Popen( + ["git", "cat-file", "--batch-check=%(objectname)"], + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + cwd=git_root(upstream=upstream) + ).communicate(input=("%s:%s" % (tree, subdir)).encode())[0].decode('utf-8').strip() + +def git_checkout_dir(commit): + return os.path.join(ROOT, "workspaces", "git", commit) + +def git_checkout(desc): + commit = desc["commit"] + target = git_checkout_dir(commit) + if ALWAYS_FILE and os.path.exists(target): + return ["file", subdir_path(target, desc)] + repo=desc["repository"] + root = git_root(upstream=repo) + ensure_git(upstream=repo) + if not git_commit_present(commit, upstream=repo): + branch=desc["branch"] + log("Fetching %s from %s (in %s)" % (branch, repo, root)) + git_fetch(repo=repo, branch=branch) + if not git_commit_present(commit, upstream=repo): + fail("Fetching %s from %s failed to fetch %s" + % (branch, repo, commit)) + git_keep(commit, upstream=repo) + if ALWAYS_FILE: + os.makedirs(target) + with tempfile.TemporaryFile() as f: + run_cmd(["git", "archive", commit], + cwd=root, stdout=f) + f.seek(0) + run_cmd(["tar", "x"], cwd=target, stdin=f) + return ["file", subdir_path(target, desc)] + tree = git_tree(commit=commit, subdir=desc.get("subdir", "."), upstream=repo) + return ["git tree", tree, root] + +def update_git(desc): + repo=desc["repository"] + branch=desc["branch"] + lsremote = subprocess.run(["git", "ls-remote", repo, branch], + stdout=subprocess.PIPE).stdout + desc["commit"] = lsremote.decode('utf-8').split('\t')[0] + +def git_hash(content): + header = "blob {}\0".format(len(content)).encode('utf-8') + h = hashlib.sha1() + h.update(header) + h.update(content) + return h.hexdigest() + +def add_to_cas(data): + if isinstance(data, str): + data = data.encode('utf-8') + cas_root = os.path.join(ROOT,"casf") + basename = git_hash(data) + target = os.path.join(cas_root, basename) + tempname = os.path.join(cas_root, "%s.%d" % (basename, os.getpid())) + + if os.path.exists(target): + return target + + os.makedirs(cas_root, exist_ok=True) + with open(tempname, "wb") as f: + f.write(data) + f.flush() + os.fsync(f.fileno()) + os.rename(tempname, target) + return target + +def cas_path(h): + return os.path.join(ROOT, "casf", h) + +def is_in_cas(h): + return os.path.exists(cas_path(h)) + +def add_file_to_cas(filename): + # TODO: avoid going through memory + with open(filename, "rb") as f: + data = f.read() + add_to_cas(data) + +def add_distfile_to_cas(distfile): + for d in DISTDIR: + candidate = os.path.join(d, distfile) + if os.path.exists(candidate): + add_file_to_cas(candidate) + +def archive_checkout_dir(content, repo_type): + return os.path.join(ROOT, "workspaces", repo_type, content) + +def archive_tmp_checkout_dir(content, repo_type): + return os.path.join(ROOT, "tmp-workspaces", repo_type, content) + +def archive_tree_id_file(content, repo_type): + return os.path.join(ROOT, "tree-map", repo_type, content) + +def archive_checkout(desc, repo_type="archive", *, fetch_only=False): + content_id = desc["content"] + target = archive_checkout_dir(content_id, repo_type=repo_type) + if ALWAYS_FILE and os.path.exists(target): + return ["file", subdir_path(target, desc)] + tree_id_file = archive_tree_id_file(content_id, repo_type=repo_type) + if (not ALWAYS_FILE) and os.path.exists(tree_id_file): + with open(tree_id_file) as f: + archive_tree_id = f.read() + return [ + "git tree", + git_subtree(tree=archive_tree_id, subdir=desc.get("subdir", "."), + upstream=None), + git_root(upstream=None), + ] + if not is_in_cas(content_id): + distfile = desc.get("distfile") + if not distfile: + distfile = os.path.basename(desc.get("fetch")) + if distfile: + add_distfile_to_cas(distfile) + if not is_in_cas(content_id): + url = desc["fetch"] + data = subprocess.run(["wget", "-O", "-", url], stdout=subprocess.PIPE).stdout + add_to_cas(data) + if not is_in_cas(content_id): + fail("Failed to fetch a file with id %s from %s" % (content_id, url)) + if fetch_only: + return + if not ALWAYS_FILE: + target = archive_tmp_checkout_dir(content_id, repo_type=repo_type) + os.makedirs(target) + if repo_type == "zip": + run_cmd(["unzip", "-d", ".", cas_path(content_id)], cwd=target) + else: + run_cmd(["tar", "xf", cas_path(content_id)], cwd=target) + if ALWAYS_FILE: + return ["file", subdir_path(target, desc)] + run_cmd(["git", "init"], cwd=target) + run_cmd(["git", "add", "."], cwd=target) + run_cmd( + ["git", "commit", "-m", "Content of %s %r" % (repo_type, content_id)], + cwd=target, + env=dict(os.environ, **GIT_NOBODY_ENV), + ) + + ensure_git(upstream=None) + run_cmd(["git", "fetch", target], + cwd=git_root(upstream=None)) + commit = subprocess.run(["git", "log", "-n", "1", "--format=%H"], + stdout=subprocess.PIPE, + cwd=target).stdout.decode('utf-8').strip() + git_keep(commit, upstream=None) + tree = subprocess.run(["git", "log", "-n", "1", "--format=%T"], + stdout=subprocess.PIPE, + cwd=target).stdout.decode('utf-8').strip() + shutil.rmtree(target) + os.makedirs(os.path.dirname(tree_id_file), exist_ok=True) + with open(tree_id_file, "w") as f: + f.write(tree) + return ["git tree", + git_subtree(tree=tree, subdir=desc.get("subdir", "."), upstream=None), + git_root(upstream=None)] + +def describe_file(desc): + fpath = desc["path"] + return ["file", os.path.abspath(fpath)] + +def resolve_repo(desc, *, seen=None, repos): + seen = seen or [] + if not isinstance(desc, str): + return desc + if desc in seen: + fail("Cyclic reference in repository source definition: %r" % (seen,)) + return resolve_repo(repos[desc]["repository"], + seen = seen + [desc], repos=repos) + +def checkout(desc, *, name, repos): + repo_desc = resolve_repo(desc, repos=repos) + repo_type = repo_desc.get("type") + if repo_type == "git": + return git_checkout(repo_desc) + if repo_type in ["archive", "zip"]: + return archive_checkout(repo_desc, repo_type=repo_type) + if repo_type == "file": + return describe_file(repo_desc) + fail("Unknown repository type %s for %s" + % (repo_type, name)) + +def reachable_repositories(repo, *, repos): + # First compute the set of repositories transitively reachable via bindings + reachable = set() + + def traverse(x): + nonlocal reachable + if x in reachable: + return + reachable.add(x) + bindings = repos[x].get("bindings", {}) + for bound in bindings.values(): + traverse(bound) + + traverse(repo) + + # Now add the repositories that serve as overlay directories for + # targets, rules, etc. Those repositories have to be fetched as well, but + # we do not have to consider their bindings. + to_fetch = reachable.copy() + for x in reachable: + for layer in ALT_DIRS: + if layer in repos[x]: + to_fetch.add(repos[x][layer]) + + return reachable, to_fetch + +def setup(*, config, args, interactive=False): + repos = config.get("repositories", {}) + repos_to_setup = repos.keys() + repos_to_include = repos.keys() + mr_config = {} + main = None + + if args: + if len(args) > 1: + fail("Usage: %s setup [<main repo>]" + % (sys.argv[0], )) + main = args[0] + repos_to_include, repos_to_setup = reachable_repositories(main, + repos=repos) + mr_config["main"] = main + + mr_repos = {} + for repo in repos_to_setup: + desc = repos[repo] + if repo == main and interactive: + config = {} + else: + workspace = checkout(desc.get("repository", {}), name=repo, repos=repos) + config = { "workspace_root": workspace } + for key in TAKE_OVER: + val = desc.get(key, {}) + if val: + config[key] = val + mr_repos[repo] = config + # Alternate directories are specifies as the workspace of + # some other repository. So we have to iterate over all repositories again + # to add those directories. We do this only for the repositories we include + # in the final configuration. + for repo in repos_to_include: + desc = repos[repo] + if repo == main and interactive: + continue + for key in ALT_DIRS: + val = desc.get(key, {}) + if val: + if val == main and interactive: + continue + mr_repos[repo][key] = mr_repos[val]["workspace_root"] + mr_repos_actual = {} + for repo in repos_to_include: + mr_repos_actual[repo] = mr_repos[repo] + mr_config["repositories"] = mr_repos_actual + + return add_to_cas(json.dumps(mr_config, indent=2, sort_keys=True)) + +def build(*, config, args): + if len(args) != 3: + fail("Usage: %s build <repo> <moudle> <target>" % (sys.argv[0],)) + config = setup(config=config, args=[args[0]]) + cmd = [JUST, "build", "-C", config, "--local_build_root", ROOT, + args[1], args[2]] + log("Setup finished, exec %s" % (cmd,)) + os.execvp(JUST, cmd) + +def install(*, config, args): + if len(args) != 4: + fail("Usage: %s install <repo> <moudle> <target> <install-path>" % (sys.argv[0],)) + config = setup(config=config, args=[args[0]]) + cmd = [JUST, "install", "-C", config, "--local_build_root", ROOT, + "-o", args[3], args[1], args[2]] + log("Setup finished, exec %s" % (cmd,)) + os.execvp(JUST, cmd) + +def update(*, config, args): + for repo in args: + desc = config["repositories"][repo]["repository"] + desc = resolve_repo(desc, repos=config["repositories"]) + repo_type = desc.get("type") + if repo_type == "git": + update_git(desc) + else: + fail("Don't know how to update %s repositories" % (repo_type,)) + print(json.dumps(config, indent=2)) + sys.exit(0) + +def fetch(*, config, args): + if args: + print("Warning: ignoring arguments %r" % (args,)) + fetch_dir = None + for d in DISTDIR: + if os.path.isdir(d): + fetch_dir = os.path.abspath(d) + break + if not fetch_dir: + print("No directory found to fetch to, considered %r" % (DISTDIR,)) + sys.exit(1) + print("Fetching to %r" % (fetch_dir,)) + + repos = config["repositories"] + for repo, desc in repos.items(): + if ("repository" in desc and isinstance(desc["repository"], dict) + and desc["repository"]["type"] in ["zip", "archive"]): + repo_desc = desc["repository"] + distfile = repo_desc.get("distfile") or os.path.basename(repo_desc["fetch"]) + content = repo_desc["content"] + print("%r --> %r (content: %s)" % (repo, distfile, content)) + archive_checkout(repo_desc, repo_desc["type"], fetch_only=True) + shutil.copyfile(cas_path(content), os.path.join(fetch_dir, distfile)) + + sys.exit(0) + + +def main(): + parser = OptionParser() + parser.add_option("-C", dest="repository_config", + help="Repository-description file to use", + metavar="FILE") + parser.add_option("-L", dest="checkout_location", + help="Specification file for checkout locations") + parser.add_option("--local_build_root", dest="local_build_root", + help="Root for CAS, repository space, etc", + metavar="PATH") + parser.add_option("--distdir", dest="distdir", action="append", + help="Directory to look for distfiles before fetching", + metavar="PATH") + parser.add_option("--just", dest="just", + help="Path to the just binary", + metavar="PATH") + parser.add_option("--always_file", dest="always_file", action="store_true", + default=False, help="Always create file roots") + + (options, args) = parser.parse_args() + config = read_config(options.repository_config) + global ROOT + ROOT = options.local_build_root or os.path.join(Path.home(), ".cache/just") + global GIT_CHECKOUT_LOCATIONS + if options.checkout_location: + with open(options.checkout_location) as f: + GIT_CHECKOUT_LOCATIONS = json.load(f).get("checkouts",{}).get("git", {}) + elif os.path.isfile(os.path.join(Path().home(), ".just-local.json")): + with open(os.path.join(Path().home(), ".just-local.json")) as f: + GIT_CHECKOUT_LOCATIONS = json.load(f).get("checkouts",{}).get("git", {}) + global DISTDIR + if options.distdir: + DISTDIR = options.distdir + + DISTDIR.append(os.path.join(Path.home(), ".distfiles")) + + global JUST + if options.just: + JUST=os.path.abspath(options.just) + + global ALWAYS_FILE + ALWAYS_FILE=options.always_file + + if not args: + fail("Usage: %s <cmd> [<args>]" % (sys.argv[0],)) + if args[0] == "setup": + # Setup for interactive use, i.e., fetch the required repositories + # and generate an appropriate multi-repository configuration file. + # Store it in the CAS and print its path on stdout. + # + # For the main repository (if specified), leave out the workspace + # so that in the usage of just the workspace is determined from + # the working directory; in this way, working on a checkout of that + # repository is possible, while having all dependencies set up + # correctly. + print(setup(config=config, args=args[1:], interactive=True)) + return + if args[0] == "build": + build(config=config, args=args[1:]) + if args[0] == "install": + install(config=config, args=args[1:]) + if args[0] == "update": + update(config=config, args=args[1:]) + if args[0] == "fetch": + fetch(config=config, args=args[1:]) + fail("Unknown subcommand %s" % (args[0],)) + + +if __name__ == "__main__": + main() |