author     Alberto Sartori <alberto.sartori@huawei.com>   2024-06-24 15:49:36 +0200
committer  Klaus Aehlig <klaus.aehlig@huawei.com>         2024-06-24 15:49:36 +0200
commit     f87ad41f72ca4465a0c5b4ba9fd36a7b09e4d4f4 (patch)
tree       24d4a3a5975df2046714c6bc114d5d050bdc61fc /bin/just-import-cargo.py
download   rules-rust-f87ad41f72ca4465a0c5b4ba9fd36a7b09e4d4f4.tar.gz
Initial commit
Co-authored-by: Klaus Aehlig <klaus.aehlig@huawei.com>
Diffstat (limited to 'bin/just-import-cargo.py')
-rwxr-xr-x  bin/just-import-cargo.py  746
1 file changed, 746 insertions, 0 deletions
diff --git a/bin/just-import-cargo.py b/bin/just-import-cargo.py
new file mode 100755
index 0000000..af59e7d
--- /dev/null
+++ b/bin/just-import-cargo.py
@@ -0,0 +1,746 @@
#!/usr/bin/env python3
# Copyright 2024 Huawei Cloud Computing Technology Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from argparse import ArgumentParser
from collections import defaultdict
import glob
from pprint import pprint
import re
import shutil
import sys
import subprocess
import json
import os
from hashlib import sha256
from tempfile import TemporaryDirectory
from typing import Any, Dict, List, Optional, Tuple, Union, cast

JSON = Union[str, int, float, bool, None, Dict[str, 'JSON'], List['JSON']]


# ------- JSON formatting ------------
# inlined to have a self-contained binary
def is_simple(entry: JSON) -> bool:
    if isinstance(entry, list):
        return len(entry) == 0
    if isinstance(entry, dict):
        return len(entry) == 0
    return True


def is_short(entry: Any, indent: int) -> bool:
    return (len(json.dumps(entry)) + indent) < 80


def hdumps(entry: JSON, *, _current_indent: int = 0) -> str:
    if is_short(entry, _current_indent):
        return json.dumps(entry)
    if isinstance(entry, list) and entry:
        result: str = "[ " + hdumps(entry[0],
                                    _current_indent=_current_indent + 2)
        for x in entry[1:]:
            result += "\n" + " " * _current_indent + ", "
            result += hdumps(x, _current_indent=_current_indent + 2)
        result += "\n" + " " * _current_indent + "]"
        return result
    if isinstance(entry, dict) and entry:
        result = "{ "
        is_first: bool = True
        for k in entry.keys():
            if not is_first:
                result += "\n" + " " * _current_indent + ", "
            result += json.dumps(k) + ":"
            if is_simple(entry[k]):
                result += " " + json.dumps(entry[k])
            elif is_short(entry[k], _current_indent + len(json.dumps(k)) + 4):
                result += " " + json.dumps(entry[k])
            else:
                result += "\n" + " " * _current_indent + "  "
                result += hdumps(entry[k], _current_indent=_current_indent + 2)
            is_first = False
        result += "\n" + " " * _current_indent + "}"
        return result
    return json.dumps(entry)

# ------- end JSON formatting ------------

my_directory = os.path.dirname(os.path.abspath(sys.argv[0]))

HOME = os.environ.get("HOME", "/")
CACHE_ROOT = os.path.join(HOME, ".cache", "crate-index-just")

# used to skip circular dependencies
visited: Dict[str, bool] = {}

# to generate the repos.json file
repos_json: JSON = dict()

recompute_targets = False
recompute_repos = False
recompute_sources = False
compute_index = False
root_id = None
root_name = None
root_version = None
root_rep_name = None
root_dir = None
relative_path = "."
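# NOTE: repo_root and relative_path are placeholders here; main() overwrites
# them from --repo-root and the positional root_dir argument.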
repo_root = "/"
IMPORTED_REPOS = set()


def split_id(id: str):
    try:
        # metadata provided by stable cargo, as of 2024-03-18
        name, version, source = id.split()
        source = source[1:-1]
    except Exception as e:
        if id.startswith("registry"):
            # "registry+https://github.com/rust-lang/crates.io-index#libc@0.2.153"
            source, name_version = id.split("#")
            name, version = name_version.split("@")
        elif id.startswith("path"):
            # path+file:///home/username/opt/src/commonlibrary_rust_ylong_runtime#ylong_io@1.0.0
            source, name_version = id.split("#")
            name, version = name_version.split("@")
        else:
            print(f"while processing {id=}: {e}", file=sys.stderr)
            exit(1)

    return name, version, source


def abspath(x: str) -> str:
    return os.path.abspath(x)


def cargo_metadata(
    pkg_name: str,
    root_dir: str,
    cache_dir: Optional[str] = None,
    traverse_dev: bool = False,
) -> JSON:
    # optional debug dump of the metadata; currently never enabled
    metadata_path: Optional[str] = None

    x = subprocess.run(
        "cargo metadata --format-version 1".split(),
        cwd=root_dir,
        capture_output=True,
    )

    if x.returncode != 0:
        print(f"Failed to run cargo: {x.returncode}: {x.stderr}",
              file=sys.stderr)
        exit(x.returncode)
    d: JSON = json.loads(x.stdout)
    if metadata_path:
        with open(metadata_path, "w") as f:
            print(hdumps(d), file=f)
        print(f"stored metadata of {pkg_name} to {metadata_path}",
              file=sys.stderr)
    return d


def cache_entry_from_hash(hash: str) -> Tuple[str, bool]:
    os.makedirs(CACHE_ROOT, exist_ok=True)
    path = os.path.join(CACHE_ROOT, hash[:2], hash[2:])
    return path, os.path.exists(path)


def repo_name(name: str, version: str = "") -> str:
    return "-".join((name, version))


def bindings(nodes: Any, id: str, keep_dev: bool = True) -> JSON:
    pkg: JSON = {}
    for pkg in nodes:
        if pkg["id"] == id:
            break
    b: JSON = {"rules": "rust-rules"}
    global compute_index
    if compute_index:
        b["index"] = "index"
    for d in pkg["deps"]:
        d_name, d_version, _ = split_id(d["pkg"])
        is_dev = d["dep_kinds"][0]["kind"] == "dev"
        if not is_dev or keep_dev:
            b[d_name] = repo_name(d_name, d_version)
    return b


def bindings_from_tree(key: str, tree_out: JSON) -> JSON:
    b: JSON = {"rules": "rust-rules"}
    global compute_index
    if compute_index:
        b["index"] = "index"
    b.update(tree_out[key]["bindings"])
    return b


def cache_entry(name: str, version: str) -> Tuple[str, bool]:
    key: str = repo_name(name, version)
    hash = sha256(key.encode()).hexdigest()
    return cache_entry_from_hash(hash)  # returns (entry, present)


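# Crates are cached under CACHE_ROOT keyed by sha256("<name>-<version>"),
# together with a precomputed repos.json fragment.  The "content" field of an
# archive repository stores the git blob hash of the tarball (computed via
# "git hash-object"), which is how just-mr identifies the download.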
def fetch_crate_from_crates_io(
    name: str, version: str, rep_name: str, tree_out: JSON
) -> str:
    entry, present = cache_entry(name, version)
    archive = f"{name}.tar.gz"
    archive_path = os.path.join(entry, archive)
    global recompute_repos, repos_json
    if recompute_repos or not present:
        fetch_url = f"https://crates.io/api/v1/crates/{name}/{version}/download"
        tmp_dir = TemporaryDirectory(prefix="cargo-parser-")
        print(f"retrieving: {repo_name(name,version)} from {fetch_url}",
              end="...", file=sys.stderr)
        wget = subprocess.run(
            f"wget {fetch_url} --tries=10 -O {name}.tar.gz".split(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            cwd=tmp_dir.name,
        )
        wget.check_returncode()
        os.makedirs(entry, exist_ok=True)
        shutil.copyfile(os.path.join(tmp_dir.name, archive), archive_path)
        x = subprocess.check_output(
            f"git hash-object {name}.tar.gz".split(), cwd=tmp_dir.name
        )
        repo: JSON = {
            "repository": dict(
                type="archive",
                fetch=fetch_url,
                content=x.decode().split()[0],
                subdir=rep_name,
                distfile=f"{rep_name}.tar.gz",
            )
        }
        repos: JSON = dict()
        repos[rep_name] = repo
        repos[rep_name]["bindings"] = bindings_from_tree(rep_name, tree_out)
        repos[rep_name]["target_root"] = "external-deps"
        repos[rep_name]["target_file_name"] = f"TARGETS.{rep_name}"
        with open(file=os.path.join(entry, "repos.json"), mode="w") as f:
            print(hdumps(repos), file=f)
        repos_json.update(repos)
        print("done", file=sys.stderr)
    else:
        with open(file=os.path.join(entry, "repos.json")) as f:
            d = json.load(f)
        repos_json.update(d)
    return archive_path


def local_repo(
    name: str,
    version: str,
    rep_name: str,
    tree_out: JSON,
    is_main=False,
    path: str = ".",
) -> None:
    global repos_json
    print(f"local: {repo_name(name,version)}", end="...", file=sys.stderr)

    repo: JSON = {
        "repository": dict(
            type="file",
            path=path,
        )
    }
    repos: JSON = dict()
    key = name if is_main else rep_name
    repos[key] = repo
    repos[key]["bindings"] = bindings_from_tree(rep_name, tree_out)

    repos_json.update(repos)
    print("done", file=sys.stderr)


def compute_srcs(root_dir: str, name: str, version: str) -> List[Any]:
    entry, exists = cache_entry(name, version)
    if not exists:
        os.makedirs(entry)
    srcs_file = os.path.join(entry, "srcs.json")
    global recompute_sources
    if not recompute_sources and os.path.exists(srcs_file):
        with open(srcs_file) as f:
            return json.load(f)

    srcs: List[Any] = list()

    for f in glob.glob(f"{root_dir}/**/*.rs", recursive=True):
        f = os.path.relpath(f, root_dir)
        if (
            not f.startswith("example")
            and not f.startswith("test")
            and not f.startswith("bench")
        ):
            srcs.append(f)

    with open(srcs_file, "w") as f:
        print(json.dumps(srcs), file=f)
    return srcs


def to_underscore(x: str):
    return x.replace("-", "_")


def active_deps(
    nodes: Any, pkg: JSON, tree_out: JSON, keep_dev: bool
) -> Tuple[List[Any], JSON]:
    p: JSON = {}
    for p in nodes:
        if p["id"] == pkg["id"]:
            break

    b: List[Any] = list()
    for d in p["deps"]:
        d_name, _, _ = split_id(d["pkg"])

        is_dev: bool = d["dep_kinds"][0]["kind"] == "dev"
        if not is_dev or keep_dev:
            b.append(["@", d_name, "", d_name])
    return b, p["features"]


def active_deps_from_tree(
    key: str, tree_out: JSON, root_name: Optional[str] = None,
    is_dev: bool = False
) -> Tuple[List[Any], JSON]:
    dep_names = tree_out[key]["bindings"].keys()
    deps = [["@", x, "", to_underscore(x)] for x in dep_names]
    if is_dev:
        deps.append(root_name)
    return deps, tree_out[key]["features"]


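# TARGETS generation: every package reachable from the root crate gets a
# TARGETS.<name>-<version> file.  "custom-build" targets are delegated to the
# cargo build_script rule; dev-only kinds (test, example) are emitted only
# when dev=True, and bench targets are not yet supported.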
version.split(".") + if not dev and is_dev_target: + continue + if unsupported_target: + print(f"{t['name']}: rust {t_kind} target type not yet supported", + file=sys.stderr) + continue + + if t_kind == "custom-build": + # we will use foreign cargo rules to build + d_build_script = dict() + d_build_script["type"] = "@ rules cargo build_script".split() + d_build_script["name"] = ["build_script"] + crate_root = os.path.relpath(t["src_path"], root_dir) + d_build_script["crate_root"] = [crate_root] + d_build_script["arguments_config"] = config + d_build_script["edition"] = [t["edition"]] + d_build_script["stage"] = [rep_name] + d_build_script["deps"], d_build_script["cargo_features"] = ( + active_deps_from_tree(rep_name, tree_out) + ) + d_build_script["version"] = version_lst + d_build_script["pkg_name"] = [name] + targets["build_script"] = d_build_script + has_build_script = True + continue + + crate_root = os.path.relpath(t["src_path"], root_dir) + d: JSON = dict() + d["type"] = just_type(t, False) + d["name"] = [to_underscore(t["name"])] + d["crate_root"] = [crate_root] + if not is_dev_target: + d["srcs"] = compute_srcs(root_dir, name, version) + d["edition"] = [t["edition"]] + d["arguments_config"] = config + d["deps"], d["cargo_features"] = active_deps_from_tree( + rep_name, tree_out, to_underscore(name), is_dev_target + ) + d["stage"] = [rep_name] + d["version"] = version_lst + d["pkg_name"] = [name] + if not is_main and t["name"] == pkg["name"]: + internal_name = f"{to_underscore(t['name'])}-internal" + d["arguments_config"] = config + targets[to_underscore(t["name"])] = { + "type": "export", + "target": internal_name, + "flexible_config": config, + } + targets[internal_name] = d + if has_build_script: + targets[internal_name]["build_script"] = ["build_script"] + global compute_index + if compute_index: + targets["tree"] = { + "type": "install", + "dirs": [[["TREE", None, "."], rep_name]], + } + targets["cargo-srcs"] = { + "type": "generic", + "cmds": [f"mv index {rep_name}"], + "deps": ["tree", ["@", "index", "", "index"]], + "out_dirs": [rep_name], + } + targets["config.toml"] = { + "type": "generic", + "cmds": [ + f'echo "[source.crates-io]\\nreplace-with = \\"vendored-sources\\"\\n\\n[source.vendored-sources]\\ndirectory = \\"index\\"\\n" > config.toml' + ], + "outs": ["config.toml"], + } + targets["cargo-srcs"]["cmds"].append( + f"mkdir -p {rep_name}/.cargo && mv config.toml {rep_name}/.cargo" + ) + targets["cargo-srcs"]["deps"].append("config.toml") + continue + targets[t["name"]] = d + + for f, _ in pkg["features"].items(): + d = defaultdict(list) + d["type"] = ["@", "rules", "cargo", "feature"] + d["name"] = [f] + targets[f] = d + with open(targets_file, "w") as f: + print(hdumps(targets), file=f) + return targets + + +kinds: Dict[str, str] = dict(bin="binary", lib="library", example="binary") + + +def just_type(target: JSON, use_foreign_cargo: bool) -> List[str]: + if use_foreign_cargo: + return "@ rules cargo library".split() + t: List[str] = ["@", "rules", "rust"] + k: str = target["kind"][0] + return t + [kinds.get(k, k)] + + +def parse_metadata(metadata: JSON, tree_out: JSON) -> None: + nodes = metadata["resolve"]["nodes"] + global root_id, root_name, root_version, root_rep_name + root_id = metadata["resolve"]["root"] + if root_id: + root_name, root_version, _ = split_id(root_id) + root_rep_name = repo_name(root_name, root_version) + + # generate repos.json + for p in nodes: + name, version, source = split_id(p["id"]) + rep_name = repo_name(name, version) + if rep_name in 
def parse_metadata(metadata: JSON, tree_out: JSON) -> None:
    nodes = metadata["resolve"]["nodes"]
    global root_id, root_name, root_version, root_rep_name
    root_id = metadata["resolve"]["root"]
    if root_id:
        root_name, root_version, _ = split_id(root_id)
        root_rep_name = repo_name(root_name, root_version)

    # generate repos.json
    for p in nodes:
        name, version, source = split_id(p["id"])
        rep_name = repo_name(name, version)
        if rep_name in tree_out:
            if rep_name == root_rep_name:
                local_repo(name, version, rep_name, tree_out, is_main=True,
                           path=relative_path)
                continue

            if source == "registry+https://github.com/rust-lang/crates.io-index":
                fetch_crate_from_crates_io(name, version, rep_name, tree_out)
            elif source.startswith("path+file"):
                path = source.split("#")[0][12:]
                local_repo(
                    name,
                    version,
                    rep_name,
                    tree_out,
                    path=os.path.relpath(path, repo_root),
                    is_main=True,
                )


def repo_name_from_id(id: str):
    name, version, _ = split_id(id)
    return repo_name(name, version)


def parse_tree_entry(x: str) -> Tuple[int, str, str, List[Any]]:
    match = re.match(r"^\d+", x)
    if not match:
        print(f"failed to parse tree entry: {x}", file=sys.stderr)
        exit(1)
    depth = int(match.group())
    rest = x[match.span()[-1]:].replace('"', "").split()
    rest = [x for x in rest if not x.startswith("(")]
    name, version, features = split_tree_entry(rest)
    return depth, name, version, features


def split_tree_entry(x: List[str]) -> Tuple[str, str, List[Any]]:
    if len(x) < 2:
        print(f"malformed tree entry: {x}", file=sys.stderr)
        exit(1)
    name: str = x[0]
    version: str = x[1][1:]  # drop the leading "v"
    features: List[Any] = []
    if len(x) == 3:
        features = x[2].split(",")
    return name, version, features


def parse_cargo_tree(root_dir: str) -> JSON:
    tree = subprocess.run(
        ["cargo", "tree", "--prefix", "depth", "-f", '"{p} {f}"'],
        cwd=root_dir,
        capture_output=True,
    )

    if tree.returncode != 0:
        print(f"Failed to run cargo: {tree.returncode}: {tree.stderr}",
              file=sys.stderr)
        exit(tree.returncode)
    lines = tree.stdout.decode().split("\n")
    deps: JSON = defaultdict(lambda: cast(JSON, dict(bindings={})))
    depth_package = {}
    for l in lines:
        # skip empty lines
        if not l:
            continue
        depth, name, version, features = parse_tree_entry(l)
        pkg = repo_name(name, version)
        deps[pkg]["features"] = features
        deps[pkg]["name"] = name
        depth_package[depth] = pkg
        if depth > 0:
            parent = depth_package[depth - 1]
            deps[parent]["bindings"].update({name: pkg})

    return dict(deps)


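# merge_repos splices the generated repositories into the base configuration
# read from stdin, renaming on collisions ("<name> (0)", "<name> (1)", ...)
# and rewriting bindings and roots to the renamed repositories.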
def merge_repos(base_config, new_repos, entry_point_name):
    old_names = set(base_config["repositories"].keys())
    entry_name = entry_point_name
    if entry_name in old_names:
        count = 0
        entry_name = "%s (%d)" % (entry_point_name, count)
        while entry_name in old_names:
            count += 1
            entry_name = "%s (%d)" % (entry_point_name, count)
    print("Adding to repository config the new rust repository %r with dependencies"
          % (entry_name,), file=sys.stderr)

    name_mapping = {entry_point_name: entry_name}
    for k in new_repos.keys():
        if k == entry_point_name:
            continue
        preferred_name = "%s/%s" % (entry_name, k)
        if preferred_name not in old_names:
            name_mapping[k] = preferred_name
            old_names.add(preferred_name)
            continue
        count = 0
        new_name = "%s (%d)" % (preferred_name, count)
        while new_name in old_names:
            count += 1
            new_name = "%s (%d)" % (preferred_name, count)
        name_mapping[k] = new_name
        old_names.add(new_name)
    renamed_repos = {}
    for k, v in new_repos.items():
        if k in IMPORTED_REPOS:
            # no renaming done within this repository definition
            renamed_repos[name_mapping[k]] = v
        else:
            if isinstance(v, str):
                if v in name_mapping:
                    renamed_repos[name_mapping[k]] = name_mapping[v]
                else:
                    renamed_repos[name_mapping[k]] = v
            else:
                # has to be a dict; rename bindings, and roots, if given
                new_v = v
                ws_root = new_v["repository"]
                if isinstance(ws_root, str):
                    if ws_root in name_mapping:
                        new_v = dict(new_v,
                                     **{"repository": name_mapping[ws_root]})
                bindings = v.get("bindings", {})
                new_bindings = {}
                for n, m in bindings.items():
                    if m in name_mapping:
                        new_bindings[n] = name_mapping[m]
                    else:
                        new_bindings[n] = m
                if new_bindings:
                    new_v = dict(new_v, **{"bindings": new_bindings})
                for root in ["target_root", "rule_root", "expression_root"]:
                    if root in new_v:
                        r_name = new_v[root]
                        if r_name in name_mapping:
                            new_v = dict(new_v, **{root: name_mapping[r_name]})
                renamed_repos[name_mapping[k]] = new_v
    all_repos = dict(base_config["repositories"], **renamed_repos)
    return dict(base_config, **{"repositories": all_repos})


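# Entry point.  The base repository configuration is read from stdin and the
# merged configuration is printed to stdout; TARGETS files are written under
# etc/deps-rust and etc/defaults/rust.  A typical invocation (file names
# illustrative) would be
#   bin/just-import-cargo.py --repo-root . --rules rust-rules -g <crate-dir> \
#       < repos.in.json > repos.json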
def main():
    parser = ArgumentParser()
    parser.add_argument(
        "root_dir",
        help="directory where the Cargo.toml file is located; relative to repo_root",
    )
    parser.add_argument("-t", "--recompute-targets", action="store_true")
    parser.add_argument("-r", "--recompute-repos", action="store_true")
    parser.add_argument("-s", "--recompute-sources", action="store_true")
    parser.add_argument("-I", "--compute-index", action="store_true")
    parser.add_argument(
        "-g",
        "--to-git",
        action="store_true",
    )
    parser.add_argument(
        "--rules", dest="rules",
        help="Rust-rules repository name in the base configuration",
        metavar="name")
    parser.add_argument("--local-crate-cache", dest="cache_root")
    parser.add_argument("--repo-root", dest="repo_root")
    args = parser.parse_args()
    global repo_root, root_dir, relative_path
    repo_root = args.repo_root or "."
    repo_root = os.path.abspath(repo_root)
    root_dir = os.path.join(repo_root, args.root_dir)
    relative_path = os.path.relpath(root_dir, repo_root)
    global CACHE_ROOT
    if args.cache_root:
        CACHE_ROOT = args.cache_root

    tree_out: JSON = parse_cargo_tree(root_dir)
    global recompute_targets, recompute_repos, recompute_sources, \
        compute_index, root_name, root_version
    recompute_repos = args.recompute_repos
    recompute_targets = args.recompute_targets
    recompute_sources = args.recompute_sources
    compute_index = args.compute_index
    pprint(args, stream=sys.stderr)

    base_config = json.load(sys.stdin)
    metadata = cargo_metadata(
        os.path.basename(os.path.abspath(root_dir)), root_dir,
        cache_dir=root_dir
    )
    parse_metadata(metadata, tree_out)

    if compute_index:
        index = sorted(repos_json.keys())
        repos_json["index"] = {
            "repository": {"type": "distdir", "repositories": index},
            "target_file_name": "TARGETS.index",
            "target_root": "external-deps",
        }

    repos_json["external-deps"] = {
        "repository": {"type": "file",
                       "path": "etc/deps-rust"}
    }

    if args.rules:
        repos_json["rust-rules-root"] = base_config["repositories"][args.rules]
        global IMPORTED_REPOS
        IMPORTED_REPOS.add("rust-rules-root")
    else:
        print(
            "Warning: adding local path to this checkout to the repository configuration",
            file=sys.stderr)
        repos_json["rust-rules-root"] = {
            "repository":
            {"type": "file",
             "path": os.path.normpath(os.path.join(my_directory, "../rules")),
             }
        }

    repos_json["rust-rules-defaults"] = {
        "repository": {"type": "file",
                       "path": "etc/defaults"}
    }

    if args.to_git:
        for k in ["external-deps", "rust-rules-root", "rust-rules-defaults"]:
            repos_json[k]["repository"]["pragma"] = {"to_git": True}

    repos_json["rust-rules"] = {
        "repository": "rust-rules-root",
        "target_root": "rust-rules-defaults",
        "rule_root": "rust-rules-root",
        "bindings": {"orig-rules": "rust-rules-root"},
    }

    main_rep = repo_name(root_name, root_version)
    print(hdumps(merge_repos(base_config, repos_json, root_name)))

    ext = os.path.join(repo_root, "etc", "deps-rust")
    os.makedirs(ext, exist_ok=True)

    defaults_dict = {
        "defaults": {
            "type": "defaults",
            "base": [["@", "orig-rules", "rust", "defaults"]],
        }
    }

    defaults_dir = os.path.join(repo_root, "etc", "defaults", "rust")
    os.makedirs(defaults_dir, exist_ok=True)
    with open(os.path.join(defaults_dir, "TARGETS"), "w") as f:
        print(hdumps(defaults_dict), file=f)

    for pkg in metadata["packages"]:
        rep_name = repo_name_from_id(pkg["id"])
        if rep_name in tree_out:
            if rep_name == main_rep:
                with open(os.path.join(root_dir, "TARGETS"), "w") as f:
                    print(
                        hdumps(
                            compute_targets(
                                metadata, pkg, tree_out, is_main=True, dev=True
                            )
                        ),
                        file=f,
                    )
                continue

            if pkg["id"].startswith("path+file"):
                path = pkg["id"].split("#")[0][12:]
                with open(os.path.join(path, "TARGETS"), "w") as f:
                    print(
                        hdumps(
                            compute_targets(
                                metadata, pkg, tree_out, is_main=True, dev=True
                            )
                        ),
                        file=f,
                    )

            with open(os.path.join(ext, f"TARGETS.{rep_name}"), "w") as f:
                print(hdumps(compute_targets(metadata, pkg, tree_out)), file=f)


main()