summaryrefslogtreecommitdiff
path: root/bin/just-mr.py
blob: f71bbefe1cae1e91b6524c7a756455a784659343 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
#!/usr/bin/env python3

import hashlib
import json
import os
import shutil
import subprocess
import sys
import tempfile

from optparse import OptionParser
from pathlib import Path

# Name (or path) of the just binary that build/install exec into;
# replaced in main() when --just is given.
JUST = "just"

# Local build root holding the CAS, the git repository and workspaces;
# replaced in main() from --local_build_root or a per-user default.
ROOT = "/justroot"

# Directories searched for distfiles before an archive is downloaded.
DISTDIR = []

# When set, repositories are provided as plain file roots instead of git trees.
ALWAYS_FILE = False

# Mapping from upstream git repository to a local checkout to use for it.
GIT_CHECKOUT_LOCATIONS = {}

# Keys of a repository description that are copied unchanged into the
# generated multi-repository configuration.
TAKE_OVER = [
    "bindings",
    "target_file_name",
    "index_file_name",
    "rule_file_name",
    "expression_file_name",
]

# Keys of a repository description that name another repository whose
# workspace root is used as an alternative directory.
ALT_DIRS = [
    "target_root",
    "rule_root",
    "expression_root",
    "index_root",
]

# Fixed author/committer identity used for the commits and tags this tool
# creates itself.
GIT_NOBODY_ENV = {
    "GIT_AUTHOR_DATE": "1970-01-01T00:00Z",
    "GIT_AUTHOR_NAME": "Nobody",
    "GIT_AUTHOR_EMAIL": "nobody@example.org",
    "GIT_COMMITTER_DATE": "1970-01-01T00:00Z",
    "GIT_COMMITTER_NAME": "Nobody",
    "GIT_COMMITTER_EMAIL": "nobody@example.org",
}


def log(*args, **kwargs):
    """Print to standard error; accepts the same arguments as print()
    except for ``file``."""
    print(*args, **kwargs, file=sys.stderr)

def fail(s):
    """Report the message ``s`` on standard error and terminate the
    process with exit status 1."""
    log(s)
    raise SystemExit(1)

def run_cmd(cmd, *, env=None, stdout=subprocess.DEVNULL, stdin=None, cwd):
    """Run ``cmd`` in directory ``cwd`` and abort the program if it fails.

    Arguments:
      cmd    -- the command as argument vector
      env    -- environment for the child; None inherits the current one
      stdout -- where to send the child's standard output; discarded by
                default
      stdin  -- standard input for the child; None inherits the current one
      cwd    -- working directory for the child (required)

    The ``stdout`` and ``stdin`` parameters are needed by git_checkout,
    which pipes a ``git archive`` through a temporary file into ``tar``.
    On a non-zero exit status, fail() is called and the process exits.
    """
    result = subprocess.run(
        cmd, cwd=cwd, env=env,
        stdout=stdout, stdin=stdin)
    if result.returncode != 0:
        fail("Command %s in %s failed"
             % (cmd, cwd))

def read_config(configfile):
    """Load the repository configuration as parsed JSON.

    If ``configfile`` is given, that file is read. Otherwise the file
    ``.just-repos.json`` in the home directory is read if it exists; if it
    does not, the empty configuration ``{}`` is returned.
    """
    path = configfile
    if not path:
        fallback = os.path.join(Path.home(), ".just-repos.json")
        if not os.path.isfile(fallback):
            return {}
        path = fallback

    with open(path) as f:
        return json.load(f)

def git_root(*, upstream):
    """Return the directory of the git repository to use for ``upstream``.

    That is the configured checkout location if there is one for
    ``upstream``, and the ``git`` directory below the build root otherwise.
    """
    if upstream not in GIT_CHECKOUT_LOCATIONS:
        return os.path.join(ROOT, "git")
    return GIT_CHECKOUT_LOCATIONS[upstream]

def git_keep(commit, *, upstream):
    """Tag ``commit`` as ``keep-<commit>`` in the git repository used for
    ``upstream``.

    Nothing is done for upstreams with a configured checkout location; for
    those, we assume the referenced commit is kept by some branch anyway.
    """
    if upstream not in GIT_CHECKOUT_LOCATIONS:
        tag_cmd = [
            "git", "tag", "-f", "-m", "Keep referenced tree alive",
            "keep-%s" % (commit,), commit,
        ]
        run_cmd(
            tag_cmd,
            cwd=git_root(upstream=upstream),
            env=dict(os.environ, **GIT_NOBODY_ENV),
        )

def git_init_options(*, upstream):
    """Return the extra options for ``git init`` for ``upstream``: none for
    a configured checkout location, ``--bare`` otherwise."""
    return [] if upstream in GIT_CHECKOUT_LOCATIONS else ["--bare"]

def ensure_git(*, upstream):
    """Create and initialize the git repository used for ``upstream``
    unless its directory already exists."""
    root = git_root(upstream=upstream)
    if not os.path.exists(root):
        os.makedirs(root)
        init_cmd = ["git", "init", *git_init_options(upstream=upstream)]
        run_cmd(init_cmd, cwd=root)

def git_commit_present(commit, *, upstream):
    """Return whether ``git show`` succeeds for ``commit`` in the git
    repository used for ``upstream``. All output of git is discarded."""
    probe = subprocess.run(
        ["git", "show", "--oneline", commit],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        cwd=git_root(upstream=upstream),
    )
    found = probe.returncode == 0
    return found

def git_url_is_path(url):
    """Return whether ``url`` denotes a path in the local file system.

    The same rule as in git itself is applied: a location is remote if a
    colon occurs before the first slash. This covers every
    ``<scheme>://...`` URL (``ssh``, ``http``, ``https``, ``git``,
    ``file``, ...) as well as the scp-like form ``[user@]host:path``.
    Everything else is a local path. A relative path containing a colon in
    its first component has to be written with a leading ``./`` to be
    recognized as a path.
    """
    head, colon, _ = url.partition(":")
    if not colon or "/" in head:
        # No colon at all, or a slash precedes the first colon.
        return True
    if os.name == "nt" and len(head) == 1 and head.isalpha():
        # Drive-letter prefix of a Windows path, not a host name.
        return True
    return False

def git_fetch(*, repo, branch):
    """Fetch ``branch`` of ``repo`` into the git repository used for it.

    A ``repo`` that is a local path is made absolute first, as the fetch
    is run from within a different directory.
    """
    # NOTE(review): the absolute form of a path is also what is looked up
    # in the checkout locations here, whereas git_checkout looks up the
    # value as given in the repository description -- confirm this is
    # intended for relative paths.
    source = os.path.abspath(repo) if git_url_is_path(repo) else repo
    run_cmd(["git", "fetch", source, branch],
            cwd=git_root(upstream=source))

def subdir_path(checkout, desc):
    """Return the normalized path of the directory below ``checkout`` that
    is selected by the ``subdir`` entry of ``desc`` (default: ``.``)."""
    relative = desc.get("subdir", ".")
    joined = os.path.join(checkout, relative)
    return os.path.normpath(joined)

def git_tree(*, commit, subdir, upstream):
    """Return the identifier of the tree at ``subdir`` of ``commit``,
    looked up in the git repository used for ``upstream``."""
    lookup = subprocess.run(
        ["git", "log", "-n", "1", "--format=%T", commit],
        stdout=subprocess.PIPE,
        cwd=git_root(upstream=upstream),
    )
    top_level_tree = lookup.stdout.decode('utf-8').strip()
    return git_subtree(tree=top_level_tree, subdir=subdir, upstream=upstream)

def git_subtree(*, tree, subdir, upstream):
    """Return the identifier of the object at ``subdir`` within ``tree``.

    For the subdirectory ``.`` this is ``tree`` itself. Otherwise, the
    object name is obtained by asking ``git cat-file`` in the git repository
    used for ``upstream`` about ``<tree>:<subdir>``.
    """
    if subdir == ".":
        return tree

    query = ("%s:%s" % (tree, subdir)).encode()
    answer = subprocess.run(
        ["git", "cat-file", "--batch-check=%(objectname)"],
        input=query,
        stdout=subprocess.PIPE,
        cwd=git_root(upstream=upstream),
    )
    return answer.stdout.decode('utf-8').strip()

def git_checkout_dir(commit):
    """Return the directory below the build root where the file checkout
    of the git commit ``commit`` is located."""
    location = (ROOT, "workspaces", "git", commit)
    return os.path.join(*location)

def git_checkout(desc):
    """Provide the workspace root for the git repository described by
    ``desc``.

    The description has to contain the keys ``commit`` and ``repository``
    and, if the commit has to be fetched, ``branch``; ``subdir`` is
    optional. If the commit is not yet available locally, the branch is
    fetched and the commit is tagged to keep it alive.

    Returns ``["file", <path>]`` if file roots are requested via
    ALWAYS_FILE and ``["git tree", <tree id>, <git root>]`` otherwise.
    """
    commit = desc["commit"]
    workspace = git_checkout_dir(commit)
    if ALWAYS_FILE and os.path.exists(workspace):
        # A previous run already exported this commit.
        return ["file", subdir_path(workspace, desc)]

    repo = desc["repository"]
    root = git_root(upstream=repo)
    ensure_git(upstream=repo)
    if not git_commit_present(commit, upstream=repo):
        branch = desc["branch"]
        log("Fetching %s from %s (in %s)" % (branch, repo, root))
        git_fetch(repo=repo, branch=branch)
        if not git_commit_present(commit, upstream=repo):
            fail("Fetching %s from %s failed to fetch %s"
                 % (branch, repo, commit))
        git_keep(commit, upstream=repo)

    if not ALWAYS_FILE:
        tree = git_tree(commit=commit, subdir=desc.get("subdir", "."),
                        upstream=repo)
        return ["git tree", tree, root]

    # Export the commit into the workspace directory by piping a
    # git archive through a temporary file into tar.
    # NOTE(review): this relies on run_cmd accepting the keyword
    # arguments stdout and stdin.
    os.makedirs(workspace)
    with tempfile.TemporaryFile() as archive:
        run_cmd(["git", "archive", commit],
                cwd=root, stdout=archive)
        archive.seek(0)
        run_cmd(["tar", "x"], cwd=workspace, stdin=archive)
    return ["file", subdir_path(workspace, desc)]

def update_git(desc):
    """Set the ``commit`` entry of the git repository description ``desc``
    to the commit currently reported by ``git ls-remote`` for its
    ``branch`` in its ``repository``.

    The description is modified in place. If ``git ls-remote`` fails or
    does not report any commit (e.g., because the branch does not exist),
    the program is aborted instead of recording an empty commit.
    """
    repo = desc["repository"]
    branch = desc["branch"]
    lsremote = subprocess.run(["git", "ls-remote", repo, branch],
                              stdout=subprocess.PIPE)
    # Each output line has the form "<commit>\t<ref>"; take the commit of
    # the first line.
    commit = lsremote.stdout.decode('utf-8').split('\t')[0].strip()
    if lsremote.returncode != 0 or not commit:
        fail("Failed to determine the commit of branch %s of %s"
             % (branch, repo))
    desc["commit"] = commit

def git_hash(content):
    """Return the hex digest git assigns to a blob with the given
    ``content`` (bytes): the SHA-1 of a ``blob <length>`` header, a NUL
    byte, and the content."""
    digest = hashlib.sha1("blob {}\0".format(len(content)).encode('utf-8'))
    digest.update(content)
    return digest.hexdigest()

def add_to_cas(data):
    """Store ``data`` in the file CAS below the build root and return the
    path of the stored file.

    Strings are stored UTF-8 encoded. The file is named by the git blob
    hash of its content; if such a file already exists, nothing is written.
    """
    payload = data.encode('utf-8') if isinstance(data, str) else data
    store = os.path.join(ROOT,"casf")
    digest = git_hash(payload)
    final_path = os.path.join(store, digest)

    if not os.path.exists(final_path):
        os.makedirs(store, exist_ok=True)
        # Write to a process-specific temporary name first and rename
        # afterwards, so that the final name never refers to a partially
        # written file.
        staging_path = os.path.join(store, "%s.%d" % (digest, os.getpid()))
        with open(staging_path, "wb") as out:
            out.write(payload)
            out.flush()
            os.fsync(out.fileno())
        os.rename(staging_path, final_path)

    return final_path

def cas_path(h):
    """Return the path in the file CAS for the content with hash ``h``."""
    cas_root = os.path.join(ROOT, "casf")
    return os.path.join(cas_root, h)

def is_in_cas(h):
    """Return whether a file for the hash ``h`` exists in the file CAS."""
    entry = cas_path(h)
    return os.path.exists(entry)

def add_file_to_cas(filename):
    """Add the content of the file ``filename`` to the file CAS."""
    # TODO: avoid going through memory
    content = Path(filename).read_bytes()
    add_to_cas(content)

def add_distfile_to_cas(distfile):
    """Add every file named ``distfile`` that is found in one of the
    distfile directories to the file CAS."""
    for directory in DISTDIR:
        candidate = os.path.join(directory, distfile)
        if not os.path.exists(candidate):
            continue
        add_file_to_cas(candidate)

def archive_checkout_dir(content, repo_type):
    """Return the directory below the build root where the archive with
    content hash ``content`` is unpacked as a file root."""
    location = (ROOT, "workspaces", repo_type, content)
    return os.path.join(*location)

def archive_tmp_checkout_dir(content, repo_type):
    """Return the directory below the build root where the archive with
    content hash ``content`` is unpacked temporarily."""
    location = (ROOT, "tmp-workspaces", repo_type, content)
    return os.path.join(*location)

def archive_tree_id_file(content, repo_type):
    """Return the path of the file below the build root that records the
    git tree identifier for the archive with content hash ``content``."""
    location = (ROOT, "tree-map", repo_type, content)
    return os.path.join(*location)

def archive_checkout(desc, repo_type="archive", *, fetch_only=False):
    """Provide the workspace root for the archive described by ``desc``.

    Arguments:
      desc       -- repository description; ``content`` is required,
                    ``fetch`` is required if the archive has to be
                    downloaded, ``distfile`` and ``subdir`` are optional
      repo_type  -- ``"zip"`` to unpack with unzip; anything else is
                    unpacked with tar
      fetch_only -- only make sure the archive is in the file CAS

    Returns ``["file", <path>]`` if file roots are requested via
    ALWAYS_FILE and ``["git tree", <tree id>, <git root>]`` otherwise;
    with ``fetch_only`` set, None is returned once the archive is in the
    file CAS. The program is aborted if the archive cannot be obtained.
    """
    content_id = desc["content"]
    target = archive_checkout_dir(content_id, repo_type=repo_type)
    # Shortcuts: the archive was already unpacked or imported by an
    # earlier run.
    # NOTE(review): these early returns are also taken with fetch_only
    # set, i.e., without checking that the archive is in the file CAS;
    # confirm that callers using fetch_only can cope with that.
    if ALWAYS_FILE and os.path.exists(target):
        return ["file", subdir_path(target, desc)]
    tree_id_file = archive_tree_id_file(content_id, repo_type=repo_type)
    if (not ALWAYS_FILE) and os.path.exists(tree_id_file):
        with open(tree_id_file) as f:
           archive_tree_id = f.read()
        return [
            "git tree",
            git_subtree(tree=archive_tree_id, subdir=desc.get("subdir", "."),
                        upstream=None),
            git_root(upstream=None),
        ]
    # First try to get the archive from one of the distfile directories,
    # looking for the name given as "distfile" or, if absent, for the
    # base name of the fetch URL.
    if not is_in_cas(content_id):
        distfile = desc.get("distfile")
        if not distfile:
            distfile = os.path.basename(desc.get("fetch"))
        if distfile:
            add_distfile_to_cas(distfile)
    # Still not there, so download it. Whatever was downloaded is added
    # to the file CAS under its own hash; hence the check afterwards
    # verifies that the download has the expected content.
    if not is_in_cas(content_id):
        url = desc["fetch"]
        data = subprocess.run(["wget", "-O", "-", url], stdout=subprocess.PIPE).stdout
        add_to_cas(data)
        if not is_in_cas(content_id):
            fail("Failed to fetch a file with id %s from %s" % (content_id, url))
    if fetch_only:
        return
    # For a git-tree root, unpack into a temporary directory that is
    # removed again after the import into git.
    if not ALWAYS_FILE:
        target = archive_tmp_checkout_dir(content_id, repo_type=repo_type)
    os.makedirs(target)
    if repo_type == "zip":
        run_cmd(["unzip", "-d", ".", cas_path(content_id)], cwd=target)
    else:
        run_cmd(["tar", "xf", cas_path(content_id)], cwd=target)
    if ALWAYS_FILE:
        return ["file", subdir_path(target, desc)]
    # Commit the unpacked files in a throw-away repository, using the
    # fixed identity of GIT_NOBODY_ENV.
    run_cmd(["git", "init"], cwd=target)
    run_cmd(["git", "add", "."], cwd=target)
    run_cmd(
        ["git", "commit", "-m", "Content of %s %r" % (repo_type, content_id)],
        cwd=target,
        env=dict(os.environ, **GIT_NOBODY_ENV),
    )

    # Transfer the commit into the git repository below the build root
    # and tag it there, to keep it alive.
    ensure_git(upstream=None)
    run_cmd(["git", "fetch", target],
            cwd=git_root(upstream=None))
    commit = subprocess.run(["git", "log", "-n", "1", "--format=%H"],
                            stdout=subprocess.PIPE,
                            cwd=target).stdout.decode('utf-8').strip()
    git_keep(commit, upstream=None)
    tree = subprocess.run(["git", "log", "-n", "1", "--format=%T"],
                          stdout=subprocess.PIPE,
                          cwd=target).stdout.decode('utf-8').strip()
    shutil.rmtree(target)
    # Record the tree identifier, so that later runs can take the
    # shortcut above.
    os.makedirs(os.path.dirname(tree_id_file), exist_ok=True)
    with open(tree_id_file, "w") as f:
        f.write(tree)
    return ["git tree",
            git_subtree(tree=tree, subdir=desc.get("subdir", "."), upstream=None),
            git_root(upstream=None)]

def describe_file(desc):
    """Return the file root ``["file", <absolute path>]`` for the
    ``path`` entry of the repository description ``desc``."""
    return ["file", os.path.abspath(desc["path"])]

def resolve_repo(desc, *, seen=None, repos):
    """Follow references between repositories until an actual repository
    description is found, and return it.

    A string ``desc`` is the name of another repository in ``repos`` whose
    ``repository`` entry is to be used instead; anything else is returned
    as is. ``seen`` lists the names already visited. The program is aborted
    if the references are cyclic.
    """
    visited = list(seen or [])
    current = desc
    while isinstance(current, str):
        if current in visited:
            fail("Cyclic reference in repository source definition: %r" % (visited,))
        visited.append(current)
        current = repos[current]["repository"]
    return current

def checkout(desc, *, name, repos):
    """Provide the workspace root for the repository ``name`` with
    repository description ``desc``.

    References to other repositories in ``repos`` are resolved first; then
    the root is set up according to the ``type`` of the description. The
    program is aborted for an unknown type.
    """
    repo_desc = resolve_repo(desc, repos=repos)
    kind = repo_desc.get("type")
    if kind == "git":
        return git_checkout(repo_desc)
    elif kind in ("archive", "zip"):
        return archive_checkout(repo_desc, repo_type=kind)
    elif kind == "file":
        return describe_file(repo_desc)
    fail("Unknown repository type %s for %s"
         % (kind, name))

def reachable_repositories(repo, *, repos):
    """Compute the repositories needed when using ``repo`` as main
    repository.

    Returns a pair of sets of repository names: the repositories reachable
    from ``repo`` via bindings, and the repositories that have to be
    fetched, which additionally contain the ones providing overlay
    directories.
    """
    # First compute the set of repositories transitively reachable via
    # bindings, using an explicit work list.
    reachable = set()
    pending = [repo]
    while pending:
        current = pending.pop()
        if current in reachable:
            continue
        reachable.add(current)
        pending.extend(repos[current].get("bindings", {}).values())

    # Now add the repositories that serve as overlay directories for
    # targets, rules, etc. Those repositories have to be fetched as well, but
    # we do not have to consider their bindings.
    overlays = {
        repos[name][layer]
        for name in reachable
        for layer in ALT_DIRS
        if layer in repos[name]
    }

    return reachable, reachable | overlays

def setup(*, config, args, interactive=False):
    """Set up all required repositories and generate the multi-repository
    configuration for them.

    Arguments:
      config      -- the parsed repository configuration
      args        -- empty, or a list containing the name of the main
                     repository; in the latter case only the repositories
                     needed for it are considered
      interactive -- if set, no workspace root is set for the main
                     repository

    Returns the path of the generated configuration file in the file CAS.
    """
    repos = config.get("repositories", {})
    names_to_setup = repos.keys()
    names_to_include = repos.keys()
    mr_config = {}
    main_repo = None

    if args:
        if len(args) > 1:
            fail("Usage: %s setup [<main repo>]"
                 % (sys.argv[0], ))
        main_repo = args[0]
        names_to_include, names_to_setup = reachable_repositories(
            main_repo, repos=repos)
        mr_config["main"] = main_repo

    def without_workspace(name):
        # In interactive use, the main repository gets no workspace root.
        return name == main_repo and interactive

    mr_repos = {}
    for name in names_to_setup:
        desc = repos[name]
        if without_workspace(name):
            entry = {}
        else:
            root = checkout(desc.get("repository", {}), name=name, repos=repos)
            entry = { "workspace_root": root }
        for key in TAKE_OVER:
            value = desc.get(key, {})
            if value:
                entry[key] = value
        mr_repos[name] = entry

    # Alternate directories are specified as the workspace of
    # some other repository. So we have to iterate over all repositories again
    # to add those directories. We do this only for the repositories we include
    # in the final configuration.
    for name in names_to_include:
        desc = repos[name]
        if without_workspace(name):
            continue
        for key in ALT_DIRS:
            other = desc.get(key, {})
            if not other or without_workspace(other):
                continue
            mr_repos[name][key] = mr_repos[other]["workspace_root"]

    mr_config["repositories"] = {
        name: mr_repos[name] for name in names_to_include
    }

    return add_to_cas(json.dumps(mr_config, indent=2, sort_keys=True))

def build(*, config, args):
    """Set up the repositories for a build and hand over to ``just build``.

    Arguments:
      config -- the parsed repository configuration
      args   -- exactly three entries: the main repository, the module,
                and the target to build

    This function does not return: the process is replaced by the just
    binary, or aborted with a usage message on a wrong number of arguments.
    """
    if len(args) != 3:
        fail("Usage: %s build <repo> <module> <target>" % (sys.argv[0],))
    repo, module, target = args
    config_file = setup(config=config, args=[repo])
    cmd = [JUST, "build", "-C", config_file, "--local_build_root", ROOT,
           module, target]
    log("Setup finished, exec %s" % (cmd,))
    os.execvp(JUST, cmd)

def install(*, config, args):
    """Set up the repositories for a build and hand over to
    ``just install``.

    Arguments:
      config -- the parsed repository configuration
      args   -- exactly four entries: the main repository, the module,
                the target to build, and the path to install to

    This function does not return: the process is replaced by the just
    binary, or aborted with a usage message on a wrong number of arguments.
    """
    if len(args) != 4:
        fail("Usage: %s install <repo> <module> <target> <install-path>" % (sys.argv[0],))
    repo, module, target, install_path = args
    config_file = setup(config=config, args=[repo])
    cmd = [JUST, "install", "-C", config_file, "--local_build_root", ROOT,
           "-o", install_path, module, target]
    log("Setup finished, exec %s" % (cmd,))
    os.execvp(JUST, cmd)

def update(*, config, args):
    """Update the commits of the repositories named in ``args`` and print
    the resulting configuration as JSON on standard output.

    Only git repositories can be updated; for any other type the program
    is aborted. On success, the program exits with status 0.
    """
    for name in args:
        all_repos = config["repositories"]
        desc = resolve_repo(all_repos[name]["repository"], repos=all_repos)
        kind = desc.get("type")
        if kind != "git":
            fail("Don't know how to update %s repositories" % (kind,))
        else:
            update_git(desc)
    print(json.dumps(config, indent=2))
    sys.exit(0)

def fetch(*, config, args):
    """Copy the archives of all ``zip`` and ``archive`` repositories in
    ``config`` into the first existing distfile directory.

    Archives not yet in the file CAS are obtained first. Any ``args`` are
    ignored with a warning. The program exits with status 0 when done and
    with status 1 if none of the distfile directories exists.
    """
    if args:
        print("Warning: ignoring arguments %r" % (args,))

    existing = (os.path.abspath(d) for d in DISTDIR if os.path.isdir(d))
    fetch_dir = next(existing, None)
    if not fetch_dir:
        print("No directory found to fetch to, considered %r" % (DISTDIR,))
        sys.exit(1)
    print("Fetching to %r" % (fetch_dir,))

    for repo, desc in config["repositories"].items():
        # Only repositories directly described as archives are considered.
        if "repository" not in desc:
            continue
        repo_desc = desc["repository"]
        if not isinstance(repo_desc, dict):
            continue
        if repo_desc["type"] not in ["zip", "archive"]:
            continue

        distfile = repo_desc.get("distfile") or os.path.basename(repo_desc["fetch"])
        content = repo_desc["content"]
        print("%r --> %r (content: %s)" % (repo, distfile, content))
        archive_checkout(repo_desc, repo_desc["type"], fetch_only=True)
        destination = os.path.join(fetch_dir, distfile)
        shutil.copyfile(cas_path(content), destination)

    sys.exit(0)


def main():
    """Command-line entry point: parse the options, set the global
    configuration accordingly, and run the requested subcommand."""
    global ROOT, GIT_CHECKOUT_LOCATIONS, DISTDIR, JUST, ALWAYS_FILE

    parser = OptionParser()
    parser.add_option(
        "-C", dest="repository_config", metavar="FILE",
        help="Repository-description file to use")
    parser.add_option(
        "-L", dest="checkout_location",
        help="Specification file for checkout locations")
    parser.add_option(
        "--local_build_root", dest="local_build_root", metavar="PATH",
        help="Root for CAS, repository space, etc")
    parser.add_option(
        "--distdir", dest="distdir", action="append", metavar="PATH",
        help="Directory to look for distfiles before fetching")
    parser.add_option(
        "--just", dest="just", metavar="PATH",
        help="Path to the just binary")
    parser.add_option(
        "--always_file", dest="always_file", action="store_true",
        default=False, help="Always create file roots")

    options, args = parser.parse_args()
    config = read_config(options.repository_config)

    ROOT = options.local_build_root or os.path.join(Path.home(), ".cache/just")

    # Checkout locations are taken from the file given on the command
    # line, falling back to a file in the home directory if that exists.
    locations_file = options.checkout_location
    if not locations_file:
        default_locations = os.path.join(Path.home(), ".just-local.json")
        if os.path.isfile(default_locations):
            locations_file = default_locations
    if locations_file:
        with open(locations_file) as f:
            GIT_CHECKOUT_LOCATIONS = json.load(f).get("checkouts",{}).get("git", {})

    if options.distdir:
        DISTDIR = options.distdir
    # The distfile directory in the home directory is always searched last.
    DISTDIR.append(os.path.join(Path.home(), ".distfiles"))

    if options.just:
        JUST = os.path.abspath(options.just)

    ALWAYS_FILE = options.always_file

    if not args:
        fail("Usage: %s <cmd> [<args>]" % (sys.argv[0],))

    subcommand, subcommand_args = args[0], args[1:]
    if subcommand == "setup":
        # Setup for interactive use, i.e., fetch the required repositories
        # and generate an appropriate multi-repository configuration file.
        # Store it in the CAS and print its path on stdout.
        #
        # For the main repository (if specified), leave out the workspace
        # so that in the usage of just the workspace is determined from
        # the working directory; in this way, working on a checkout of that
        # repository is possible, while having all dependencies set up
        # correctly.
        print(setup(config=config, args=subcommand_args, interactive=True))
        return

    # All of these end the process themselves, either by exiting or by
    # executing the just binary.
    handlers = {
        "build": build,
        "install": install,
        "update": update,
        "fetch": fetch,
    }
    handler = handlers.get(subcommand)
    if handler is not None:
        handler(config=config, args=subcommand_args)
    fail("Unknown subcommand %s" % (args[0],))


# Run the command-line interface only when executed as a script, not
# when imported as a module.
if __name__ == "__main__":
    main()