From c5aecdb15685bcbae02ced7586dd927482bf7ce5 Mon Sep 17 00:00:00 2001 From: Paul Cristian Sarbu Date: Tue, 24 Jan 2023 12:01:44 +0100 Subject: just-mr.py: Make distdir content reproducible and consistent In Python the json of a dict does not guarantee an order, while in our internal just-mr implementation the json always has the keys sorted. Also, the JSON dump in Python does not by default use the most compact representation. This change fixes these issues and makes the two just-mr versions produce the same distdir content id. --- bin/just-mr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'bin/just-mr.py') diff --git a/bin/just-mr.py b/bin/just-mr.py index 7e424f70..2be38ab1 100755 --- a/bin/just-mr.py +++ b/bin/just-mr.py @@ -560,7 +560,7 @@ def distdir_tree_id_file(content): return os.path.join(ROOT, "distfiles-tree-map", content) -def distdir_checkout(desc, repos): +def distdir_checkout(name, desc, repos): # DEBUG (added name) """ Logic for processing the distdir repo type. """ content = {} @@ -582,7 +582,7 @@ def distdir_checkout(desc, repos): content[get_distfile(repo_desc)] = content_id # Hash the map as unique id for the distdir repo entry - distdir_content_id = git_hash(json.dumps(content).encode('utf-8')) + distdir_content_id = git_hash(json.dumps(content, sort_keys=True, separators=(',', ':')).encode('utf-8')) target_distdir_dir = distdir_repo_dir(distdir_content_id) # Check if content already exists @@ -650,7 +650,7 @@ def checkout(desc, *, name, repos): if repo_type == "file": return file_checkout(repo_desc) if repo_type == "distdir": - return distdir_checkout(repo_desc, repos=repos) + return distdir_checkout(name, repo_desc, repos=repos) # DEBUG (added name) fail("Unknown repository type %s for %s" % (repo_type, name)) -- cgit v1.2.3