verify package hashes when importing via http
author Helmut Grohne <helmut@subdivi.de>
Fri, 26 Jul 2013 19:53:11 +0000 (21:53 +0200)
committer Helmut Grohne <helmut@subdivi.de>
Fri, 26 Jul 2013 19:53:11 +0000 (21:53 +0200)
autoimport.py
dedup/hashing.py
importpkg.py

diff --git a/autoimport.py b/autoimport.py
index 694ffeb..481a3f8 100755 (executable)
--- a/autoimport.py
+++ b/autoimport.py
@@ -29,7 +29,8 @@ def process_http(pkgs, url):
                 version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
             continue
         pkgs[name] = dict(version=pkg["Version"],
-                          filename="%s/%s" % (url, pkg["Filename"]))
+                          filename="%s/%s" % (url, pkg["Filename"]),
+                          sha256hash=pkg["SHA256"])
 
 def process_file(pkgs, filename):
     base = os.path.basename(filename)
@@ -51,14 +52,18 @@ def process_dir(pkgs, d):
         except ValueError:
             pass
 
-def process_pkg(name, filename):
+def process_pkg(name, pkgdict):
+    filename = pkgdict["filename"]
     print("importing %s" % filename)
+    importcmd = ["python", "importpkg.py"]
+    if "sha256hash" in pkgdict:
+        importcmd.extend(["-H", pkgdict["sha256hash"]])
     if filename.startswith("http://"):
         with open(os.path.join("tmp", name), "w") as outp:
             dl = subprocess.Popen(["curl", "-s", filename],
                                   stdout=subprocess.PIPE, close_fds=True)
-            imp = subprocess.Popen(["python", "importpkg.py"], stdin=dl.stdout,
-                                   stdout=outp, close_fds=True)
+            imp = subprocess.Popen(importcmd, stdin=dl.stdout, stdout=outp,
+                                   close_fds=True)
             if imp.wait():
                 raise ValueError("importpkg failed")
             if dl.wait():
@@ -66,8 +71,8 @@ def process_pkg(name, filename):
     else:
         with open(filename) as inp:
             with open(os.path.join("tmp", name), "w") as outp:
-                subprocess.check_call(["python", "importpkg.py"], stdin=inp,
-                                      stdout=outp, close_fds=True)
+                subprocess.check_call(importcmd, stdin=inp, stdout=outp,
+                                      close_fds=True)
     print("preprocessed %s" % name)
 
 def main():
@@ -106,7 +111,7 @@ def main():
     with e:
         fs = {}
         for name, pkg in pkgs.items():
-            fs[e.submit(process_pkg, name, pkg["filename"])] = name
+            fs[e.submit(process_pkg, name, pkg)] = name
 
         for f in concurrent.futures.as_completed(fs.keys()):
             name = fs[f]
diff --git a/dedup/hashing.py b/dedup/hashing.py
index 1283c7e..002eda8 100644 (file)
--- a/dedup/hashing.py
+++ b/dedup/hashing.py
@@ -106,3 +106,22 @@ def hash_file(hashobj, filelike, blocksize=65536):
         hashobj.update(data)
         data = filelike.read(blocksize)
     return hashobj
+
+class HashedStream(object):
+    """A file-like object, that supports sequential reading and hashes the
+    contents on the fly."""
+    def __init__(self, filelike, hashobj):
+        """
+        @param filelike: a file-like object, that must support the read method
+        @param hashobj: a hashlib-like object providing update and hexdigest
+        """
+        self.filelike = filelike
+        self.hashobj = hashobj
+
+    def read(self, length):
+        data = self.filelike.read(length)
+        self.hashobj.update(data)
+        return data
+
+    def hexdigest(self):
+        return self.hashobj.hexdigest()
diff --git a/importpkg.py b/importpkg.py
index 56e03ae..2f38f5c 100755 (executable)
--- a/importpkg.py
+++ b/importpkg.py
@@ -6,6 +6,7 @@ document contains package metadata. Then a document is emitted for each file.
 And finally a document consisting of the string "commit" is emitted."""
 
 import hashlib
+import optparse
 import sys
 import tarfile
 import zlib
@@ -15,7 +16,8 @@ import lzma
 import yaml
 
 from dedup.arreader import ArReader
-from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, hash_file
+from dedup.hashing import HashBlacklist, DecompressedHash, SuppressingHash, \
+    HashedStream, hash_file
 from dedup.compression import GzipDecompressor, DecompressedStream
 from dedup.image import ImageHash
 
@@ -121,8 +123,28 @@ def process_package(filelike):
         yield "commit"
         break
 
+def process_package_with_hash(filelike, sha256hash):
+    hstream = HashedStream(filelike, hashlib.sha256())
+    for elem in process_package(hstream):
+        if elem == "commit":
+            while hstream.read(4096):
+                pass
+            if hstream.hexdigest() != sha256hash:
+                raise ValueError("hash sum mismatch")
+            yield elem
+            break
+        yield elem
+
 def main():
-    yaml.safe_dump_all(process_package(sys.stdin), sys.stdout)
+    parser = optparse.OptionParser()
+    parser.add_option("-H", "--hash", action="store",
+                      help="verify that stdin hash given sha256 hash")
+    options, args = parser.parse_args()
+    if options.hash:
+        gen = process_package_with_hash(sys.stdin, options.hash)
+    else:
+        gen = process_package(sys.stdin)
+    yaml.safe_dump_all(gen, sys.stdout)
 
 if __name__ == "__main__":
     main()