importpkg: add support for control.tar and control.tar.xz
author Guillem Jover <guillem@debian.org>
Wed, 7 May 2014 19:06:38 +0000 (21:06 +0200)
committer Helmut Grohne <helmut@subdivi.de>
Sun, 11 May 2014 13:29:11 +0000 (15:29 +0200)
dpkg supports those since 1.17.6.

Signed-off-by: Guillem Jover <guillem@debian.org>
dedup/debpkg.py
importpkg.py

index 2d67135..dbee849 100644 (file)
@@ -3,7 +3,7 @@ from debian import deb822
 from dedup.hashing import hash_file
 
 def process_control(control_contents):
-    """Parses the contents of a control file from a control.tar.gz of a Debian
+    """Parses the contents of a control file from a control.tar of a Debian
     package and returns a dictionary containing the fields relevant to dedup.
     @type control_contents: bytes
     @rtype: {str: object}
index aeccda5..7482c4f 100755 (executable)
@@ -54,41 +54,56 @@ def process_package(filelike, hash_functions):
         except EOFError:
             raise ValueError("data.tar not found")
         if name == "control.tar.gz":
-            if state != "start":
-                raise ValueError("unexpected control.tar.gz")
-            state = "control"
+            new_state = "control"
             tf = tarfile.open(fileobj=af, mode="r|gz")
-            for elem in tf:
-                if elem.name != "./control":
-                    continue
-                if state != "control":
-                    raise ValueError("duplicate control file")
-                state = "control_file"
-                yield process_control(tf.extractfile(elem).read())
-                break
-            continue
+        elif name == "control.tar.xz":
+            new_state = "control"
+            zf = DecompressedStream(af, lzma.LZMADecompressor())
+            tf = tarfile.open(fileobj=zf, mode="r|")
+        elif name == "control.tar":
+            new_state = "control"
+            tf = tarfile.open(fileobj=af, mode="r|")
         elif name == "data.tar.gz":
+            new_state = "data"
             tf = tarfile.open(fileobj=af, mode="r|gz")
         elif name == "data.tar.bz2":
+            new_state = "data"
             tf = tarfile.open(fileobj=af, mode="r|bz2")
         elif name == "data.tar.xz":
+            new_state = "data"
             zf = DecompressedStream(af, lzma.LZMADecompressor())
             tf = tarfile.open(fileobj=zf, mode="r|")
         elif name == "data.tar":
+            new_state = "data"
             tf = tarfile.open(fileobj=af, mode="r|")
         else:
             continue
-        if state != "control_file":
-            raise ValueError("missing control file")
-        for name, size, hashes in get_tar_hashes(tf, hash_functions):
-            try:
-                name = name.decode("utf8")
-            except UnicodeDecodeError:
-                print("warning: skipping filename with encoding error")
-                continue # skip files with non-utf8 encoding for now
-            yield dict(name=name, size=size, hashes=hashes)
-        yield "commit"
-        break
+        if new_state == "control":
+            if state != "start":
+                raise ValueError("unexpected control.tar")
+            state = new_state
+            for elem in tf:
+                if elem.name != "./control":
+                    continue
+                if state != "control":
+                    raise ValueError("duplicate control file")
+                state = "control_file"
+                yield process_control(tf.extractfile(elem).read())
+                break
+            continue
+        elif new_state == "data":
+            if state != "control_file":
+                raise ValueError("missing control file")
+            state = new_state
+            for name, size, hashes in get_tar_hashes(tf, hash_functions):
+                try:
+                    name = name.decode("utf8")
+                except UnicodeDecodeError:
+                    print("warning: skipping filename with encoding error")
+                    continue # skip files with non-utf8 encoding for now
+                yield dict(name=name, size=size, hashes=hashes)
+            yield "commit"
+            break
 
 def process_package_with_hash(filelike, hash_functions, sha256hash):
     hstream = HashedStream(filelike, hashlib.sha256())