importpkg: add support for control.tar and control.tar.xz
[~helmut/debian-dedup.git] / importpkg.py
index aeccda5..7482c4f 100755 (executable)
@@ -54,41 +54,56 @@ def process_package(filelike, hash_functions):
         except EOFError:
             raise ValueError("data.tar not found")
         if name == "control.tar.gz":
-            if state != "start":
-                raise ValueError("unexpected control.tar.gz")
-            state = "control"
+            new_state = "control"
             tf = tarfile.open(fileobj=af, mode="r|gz")
-            for elem in tf:
-                if elem.name != "./control":
-                    continue
-                if state != "control":
-                    raise ValueError("duplicate control file")
-                state = "control_file"
-                yield process_control(tf.extractfile(elem).read())
-                break
-            continue
+        elif name == "control.tar.xz":
+            new_state = "control"
+            zf = DecompressedStream(af, lzma.LZMADecompressor())
+            tf = tarfile.open(fileobj=zf, mode="r|")
+        elif name == "control.tar":
+            new_state = "control"
+            tf = tarfile.open(fileobj=af, mode="r|")
         elif name == "data.tar.gz":
+            new_state = "data"
             tf = tarfile.open(fileobj=af, mode="r|gz")
         elif name == "data.tar.bz2":
+            new_state = "data"
             tf = tarfile.open(fileobj=af, mode="r|bz2")
         elif name == "data.tar.xz":
+            new_state = "data"
             zf = DecompressedStream(af, lzma.LZMADecompressor())
             tf = tarfile.open(fileobj=zf, mode="r|")
         elif name == "data.tar":
+            new_state = "data"
             tf = tarfile.open(fileobj=af, mode="r|")
         else:
             continue
-        if state != "control_file":
-            raise ValueError("missing control file")
-        for name, size, hashes in get_tar_hashes(tf, hash_functions):
-            try:
-                name = name.decode("utf8")
-            except UnicodeDecodeError:
-                print("warning: skipping filename with encoding error")
-                continue # skip files with non-utf8 encoding for now
-            yield dict(name=name, size=size, hashes=hashes)
-        yield "commit"
-        break
+        if new_state == "control":
+            if state != "start":
+                raise ValueError("unexpected control.tar")
+            state = new_state
+            for elem in tf:
+                if elem.name != "./control":
+                    continue
+                if state != "control":
+                    raise ValueError("duplicate control file")
+                state = "control_file"
+                yield process_control(tf.extractfile(elem).read())
+                break
+            continue
+        elif new_state == "data":
+            if state != "control_file":
+                raise ValueError("missing control file")
+            state = new_state
+            for name, size, hashes in get_tar_hashes(tf, hash_functions):
+                try:
+                    name = name.decode("utf8")
+                except UnicodeDecodeError:
+                    print("warning: skipping filename with encoding error")
+                    continue # skip files with non-utf8 encoding for now
+                yield dict(name=name, size=size, hashes=hashes)
+            yield "commit"
+            break
 
 def process_package_with_hash(filelike, hash_functions, sha256hash):
     hstream = HashedStream(filelike, hashlib.sha256())