move dedup.debpkg.process_control back into importpkg
author Helmut Grohne <helmut@subdivi.de>
Mon, 23 May 2016 19:49:43 +0000 (21:49 +0200)
committer Helmut Grohne <helmut@subdivi.de>
Mon, 23 May 2016 19:49:43 +0000 (21:49 +0200)
After all, it isn't that generic. It knows what information is necessary
for running dedup. Thus it really belongs to the extractor subclass.
By building on handle_control_info, not that much parsing logic is left
in the extractor subclass.

dedup/debpkg.py
importpkg.py

index c64f3c0..3a30b3e 100644 (file)
@@ -7,28 +7,6 @@ from dedup.arreader import ArReader
 from dedup.compression import decompress
 from dedup.hashing import hash_file
 
-def process_control(control_contents):
-    """Parses the contents of a control file from a control.tar of a Debian
-    package and returns a dictionary containing the fields relevant to dedup.
-    @type control_contents: bytes
-    @rtype: {str: object}
-    """
-    control = deb822.Packages(control_contents)
-    package = control["package"]
-    try:
-        source = control["source"].split()[0]
-    except KeyError:
-        source = package
-    version = control["version"]
-    architecture = control["architecture"]
-    # deb822 currently returns :any dependencies raw. see #670679
-    deprelations = control.relations.get("depends", []) + \
-                   control.relations.get("pre-depends", [])
-    depends = set(dep[0]["name"].split(u':', 1)[0]
-                  for dep in deprelations if len(dep) == 1)
-    return dict(package=package, source=source, version=version,
-                architecture=architecture, depends=depends)
-
 class MultiHash(object):
     def __init__(self, *hashes):
         self.hashes = hashes
index 92c474e..b01fad3 100755 (executable)
@@ -16,8 +16,7 @@ except ImportError:
 
 import yaml
 
-from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes, \
-        process_control
+from dedup.debpkg import DebExtractor, decodetarname, get_tar_hashes
 from dedup.hashing import DecompressedHash, SuppressingHash, HashedStream, \
         HashBlacklistContent
 from dedup.compression import GzipDecompressor
@@ -56,13 +55,19 @@ class ImportpkgExtractor(DebExtractor):
         DebExtractor.__init__(self)
         self.callback = callback
 
-    def handle_control_tar(self, tarfileobj):
-        for elem in tarfileobj:
-            if elem.name not in ("./control", "control"):
-                continue
-            self.callback(process_control(tarfileobj.extractfile(elem).read()))
-            return
-        raise ValueError("missing control file")
+    def handle_control_info(self, info):
+        try:
+            source = info["source"].split()[0]
+        except KeyError:
+            source = info["package"]
+        # deb822 currently returns :any dependencies raw. see #670679
+        deprelations = info.relations.get("depends", []) + \
+                       info.relations.get("pre-depends", [])
+        depends = set(dep[0]["name"].split(u':', 1)[0]
+                      for dep in deprelations if len(dep) == 1)
+        self.callback(dict(package=info["package"], source=source,
+                           version=info["version"],
+                           architecture=info["architecture"], depends=depends))
 
     def handle_data_tar(self, tarfileobj):
         for name, size, hashes in get_tar_hashes(tarfileobj,