autoimport: improve fetching package lists
author Helmut Grohne <helmut@subdivi.de>
Wed, 25 May 2016 17:27:35 +0000 (19:27 +0200)
committer Helmut Grohne <helmut@subdivi.de>
Wed, 25 May 2016 17:27:35 +0000 (19:27 +0200)
Move the fetching part into dedup.utils. Instead of hard-coding the
gzip-compressed copy, try xz, gz and plain in that order. Also take care
to actually close the connection.
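
For illustration, the caller-side pattern this introduces looks roughly as
follows; the mirror URL is only an example and not part of the change:

    import contextlib

    from debian import deb822

    from dedup.utils import open_compressed_mirror_url

    # Any Debian mirror base works the same way; this one is illustrative.
    url = "http://deb.debian.org/debian/dists/sid/main/binary-amd64/Packages"
    # The helper requests Packages.xz, then Packages.gz, then Packages and
    # returns an already decompressed file-like object.
    with contextlib.closing(open_compressed_mirror_url(url)) as pkglist:
        for pkg in deb822.Packages.iter_paragraphs(pkglist):
            print(pkg["Package"], pkg["Version"])
    # contextlib.closing() guarantees the underlying connection is closed.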

autoimport.py
dedup/utils.py

diff --git a/autoimport.py b/autoimport.py
index 9c416c5..5fb0659 100755
@@ -4,6 +4,7 @@ packages contained. It has rather strong assumptions on the working directory.
 """
 
 import argparse
+import contextlib
 import errno
 import multiprocessing
 import os
@@ -15,33 +16,28 @@ try:
     from urllib.parse import unquote
 except ImportError:
     from urllib import unquote
-try:
-    from urllib.request import urlopen
-except ImportError:
-    from urllib2 import urlopen
 
 import concurrent.futures
 from debian import deb822
 from debian.debian_support import version_compare
 
-from dedup.compression import decompress
+from dedup.utils import open_compressed_mirror_url
 
 from readyaml import readyaml
 
 def process_http(pkgs, url, addhash=True):
-    pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz")
-    pkglist = decompress(pkglist, ".gz")
-    pkglist = deb822.Packages.iter_paragraphs(pkglist)
-    for pkg in pkglist:
-        name = pkg["Package"]
-        if name in pkgs and \
-                version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
-            continue
-        inst = dict(version=pkg["Version"],
-                    filename="%s/%s" % (url, pkg["Filename"]))
-        if addhash:
-            inst["sharing"] = pkg["SHA256"]
-        pkgs[name] = inst
+    listurl = url + "/dists/sid/main/binary-amd64/Packages"
+    with contextlib.closing(open_compressed_mirror_url(listurl)) as pkglist:
+        for pkg in deb822.Packages.iter_paragraphs(pkglist):
+            name = pkg["Package"]
+            if name in pkgs and \
+                    version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
+                continue
+            inst = dict(version=pkg["Version"],
+                        filename="%s/%s" % (url, pkg["Filename"]))
+            if addhash:
+                inst["sharing"] = pkg["SHA256"]
+            pkgs[name] = inst
 
 def process_file(pkgs, filename):
     base = os.path.basename(filename)
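
For reference, the contextlib.closing() wrapper used in process_http() above
behaves like the following sketch (essentially the stdlib definition); this is
what makes sure the urlopen() handle is closed even when parsing fails:

    import contextlib

    # contextlib.closing(thing) is essentially this context manager:
    @contextlib.contextmanager
    def closing(thing):
        try:
            yield thing
        finally:
            thing.close()
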
diff --git a/dedup/utils.py b/dedup/utils.py
index 6fb233b..dab6653 100644
@@ -1,5 +1,17 @@
+import errno
+try:
+    from urllib.error import URLError, HTTPError
+except ImportError:
+    from urllib2 import URLError, HTTPError
+try:
+    from urllib.request import urlopen
+except ImportError:
+    from urllib2 import urlopen
+
 from debian.debian_support import version_compare
 
+from dedup.compression import decompress
+
 def fetchiter(cursor):
     rows = cursor.fetchmany()
     while rows:
@@ -10,3 +22,24 @@ def fetchiter(cursor):
 def sql_add_version_compare(db):
     db.create_collation("debian_version", version_compare)
     db.create_function("debian_version_compare", 2, version_compare)
+
+def open_compressed_mirror_url(url, extensions=(u".xz", u".gz", u"")):
+    """Fetch the given url. Try appending each of the given compression
+    schemes and move on in case it doesn't exist. Decompress the resulting
+    stream on the fly.
+    @returns: a file-like with the decompressed contents
+    """
+    for ext in extensions:
+        try:
+            handle = urlopen(url + ext)
+        except HTTPError as error:
+            if error.code != 404:
+                raise
+        except URLError as error:
+            if not hasattr(error.reason, "errno"):
+                raise
+            if error.reason.errno != errno.ENOENT:
+                raise
+        else:
+            return decompress(handle, ext)
+    raise OSError(errno.ENOENT, "No such file or directory")
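
A hedged usage sketch of the fallback behaviour; the local mirror path below
is made up. For file:// URLs a missing file surfaces as a URLError carrying
ENOENT rather than as an HTTP 404, which is why both cases are handled above:

    import contextlib
    import errno

    from dedup.utils import open_compressed_mirror_url

    url = "file:///srv/mirror/debian/dists/sid/main/binary-amd64/Packages"
    try:
        # Requests Packages.xz, Packages.gz and Packages in that order.
        with contextlib.closing(open_compressed_mirror_url(url)) as stream:
            data = stream.read()  # already decompressed contents
    except OSError as err:
        # Raised when none of the three variants exists on the mirror.
        if err.errno != errno.ENOENT:
            raise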