2 """This script takes a directory or an HTTP base URL of a mirror and imports all
3 packages it contains. It makes rather strong assumptions about the working directory.
15 from urllib.parse import unquote
17 from urllib import unquote
19 from urllib.request import urlopen
21 from urllib import urlopen
23 import concurrent.futures
24 from debian import deb822
25 from debian.debian_support import version_compare
27 from dedup.compression import decompress
29 from readyaml import readyaml
def process_http(pkgs, url, addhash=True):
    """Merge the packages listed in a mirror's package index into ``pkgs``.

    Fetches ``/dists/sid/main/binary-amd64/Packages.gz`` below ``url``, passes
    it through ``decompress`` and iterates over its deb822 paragraphs. A
    package is recorded unless ``pkgs`` already holds a strictly newer version
    of it (as judged by ``version_compare``).

    :param pkgs: dict mapping a package name to a dict with the keys
        ``version`` and ``filename`` (plus optionally ``sha256hash``). It is
        updated in place.
    :param url: base URL of the mirror; ``Filename`` fields from the index are
        appended to it to form each package's download location.
    :param addhash: when true, the ``SHA256`` field of the index is stored
        under the ``sha256hash`` key so that process_pkg passes it on to the
        importer with ``-H``.
    """
    pkglist = urlopen(url + "/dists/sid/main/binary-amd64/Packages.gz")
    pkglist = decompress(pkglist, ".gz")
    pkglist = deb822.Packages.iter_paragraphs(pkglist)
    for pkg in pkglist:
        name = pkg["Package"]
        if name in pkgs and \
                version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
            # A newer version was already collected from another source.
            continue
        inst = dict(version=pkg["Version"],
                    filename="%s/%s" % (url, pkg["Filename"]))
        if addhash:
            # The key must be "sha256hash": that is the only key process_pkg
            # checks before adding "-H <hash>" to the import command. Storing
            # it under any other name silently disables hash verification.
            inst["sha256hash"] = pkg["SHA256"]
        pkgs[name] = inst
def process_file(pkgs, filename):
    """Record a single ``name_version_arch.deb`` file in ``pkgs``.

    The package name and version are taken from the file's base name; the
    version component is URL-unquoted. The entry is stored unless ``pkgs``
    already holds a strictly newer version of the same package.

    :param pkgs: dict mapping a package name to a dict with the keys
        ``version`` and ``filename``. It is updated in place.
    :param filename: path of the .deb file.
    :raises ValueError: if the base name does not end in ``.deb`` or does not
        consist of exactly three ``_``-separated components.
    """
    basename = os.path.basename(filename)
    if not basename.endswith(".deb"):
        raise ValueError("filename does not end in .deb")
    fields = basename.split("_")
    if len(fields) != 3:
        raise ValueError("filename not in form name_version_arch.deb")
    name = fields[0]
    version = unquote(fields[1])
    # Store the entry when the package is unknown or the known version is not
    # strictly newer than this one.
    if name not in pkgs or version_compare(pkgs[name]["version"], version) <= 0:
        pkgs[name] = {"version": version, "filename": filename}
def process_dir(pkgs, d):
    """Feed every entry of directory ``d`` to process_file.

    Entries that process_file rejects with ValueError (names that are not of
    the form ``name_version_arch.deb``) are ignored.

    :param pkgs: package dict that process_file updates in place.
    :param d: path of the directory to scan (not recursive).
    """
    for path in (os.path.join(d, entry) for entry in os.listdir(d)):
        try:
            process_file(pkgs, path)
        except ValueError:
            # Not a package file name; move on to the next entry.
            continue
def process_pkg(name, pkgdict, outpath):
    """Run ``importpkg.py`` on one package and write its output to ``outpath``.

    The importer is started with the current Python interpreter and is looked
    up as ``importpkg.py`` relative to the working directory. If ``pkgdict``
    has a ``sha256hash`` entry it is passed on with ``-H``. Remote locations
    (http, https, ftp and file URLs) are handed to the importer as an
    argument; a local file is instead supplied on the importer's stdin.

    :param name: package name, used only for the progress message.
    :param pkgdict: dict with the key ``filename`` and optionally
        ``sha256hash``.
    :param outpath: file that receives the importer's stdout.
    :raises subprocess.CalledProcessError: if the importer exits non-zero.
    """
    source = pkgdict["filename"]
    print("importing %s" % source)
    command = [sys.executable, "importpkg.py"]
    if "sha256hash" in pkgdict:
        command += ["-H", pkgdict["sha256hash"]]
    is_remote = source.startswith(("http://", "https://", "ftp://", "file://"))
    if is_remote:
        command.append(source)
        with open(outpath, "w") as sink:
            subprocess.check_call(command, stdout=sink, close_fds=True)
    else:
        # Open the input first so that a missing package file fails before
        # the output file is created.
        with open(source) as package, open(outpath, "w") as sink:
            subprocess.check_call(command, stdin=package, stdout=sink,
                                  close_fds=True)
    print("preprocessed %s" % name)
84 parser = argparse.ArgumentParser()
85 parser.add_argument("-n", "--new", action="store_true",
86 help="avoid reimporting same versions")
87 parser.add_argument("-p", "--prune", action="store_true",
88 help="prune packages old packages")
89 parser.add_argument("-d", "--database", action="store",
90 default="test.sqlite3",
91 help="path to the sqlite3 database file")
92 parser.add_argument("--noverify", action="store_true",
93 help="do not verify binary package hashes")
94 parser.add_argument("files", nargs='+',
95 help="files or directories or repository urls")
96 args = parser.parse_args()
97 tmpdir = tempfile.mkdtemp(prefix="debian-dedup")
98 db = sqlite3.connect(args.database)
100 cur.execute("PRAGMA foreign_keys = ON;")
101 e = concurrent.futures.ThreadPoolExecutor(multiprocessing.cpu_count())
104 print("processing %s" % d)
105 if d.startswith(("http://", "https://", "ftp://", "file://")):
106 process_http(pkgs, d, not args.noverify)
107 elif os.path.isdir(d):
110 process_file(pkgs, d)
112 print("reading database")
113 cur.execute("SELECT name, version FROM package;")
114 knownpkgs = dict((row[0], row[1]) for row in cur.fetchall())
115 distpkgs = set(pkgs.keys())
117 for name in distpkgs:
118 if name in knownpkgs and version_compare(pkgs[name]["version"],
119 knownpkgs[name]) <= 0:
121 knownpkgs = set(knownpkgs)
125 for name, pkg in pkgs.items():
126 outpath = os.path.join(tmpdir, name)
127 fs[e.submit(process_pkg, name, pkg, outpath)] = name
129 for f in concurrent.futures.as_completed(fs.keys()):
132 print("%s failed to import: %r" % (name, f.exception()))
134 inf = os.path.join(tmpdir, name)
135 print("sqlimporting %s" % name)
136 with open(inf) as inp:
139 except Exception as exc:
140 print("%s failed sql with exception %r" % (name, exc))
145 delpkgs = knownpkgs - distpkgs
146 print("clearing packages %s" % " ".join(delpkgs))
147 cur.executemany("DELETE FROM package WHERE name = ?;",
148 ((pkg,) for pkg in delpkgs))
149 # Tables content, dependency and sharing will also be pruned
150 # due to ON DELETE CASCADE clauses.
154 except OSError as err:
155 if err.errno != errno.ENOTEMPTY:
157 print("keeping temporary directory %s due to failed packages %s" %
158 (tmpdir, " ".join(os.listdir(tmpdir))))
160 if __name__ == "__main__":