2 """This script takes a directory or a http base url to a mirror and imports all
3 packages contained. It has rather strong assumptions on the working directory.
15 import concurrent.futures
16 from debian.debian_support import version_compare
18 from dedup.utils import iterate_packages
20 from readyaml import readyaml
22 def process_http(pkgs, url, addhash=True):
23 for pkg in iterate_packages(url, "amd64"):
26 version_compare(pkgs[name]["version"], pkg["Version"]) > 0:
28 inst = dict(version=pkg["Version"],
29 filename="%s/%s" % (url, pkg["Filename"]))
31 inst["sha256hash"] = pkg["SHA256"]
34 def process_file(pkgs, filename):
35 if filename.suffix != ".deb":
36 raise ValueError("filename does not end in .deb")
37 parts = filename.name.split("_")
39 raise ValueError("filename not in form name_version_arch.deb")
40 name, version, _ = parts
41 version = urllib.parse.unquote(version)
42 if name in pkgs and version_compare(pkgs[name]["version"], version) > 0:
44 pkgs[name] = dict(version=version, filename=str(filename))
46 def process_dir(pkgs, d):
47 for entry in d.iterdir():
49 process_file(pkgs, entry)
53 def process_pkg(name, pkgdict, outpath):
54 filename = pkgdict["filename"]
55 print("importing %s" % filename)
56 importcmd = [sys.executable, "importpkg.py"]
57 if "sha256hash" in pkgdict:
58 importcmd.extend(["-H", pkgdict["sha256hash"]])
59 if filename.startswith(("http://", "https://", "ftp://", "file://")):
60 importcmd.append(filename)
61 with outpath.open("w") as outp:
62 subprocess.check_call(importcmd, stdout=outp, close_fds=True)
64 with open(filename) as inp:
65 with outpath.open("w") as outp:
66 subprocess.check_call(importcmd, stdin=inp, stdout=outp,
68 print("preprocessed %s" % name)
71 parser = argparse.ArgumentParser()
72 parser.add_argument("-n", "--new", action="store_true",
73 help="avoid reimporting same versions")
74 parser.add_argument("-p", "--prune", action="store_true",
75 help="prune packages old packages")
76 parser.add_argument("-d", "--database", action="store",
77 default="test.sqlite3",
78 help="path to the sqlite3 database file")
79 parser.add_argument("--noverify", action="store_true",
80 help="do not verify binary package hashes")
81 parser.add_argument("files", nargs='+',
82 help="files or directories or repository urls")
83 args = parser.parse_args()
84 tmpdir = pathlib.Path(tempfile.mkdtemp(prefix="debian-dedup"))
85 db = sqlite3.connect(args.database)
87 cur.execute("PRAGMA foreign_keys = ON;")
88 e = concurrent.futures.ThreadPoolExecutor(multiprocessing.cpu_count())
91 print("processing %s" % d)
92 if d.startswith(("http://", "https://", "ftp://", "file://")):
93 process_http(pkgs, d, not args.noverify)
99 process_file(pkgs, dp)
101 print("reading database")
102 cur.execute("SELECT name, version FROM package;")
103 knownpkgvers = dict((row[0], row[1]) for row in cur.fetchall())
104 distpkgs = set(pkgs.keys())
106 for name in distpkgs:
107 if name in knownpkgvers and \
108 version_compare(pkgs[name]["version"], knownpkgvers[name]) <= 0:
110 knownpkgs = set(knownpkgvers)
115 for name, pkg in pkgs.items():
116 fs[e.submit(process_pkg, name, pkg, tmpdir / name)] = name
118 for f in concurrent.futures.as_completed(fs.keys()):
121 print("%s failed to import: %r" % (name, f.exception()))
124 print("sqlimporting %s" % name)
125 with inf.open() as inp:
128 except Exception as exc:
129 print("%s failed sql with exception %r" % (name, exc))
134 delpkgs = knownpkgs - distpkgs
135 print("clearing packages %s" % " ".join(delpkgs))
136 cur.executemany("DELETE FROM package WHERE name = ?;",
137 ((pkg,) for pkg in delpkgs))
138 # Tables content, dependency and sharing will also be pruned
139 # due to ON DELETE CASCADE clauses.
143 except OSError as err:
144 if err.errno != errno.ENOTEMPTY:
146 print("keeping temporary directory %s due to failed packages %s" %
147 (tmpdir, " ".join(map(str, tmpdir.iterdir()))))
149 if __name__ == "__main__":