3 CREATE TABLE package (package TEXT PRIMARY KEY, version TEXT, architecture TEXT);
4 CREATE TABLE content (package TEXT, filename TEXT, size INTEGER, function TEXT, hash TEXT, FOREIGN KEY (package) REFERENCES package(package));
5 CREATE INDEX content_package_index ON content (package);
6 CREATE INDEX content_hash_index ON content (hash);
class ArReader(object):
    """Incremental reader for Unix ar(1) archives such as .deb files.

    Wraps a binary file object and exposes the archive one member at a
    time.  Only the common ar format used by .deb packages is handled.
    """
    global_magic = b"!<arch>\n"  # magic bytes at the very start of an archive
    file_magic = b"`\n"          # terminator of every 60-byte member header

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.remaining = 0  # unread payload bytes of the current member
        self.padding = 0    # 1 if the current member needs an alignment byte

    def skip(self, length):
        """Consume exactly length bytes; raise ValueError on a short read."""
        while length:
            data = self.fileobj.read(min(4096, length))
            if not data:
                raise ValueError("archive truncated")
            length -= len(data)

    def read_magic(self):
        """Consume the global archive header or raise ValueError."""
        data = self.fileobj.read(len(self.global_magic))
        if data != self.global_magic:
            raise ValueError("ar global header not found")

    def read_entry(self):
        """Advance to the next member and return its name as bytes.

        Raises EOFError at end of archive and ValueError on a malformed
        header.
        """
        self.skip_current_entry()
        if self.padding:
            # members are 2-byte aligned; eat the single pad byte.
            # bytes literal keeps the comparison valid on Python 2 and 3
            if self.fileobj.read(1) != b"\n":
                raise ValueError("missing ar padding")
            self.padding = 0
        file_header = self.fileobj.read(60)
        if not file_header:
            raise EOFError("end of archive found")
        parts = struct.unpack("16s 12s 6s 6s 8s 10s 2s", file_header)
        parts = [p.rstrip(b" ") for p in parts]
        if parts.pop() != self.file_magic:
            raise ValueError("ar file header not found")
        self.remaining = int(parts[5])        # decimal size field
        self.padding = self.remaining % 2     # odd-sized members are padded
        return parts[0]  # name

    def skip_current_entry(self):
        """Discard whatever is left of the current member."""
        self.skip(self.remaining)
        self.remaining = 0

    def read(self, length=None):
        """Read up to length bytes (all remaining if None) of the member."""
        if length is None:
            length = self.remaining
        else:
            length = min(self.remaining, length)
        data = self.fileobj.read(length)
        self.remaining -= len(data)
        return data
class XzStream(object):
    """Minimal read-only file-like object decompressing an xz stream."""
    blocksize = 65536  # compressed bytes pulled from the source per refill

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.decomp = lzma.LZMADecompressor()
        self.buff = b""  # decompressed bytes not yet handed to the caller

    def read(self, length):
        """Return up to length decompressed bytes; b"" at end of stream."""
        data = True  # sentinel: becomes b"" once the source is exhausted
        while True:
            if len(self.buff) >= length:
                ret = self.buff[:length]
                self.buff = self.buff[length:]
                return ret
            elif not data:  # read EOF in last iteration
                ret = self.buff
                self.buff = b""
                return ret
            data = self.fileobj.read(self.blocksize)
            if data:
                self.buff += self.decomp.decompress(data)
            elif hasattr(self.decomp, "flush"):
                # pyliblzma's decompressor wants a final flush(); the
                # stdlib lzma decompressor has no flush method, so guard.
                self.buff += self.decomp.flush()
class MultiHash(object):
    """Fan update() calls out to several hashlib-style objects at once."""

    def __init__(self, *hashes):
        self.hashes = hashes  # kept public: callers iterate this tuple

    def update(self, data):
        for hasher in self.hashes:
            hasher.update(data)
class HashBlacklist(object):
    """Wrap a hashlib-style object; blacklisted digests come back as None."""

    def __init__(self, hasher, blacklist=frozenset()):
        # frozenset default avoids the shared-mutable-default pitfall of
        # the former blacklist=set(); membership behavior is unchanged.
        self.hasher = hasher
        self.blacklist = blacklist
        self.update = self.hasher.update  # delegate update() directly
        self.name = hasher.name

    def hexdigest(self):
        """Return the digest, or None when it is blacklisted as boring."""
        digest = self.hasher.hexdigest()
        if digest in self.blacklist:
            return None
        return digest
class GzipDecompressor(object):
    """zlib.decompressobj work-alike that understands the gzip header.

    The gzip header is parsed by hand so data can be fed incrementally;
    inflation of the payload is delegated to a raw zlib decompressor.
    """

    def __init__(self):
        self.inbuffer = b""       # header bytes collected so far
        self.decompressor = None  # zlib.decompressobj(-zlib.MAX_WBITS) once past the header

    def decompress(self, data):
        """Feed compressed bytes; return whatever plaintext is available."""
        if self.decompressor:
            data = self.decompressor.decompress(data)
            if not self.decompressor.unused_data:
                return data
            # bytes past the deflate stream (trailer or next gzip member):
            # drop the inflater and restart header parsing on them.
            unused_data = self.decompressor.unused_data
            self.decompressor = None
            return data + self.decompress(unused_data)
        self.inbuffer += data
        skip = 10  # fixed-size part of the gzip header
        if len(self.inbuffer) < skip:
            return b""
        if not self.inbuffer.startswith(b"\037\213\010"):
            raise ValueError("gzip magic not found")
        # [3:4] slice keeps ord() valid on both Python 2 (str) and 3 (bytes)
        flag = ord(self.inbuffer[3:4])
        if flag & 4:  # FEXTRA: 2-byte little-endian length plus payload
            if len(self.inbuffer) < skip + 2:
                return b""
            length, = struct.unpack("<H", self.inbuffer[skip:skip+2])
            skip += 2 + length
        for field in (8, 16):  # FNAME, FCOMMENT: zero-terminated strings
            if flag & field:
                length = self.inbuffer.find(b"\0", skip)
                if length < 0:
                    return b""  # terminator not buffered yet
                skip = length + 1
        if flag & 2:  # FHCRC: 2-byte header checksum
            skip += 2
        if len(self.inbuffer) < skip:
            return b""
        data = self.inbuffer[skip:]
        self.inbuffer = b""
        self.decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
        return self.decompress(data)

    @property
    def unused_data(self):
        if self.decompressor:
            return self.decompressor.unused_data
        return b""

    def flush(self):
        """Return any plaintext still buffered inside the inflater."""
        if not self.decompressor:
            return b""
        return self.decompressor.flush()

    def copy(self):
        """Independent clone preserving header buffer and inflater state."""
        new = GzipDecompressor()
        new.inbuffer = self.inbuffer
        if self.decompressor:
            new.decompressor = self.decompressor.copy()
        return new
class DecompressedHash(object):
    """Hash the decompressed form of whatever is fed to update()."""

    def __init__(self, decompressor, hashobj):
        self.decompressor = decompressor
        self.hashobj = hashobj

    def update(self, data):
        self.hashobj.update(self.decompressor.decompress(data))

    def hexdigest(self):
        """Digest including any data still buffered in the decompressor.

        Works on copies so the object stays usable for further updates.
        """
        if not hasattr(self.decompressor, "flush"):
            return self.hashobj.hexdigest()
        tmpdecomp = self.decompressor.copy()
        data = tmpdecomp.flush()
        tmphash = self.hashobj.copy()
        tmphash.update(data)
        return tmphash.hexdigest()
class SuppressingHash(object):
    """Hash wrapper that degrades to None on the configured exceptions.

    Once the wrapped object raises one of the exception types, it is
    dropped and hexdigest() returns None from then on.
    """

    def __init__(self, hashobj, exceptions=()):
        self.hashobj = hashobj
        self.exceptions = exceptions

    def update(self, data):
        if self.hashobj:
            try:
                self.hashobj.update(data)
            except self.exceptions:
                self.hashobj = None  # poisoned: stop hashing

    def hexdigest(self):
        if self.hashobj:
            try:
                return self.hashobj.hexdigest()
            except self.exceptions:
                self.hashobj = None
        return None
def hash_file(hashobj, filelike, blocksize=65536):
    """Feed filelike to hashobj in blocksize chunks; return hashobj."""
    data = filelike.read(blocksize)
    while data:
        hashobj.update(data)
        data = filelike.read(blocksize)
    return hashobj
# sha512 digests of trivial file contents that are not worth recording.
# frozenset: this constant is only ever tested for membership.
boring_sha512_hashes = frozenset((
    # sha512 of the empty file
    "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
    # sha512 of a file containing a single newline
    "be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09"))
def sha512_nontrivial():
    """Return a sha512 hasher whose hexdigest() is None for boring files."""
    hasher = hashlib.sha512()
    return HashBlacklist(hasher, boring_sha512_hashes)
def gziphash():
    """Return a hasher for the sha512 of the gunzipped file content.

    ValueError/zlib.error from non-gzip input are suppressed, so the
    digest simply becomes None for files that are not gzip compressed.
    """
    hashobj = DecompressedHash(GzipDecompressor(), hashlib.sha512())
    hashobj = SuppressingHash(hashobj, (ValueError, zlib.error))
    hashobj.name = "gzip_sha512"
    return HashBlacklist(hashobj, boring_sha512_hashes)
def get_hashes(filelike):
    """Yield (name, size, hash_function_name, hexdigest) tuples for every
    regular file inside the data.tar member of the .deb read from filelike.

    Raises ValueError if the archive has no data.tar.{gz,bz2,xz} member.
    """
    af = ArReader(filelike)
    af.read_magic()
    while True:
        try:
            name = af.read_entry()
        except EOFError:
            break  # archive exhausted without finding data.tar
        # ArReader yields bytes names; bytes literals compare correctly
        # on Python 2 (where bytes is str) as well as Python 3.
        if name == b"data.tar.gz":
            tf = tarfile.open(fileobj=af, mode="r|gz")
        elif name == b"data.tar.bz2":
            tf = tarfile.open(fileobj=af, mode="r|bz2")
        elif name == b"data.tar.xz":
            zf = XzStream(af)
            tf = tarfile.open(fileobj=zf, mode="r|")
        else:
            continue  # skip control.tar.*, debian-binary, ...
        for elem in tf:
            if not elem.isreg():  # excludes hard links as well
                continue
            hasher = MultiHash(sha512_nontrivial(), gziphash())
            hasher = hash_file(hasher, tf.extractfile(elem))
            for hashobj in hasher.hashes:
                hashvalue = hashobj.hexdigest()
                if hashvalue:  # None means boring or not applicable
                    yield (elem.name, elem.size, hashobj.name, hashvalue)
        return
    raise ValueError("data.tar not found")
def main():
    """Import the .deb named on the command line into test.sqlite3.

    Keeps only the newest version per package: older rows are replaced,
    and the run is a no-op when the database already holds a newer one.
    """
    from functools import cmp_to_key  # local: keeps top-of-file imports untouched
    filename = sys.argv[1]
    match = re.match(r"(?:.*/)?(?P<name>[^_]+)_(?P<version>[^_]+)_(?P<architecture>[^_.]+)\.deb$", filename)
    package, version, architecture = match.groups()
    db = sqlite3.connect("test.sqlite3")
    cur = db.cursor()
    cur.execute("SELECT version FROM package WHERE package = ?;", (package,))
    versions = [tpl[0] for tpl in cur.fetchall()]
    versions.append(version)
    # list.sort(cmp=...) is Python 2 only; cmp_to_key works on 2.7 and 3.
    versions.sort(key=cmp_to_key(apt_pkg.version_compare))
    if versions[-1] != version:
        return  # not the newest version
    cur.execute("DELETE FROM package WHERE package = ?;", (package,))
    cur.execute("DELETE FROM content WHERE package = ?;", (package,))
    cur.execute("INSERT INTO package (package, version, architecture) VALUES (?, ?, ?);",
                (package, version, architecture))
    # .deb archives are binary: "rb" is required for correctness on
    # Python 3 and on non-Unix platforms.
    with open(filename, "rb") as pkg:
        for name, size, function, hexhash in get_hashes(pkg):
            if isinstance(name, bytes):  # tarfile may yield str or bytes
                name = name.decode("utf8")
            cur.execute("INSERT INTO content (package, filename, size, function, hash) VALUES (?, ?, ?, ?, ?);",
                        (package, name, size, function, hexhash))
    db.commit()

if __name__ == "__main__":
    main()