5 from dedup.utils import fetchiter
def add_values(cursor, insert_key, files, size):
    """Add ``files`` and ``size`` to the ``sharing`` row identified by a key.

    The row is looked up by ``(package1, package2, func1, func2)``.  If it
    exists, its ``files`` and ``size`` columns are incremented; otherwise a
    new row is inserted with the given values.

    Args:
        cursor: DB-API cursor on a database containing the ``sharing`` table.
        insert_key: 4-tuple ``(package1, package2, func1, func2)``.
        files: Number of files to add to the row's ``files`` column.
        size: Number of bytes to add to the row's ``size`` column.
    """
    cursor.execute(
        "UPDATE sharing SET files = files + ?, size = size + ? WHERE package1 = ? AND package2 = ? AND func1 = ? AND func2 = ?;",
        (files, size) + insert_key,
    )
    # rowcount reports how many rows the UPDATE touched; a positive value
    # means the row already existed and has been incremented, so we are done.
    if cursor.rowcount > 0:
        return
    # No matching row yet: create it with the initial values.
    cursor.execute(
        "INSERT INTO sharing (package1, package2, func1, func2, files, size) VALUES (?, ?, ?, ?, ?, ?);",
        insert_key + (files, size),
    )
def compute_pkgdict(rows):
    """Group content rows by package and then by function.

    Args:
        rows: Iterable of ``(package, filename, size, function)`` tuples.

    Returns:
        A dict mapping each package to a dict that maps each function to a
        list of ``(size, filename)`` tuples, in the order the rows were seen.
    """
    pkgdict = {}
    for package, filename, size, function in rows:
        funcdict = pkgdict.setdefault(package, {})
        funcdict.setdefault(function, []).append((size, filename))
    return pkgdict
def process_pkgdict(cursor, pkgdict):
    """Record sharing totals for every ordered pair of packages in ``pkgdict``.

    For each ``(package1, function1)`` entry, the file count and total size
    of its files are added (via ``add_values``) to the row keyed by
    ``(package1, package2, function1, function2)`` for every package2 and
    every function2 present in that package2.

    Args:
        cursor: DB-API cursor handed through to ``add_values``.
        pkgdict: Mapping ``package -> function -> [(size, filename), ...]``
            as produced by ``compute_pkgdict``.
    """
    for package1, funcdict1 in pkgdict.items():
        for function1, files in funcdict1.items():
            numfiles = len(files)
            size = sum(entry[0] for entry in files)
            # Invariant across the package2 loop, so compute it once.
            smallest = min(entry[0] for entry in files)
            for package2, funcdict2 in pkgdict.items():
                if package1 == package2:
                    # Comparing a package with itself: one file (the
                    # smallest) is left out of the totals -- presumably
                    # because one copy always has to remain.
                    pkgnumfiles = numfiles - 1
                    pkgsize = size - smallest
                    if pkgnumfiles == 0:
                        # A lone file does not share anything with its own
                        # package; avoid writing an all-zero record.
                        continue
                else:
                    pkgnumfiles = numfiles
                    pkgsize = size
                for function2 in funcdict2:
                    insert_key = (package1, package2, function1, function2)
                    add_values(cursor, insert_key, pkgnumfiles, pkgsize)
def main():
    """Rebuild the ``sharing`` table of ``test.sqlite3`` from ``content``.

    All existing ``sharing`` rows are deleted.  Then, for every hash that
    occurs more than once in ``content``, the matching rows are grouped with
    ``compute_pkgdict`` and aggregated into ``sharing`` by
    ``process_pkgdict``.  The result is committed once at the end; if
    anything fails, the connection is closed without committing.
    """
    db = sqlite3.connect("test.sqlite3")
    try:
        cur = db.cursor()
        cur.execute("DELETE FROM sharing;")
        # A separate cursor is used for the outer query because ``cur`` is
        # re-executed inside the loop, which would discard its pending rows.
        readcur = db.cursor()
        readcur.execute("SELECT hash FROM content GROUP BY hash HAVING count(*) > 1;")
        # NOTE(review): fetchiter is assumed to yield the cursor's result
        # rows one by one -- confirm against dedup.utils.
        for hashvalue, in fetchiter(readcur):
            cur.execute(
                "SELECT package, filename, size, function FROM content WHERE hash = ?;",
                (hashvalue,),
            )
            rows = cur.fetchall()
            print("processing hash %s with %d entries" % (hashvalue, len(rows)))
            pkgdict = compute_pkgdict(rows)
            process_pkgdict(cur, pkgdict)
        db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    main()