importpkg: ignore filenames with encoding errors
author Helmut Grohne <helmut@subdivi.de>
Sat, 23 Feb 2013 08:53:33 +0000 (09:53 +0100)
committer Helmut Grohne <helmut@subdivi.de>
Sat, 23 Feb 2013 08:53:33 +0000 (09:53 +0100)
importpkg.py

index 89020b9..5901b57 100755 (executable)
@@ -167,8 +167,13 @@ def process_package(db, filelike):
         if state != "control_file":
             raise ValueError("missing control file")
         for name, size, function, hexhash in get_hashes(tf):
+            try:
+                name = name.decode("utf8")
+            except UnicodeDecodeError:
+                print("warning: skipping filename with encoding error")
+                continue # skip files with non-utf8 encoding for now
             cur.execute("INSERT INTO content (package, filename, size, function, hash) VALUES (?, ?, ?, ?, ?);",
-                        (package, name.decode("utf8"), size, function, hexhash))
+                        (package, name, size, function, hexhash))
         db.commit()
         return
     raise ValueError("data.tar not found")