distinguish bytes from unicode for py3k
author: Helmut Grohne <helmut@subdivi.de>
Thu, 16 Apr 2015 15:43:11 +0000 (17:43 +0200)
committer: Helmut Grohne <helmut@subdivi.de>
Thu, 16 Apr 2015 15:43:11 +0000 (17:43 +0200)
dedup/compression.py
dedup/hashing.py
dedup/image.py
importpkg.py

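diff --git a/dedup/compression.py b/dedup/compression.py
index 4ce258c..4385f1d 100644
--- a/dedup/compression.py
+++ b/dedup/compression.py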
@@ -62,7 +62,7 @@ class GzipDecompressor(object):
         else:
             expect = struct.pack("<ll", self.crc, self.size)
             if self.inbuffer.startswith(expect) and \
-                    self.inbuffer[len(expect):].replace("\0", "") == "":
+                    self.inbuffer[len(expect):].replace(b"\0", b"") == b"":
                 return b""
             return self.inbuffer
 
diff --git a/dedup/hashing.py b/dedup/hashing.py
index a8a46c7..0b7f889 100644
--- a/dedup/hashing.py
+++ b/dedup/hashing.py
@@ -52,7 +52,7 @@ class HashBlacklistContent(object):
             # the chain avoids passing the empty sequence to max
             maxlen = max(itertools.chain((0,), itertools.imap(len, blacklist)))
         self.maxlen = maxlen
-        self.stored = ""
+        self.stored = b""
 
     @property
     def name(self):
diff --git a/dedup/image.py b/dedup/image.py
index ef17989..314eb44 100644
--- a/dedup/image.py
+++ b/dedup/image.py
@@ -91,7 +91,7 @@ class GIFHash(ImageHash):
         if self.content.tell() < 10: # magic + logical dimension
             return False
         curvalue = self.content.getvalue()
-        if curvalue.startswith((b"GIF87a", "GIF89a")):
+        if curvalue.startswith((b"GIF87a", b"GIF89a")):
             width, height = struct.unpack("<HH", curvalue[6:10])
             if width * height > self.maxpixels:
                 raise ValueError("maximum image pixels exceeded")
diff --git a/importpkg.py b/importpkg.py
index 7e074e1..6dced21 100755
--- a/importpkg.py
+++ b/importpkg.py
@@ -45,13 +45,14 @@ def gifhash():
     return hashobj
 
 def decompress_tar(filelike, extension):
-    if extension in (".lzma", ".xz"):
+    if extension in (b".lzma", b".xz"):
         filelike = DecompressedStream(filelike, lzma.LZMADecompressor())
-        extension = ""
-    if extension not in ("", ".gz", ".bz2"):
+        extension = b""
+    if extension not in (b"", b".gz", b".bz2"):
         raise ValueError("unknown compression format with extension %r" %
                          extension)
-    return tarfile.open(fileobj=filelike, mode="r|" + extension[1:])
+    return tarfile.open(fileobj=filelike,
+                        mode="r|" + extension[1:].decode("ascii"))
 
 def process_package(filelike, hash_functions):
     af = ArReader(filelike)
@@ -62,7 +63,7 @@ def process_package(filelike, hash_functions):
             name = af.read_entry()
         except EOFError:
             raise ValueError("data.tar not found")
-        if name.startswith("control.tar"):
+        if name.startswith(b"control.tar"):
             if state != "start":
                 raise ValueError("unexpected control.tar")
             state = "control"
@@ -76,7 +77,7 @@ def process_package(filelike, hash_functions):
                 yield process_control(tf.extractfile(elem).read())
                 break
             continue
-        elif name.startswith("data.tar"):
+        elif name.startswith(b"data.tar"):
             if state != "control_file":
                 raise ValueError("missing control file")
             state = "data"