drop support for Python 2.x
[~helmut/debian-dedup.git] / dedup / filemagic.py
1 """A very strange "hash" that uses the magic module (python3-magic) to guess
2 the file type."""
3
4 import magic
5
# The magic bindings have exposed different APIs over time: use the
# module-level from_buffer when it exists, otherwise fall back to the
# buffer method of magic.none_magic.
if hasattr(magic, "from_buffer"):
    _magic_identify = magic.from_buffer
else:
    _magic_identify = magic.none_magic.buffer
11
class FileDigester:
    """A hashlib-like class to guess a filetype using the magic module.

    Data passed to update() is buffered until at least FILE_BYTES_MAX bytes
    have been collected. At that point the identification is computed once
    and cached, the buffer is released, and any further data is ignored.
    """

    FILE_BYTES_MAX = 1024 * 1024  # copied from file source

    def __init__(self):
        # Bytes collected so far; replaced by None once the identification
        # has been cached.
        self.buff = b""
        # Cached identification, or None while data is still being buffered.
        self.identification = None

    def _compute_identification(self):
        """Identify the currently buffered bytes.

        Returns the result of the magic identification, or a fixed
        placeholder string when that call raises UnicodeDecodeError.
        """
        try:
            return _magic_identify(self.buff)
        except UnicodeDecodeError:
            return "magic identification is not valid UTF-8"

    def update(self, buff: bytes) -> None:
        """Feed more data; ignored once the identification is cached."""
        # Compare against None rather than relying on truthiness: a cached
        # but falsy identification (e.g. an empty string) still means the
        # buffer has been dropped, and appending to it would raise TypeError.
        if self.identification is not None:
            return
        self.buff += buff
        if len(self.buff) >= self.FILE_BYTES_MAX:
            self.identification = self._compute_identification()
            # Release the (potentially large) buffer; it is no longer needed.
            self.buff = None

    def identify(self):
        """Return the guessed file magic identification.

        Uses the cached identification when there is one; otherwise
        identifies whatever has been buffered so far without caching the
        result, so that later update() calls are still taken into account.
        """
        if self.identification is not None:
            return self.identification
        return self._compute_identification()

    def hexdigest(self):
        """Compatibility with hashlib. An alias of identify. Doesn't return
        hex."""
        return self.identify()

    def copy(self) -> "FileDigester":
        """Return a new FileDigester carrying the same buffer and cached
        identification as this one."""
        new = FileDigester()
        new.buff = self.buff
        new.identification = self.identification
        return new
48         new.identification = self.identification
49         return new