Coverage for grm\plugin\magic.py: 0%
157 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-10 14:44 +0900
« prev ^ index » next coverage.py v7.2.3, created at 2023-04-10 14:44 +0900
1"""
2magic is a wrapper around the libmagic file identification library.
4See README for more information.
6Usage:
8>>> import magic
9>>> magic.from_file("testdata/test.pdf")
10'PDF document, version 1.2'
11>>> magic.from_file("testdata/test.pdf", mime=True)
12'application/pdf'
13>>> magic.from_buffer(open("testdata/test.pdf").read(1024))
14'PDF document, version 1.2'
15>>>
18"""
20import sys
21import glob
22import os.path
23import ctypes
24import ctypes.util
25import threading
27from ctypes import c_char_p, c_int, c_size_t, c_void_p
class MagicException(Exception):
    """Error raised when a libmagic call reports a failure.

    The text (or None) passed to the constructor is forwarded to
    Exception and is also kept on the `message` attribute.
    """

    def __init__(self, message):
        # Keep the raw message around: callers inspect `message` directly,
        # and it may legitimately be None.
        self.message = message
        Exception.__init__(self, message)
class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance opens its own libmagic cookie with the flags selected
    at construction time.  from_buffer and from_file hold the instance
    lock while they call into libmagic with that cookie.
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False, uncompress=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        uncompress - Try to look inside compressed files.

        Raises MagicException if loading the magic database fails.
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME
        if mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE

        if uncompress:
            self.flags |= MAGIC_COMPRESS

        self.cookie = magic_open(self.flags)
        self.lock = threading.Lock()

        magic_load(self.cookie, magic_file)

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`

        `buf` should be a bytes object.  On Python 3 a text string is
        accepted as well and is encoded to UTF-8 before identification.
        """
        # On Python 3, ctypes would hand a str to libmagic as a wchar_t*
        # while the length is counted in characters, so libmagic would
        # look at the wrong bytes.  Convert text to bytes first.  On
        # Python 2 str is bytes, so nothing is converted.
        if str is not bytes and isinstance(buf, str):
            buf = buf.encode('utf-8', errors='replace')
        with self.lock:
            try:
                return maybe_decode(magic_buffer(self.cookie, buf))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the contents of the file named `filename`

        Raises the error produced by open() if the file cannot be opened.
        """
        # raise FileNotFoundException or IOError if the file does not exist
        with open(filename):
            pass
        with self.lock:
            try:
                return maybe_decode(magic_file(self.cookie, filename))
            except MagicException as e:
                return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Turn a message-less failure into a generic mimetype.

        Returns "application/octet-stream" when `e` carries no message
        and this instance was created with the MAGIC_MIME flag; re-raises
        `e` in every other case.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        if e.message is None and (self.flags & MAGIC_MIME):
            return "application/octet-stream"
        else:
            raise e

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.

        # the cookie attribute is looked up defensively: __del__ also
        # runs for instances whose __init__ never got as far as
        # assigning it, and a plain attribute access would raise
        # AttributeError there.
        cookie = getattr(self, 'cookie', None)
        if cookie and magic_close:
            magic_close(cookie)
            self.cookie = None
# Cache of shared Magic instances, keyed by the `mime` argument.
_instances = {}


def _get_magic_type(mime):
    """Return the cached Magic instance for `mime`, creating it on first use."""
    instance = _instances.get(mime)
    if instance is not None:
        return instance
    instance = Magic(mime=mime)
    _instances[mime] = instance
    return instance
def from_file(filename, mime=False):
    """
    Accepts a filename and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    # One Magic instance per `mime` value is shared between calls.
    m = _get_magic_type(mime)
    return m.from_file(filename)
def from_buffer(buffer, mime=False):
    """
    Identify the data held in `buffer`, a binary string.

    Returns the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024))
    'PDF document, version 1.2'
    """
    # Delegate to the shared Magic instance for this `mime` setting.
    return _get_magic_type(mime).from_buffer(buffer)
# Handle to the loaded libmagic shared library; stays None until a load
# attempt succeeds.
libmagic = None

# Let's try to find magic or magic1.  find_library returns None if it
# doesn't find the library, so the first name that resolves wins.
dll = (ctypes.util.find_library('magic')
       or ctypes.util.find_library('magic1')
       or ctypes.util.find_library('cygmagic-1'))
if dll:
    libmagic = ctypes.CDLL(dll)

if not (libmagic and libmagic._name):
    # The generic lookup failed: fall back to a per-platform list of
    # well-known library names and locations.
    windows_dlls = ['magic1.dll', 'cygmagic-1.dll']
    platform_to_lib = {
        'darwin': ['/opt/local/lib/libmagic.dylib',
                   '/usr/local/lib/libmagic.dylib'] +
                  # Assumes there will only be one version installed
                  glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
        'win32': windows_dlls,
        'cygwin': windows_dlls,
        # fallback for some Linuxes (e.g. Alpine) where library search
        # does not work
        'linux': ['libmagic.so.1'],
    }
    # Any sys.platform value beginning with 'linux' is looked up under
    # the single 'linux' key.
    platform = sys.platform
    if platform.startswith('linux'):
        platform = 'linux'
    for dll in platform_to_lib.get(platform, []):
        try:
            libmagic = ctypes.CDLL(dll)
        except OSError:
            # This candidate could not be loaded; try the next one.
            continue
        break

if not (libmagic and libmagic._name):
    # It is better to raise an ImportError since we are importing magic module
    raise ImportError('failed to find libmagic. Check your installation')

# ctypes type used for the libmagic cookie in the declarations below.
magic_t = ctypes.c_void_p
def errorcheck_null(result, func, args):
    """ctypes errcheck hook for calls that return NULL on failure.

    Passes `result` through unchanged unless it is None, in which case
    a MagicException is raised carrying whatever magic_error reports
    for the cookie in args[0].
    """
    if result is not None:
        return result
    raise MagicException(magic_error(args[0]))
def errorcheck_negative_one(result, func, args):
    """ctypes errcheck hook for calls that return -1 on failure.

    Passes `result` through unchanged unless it equals -1, in which
    case a MagicException is raised carrying whatever magic_error
    reports for the cookie in args[0].
    """
    # Compare by value: an identity test against an int literal only
    # works by accident of small-integer caching and triggers a
    # SyntaxWarning on newer Python versions.
    if result == -1:
        raise MagicException(magic_error(args[0]))
    return result
# Results should be the native `str` type on both Python versions:
# decoding unconditionally would hand back unicode objects on python2.
def maybe_decode(s):
    """Return `s` as a native str, decoding from UTF-8 on Python 3 only."""
    return s if str == bytes else s.decode('utf-8')
def coerce_filename(filename):
    """Return `filename` in a form suitable for a c_char_p parameter.

    None is passed through, text strings are encoded to UTF-8 bytes and
    anything else is returned untouched.
    """
    if filename is None:
        return None

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  If you use the filesystem encoding
    # then you'll get inconsistent behavior (crashes) depending on the user's
    # LANG environment variable
    if sys.version_info[0] >= 3:
        text_type = str
    else:
        text_type = unicode  # noqa: F821 - only exists (and is only reached) on python2

    if isinstance(filename, text_type):
        return filename.encode('utf-8')
    return filename
# ctypes declarations for the cookie lifecycle and error reporting
# entry points of libmagic.

# magic_open(int) -> magic_t
magic_open = libmagic.magic_open
magic_open.argtypes = [c_int]
magic_open.restype = magic_t

# magic_close(magic_t) -> no return value
magic_close = libmagic.magic_close
magic_close.argtypes = [magic_t]
magic_close.restype = None

# magic_error(magic_t) -> char *
magic_error = libmagic.magic_error
magic_error.argtypes = [magic_t]
magic_error.restype = c_char_p

# magic_errno(magic_t) -> int
magic_errno = libmagic.magic_errno
magic_errno.argtypes = [magic_t]
magic_errno.restype = c_int
# magic_file(magic_t, char *) -> char *
# A NULL result is converted into a MagicException by errorcheck_null.
_magic_file = libmagic.magic_file
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.restype = c_char_p
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Call libmagic's magic_file after coercing `filename` for ctypes."""
    encoded_name = coerce_filename(filename)
    return _magic_file(cookie, encoded_name)
# magic_buffer(magic_t, void *, size_t) -> char *
# A NULL result is converted into a MagicException by errorcheck_null.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.restype = c_char_p
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """Call libmagic's magic_buffer on `buf`, passing len(buf) as its size."""
    size = len(buf)
    return _magic_buffer(cookie, buf, size)
# magic_load(magic_t, char *) -> int
# A result of -1 is converted into a MagicException by
# errorcheck_negative_one.
_magic_load = libmagic.magic_load
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.restype = c_int
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """Call libmagic's magic_load after coercing `filename` for ctypes.

    `filename` may be None, which is passed through as-is.
    """
    encoded_name = coerce_filename(filename)
    return _magic_load(cookie, encoded_name)
# Remaining libmagic entry points, exposed as-is with no errcheck hook
# installed.

# magic_setflags(magic_t, int) -> int
magic_setflags = libmagic.magic_setflags
magic_setflags.argtypes = [magic_t, c_int]
magic_setflags.restype = c_int

# magic_check(magic_t, char *) -> int
magic_check = libmagic.magic_check
magic_check.argtypes = [magic_t, c_char_p]
magic_check.restype = c_int

# magic_compile(magic_t, char *) -> int
magic_compile = libmagic.magic_compile
magic_compile.argtypes = [magic_t, c_char_p]
magic_compile.restype = c_int
# Flag bits accepted by magic_open / magic_setflags, listed in bit order.
MAGIC_NONE = 0  # No flags

MAGIC_DEBUG = 1 << 0  # Turn on debugging
MAGIC_SYMLINK = 1 << 1  # Follow symlinks
MAGIC_COMPRESS = 1 << 2  # Check inside compressed files
MAGIC_DEVICES = 1 << 3  # Look at the contents of devices
MAGIC_MIME = 1 << 4  # Return a mime string
MAGIC_CONTINUE = 1 << 5  # Return all matches
MAGIC_CHECK = 1 << 6  # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 1 << 7  # Restore access time on exit
MAGIC_RAW = 1 << 8  # Don't translate unprintable chars
MAGIC_ERROR = 1 << 9  # Handle ENOENT etc as real errors
MAGIC_MIME_ENCODING = 1 << 10  # Return the MIME encoding

# Bits that switch individual checks off.
MAGIC_NO_CHECK_COMPRESS = 1 << 12  # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 1 << 13  # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 1 << 14  # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 1 << 15  # Don't check application type
MAGIC_NO_CHECK_ELF = 1 << 16  # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 1 << 17  # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 1 << 18  # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 1 << 19  # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 1 << 20  # Don't check ascii/tokens