Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2magic is a wrapper around the libmagic file identification library. 

3 

4See README for more information. 

5 

6Usage: 

7 

8>>> import magic 

9>>> magic.from_file("testdata/test.pdf") 

10'PDF document, version 1.2' 

11>>> magic.from_file("testdata/test.pdf", mime=True) 

12'application/pdf' 

13>>> magic.from_buffer(open("testdata/test.pdf").read(1024)) 

14'PDF document, version 1.2' 

15>>> 

16 

17 

18""" 

19 

20import sys 

21import glob 

22import os.path 

23import ctypes 

24import ctypes.util 

25import threading 

26 

27from ctypes import c_char_p, c_int, c_size_t, c_void_p 

28 

29 

class MagicException(Exception):
    """Error raised when a libmagic call reports a failure.

    The text passed in is kept both as the regular exception argument and
    on the ``message`` attribute.  ``message`` may be ``None``.
    """

    def __init__(self, message):
        # Stored explicitly because callers inspect ``e.message`` directly.
        self.message = message
        super(MagicException, self).__init__(message)

34 

35 

class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance owns one libmagic cookie (``self.cookie``), opened with
    the flags selected in ``__init__``.  Calls that use the cookie are
    serialized through ``self.lock``.
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False, uncompress=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        uncompress - Try to look inside compressed files.

        Raises MagicException if loading the magic database fails.
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME
        if mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE

        if uncompress:
            self.flags |= MAGIC_COMPRESS

        self.cookie = magic_open(self.flags)
        self.lock = threading.Lock()

        # Note: inside this method ``magic_file`` is the constructor
        # argument (a database path or None), not the module-level function.
        magic_load(self.cookie, magic_file)

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`

        `buf` should be a byte string.  On Python 3 a text string is
        accepted as well and is encoded as UTF-8 before identification.
        """
        # On Python 3 a str handed to a c_void_p parameter is passed to C
        # as a wchar_t* while len() counts characters, which is not what
        # libmagic expects.  Convert text to bytes first.  On Python 2
        # str *is* bytes, so nothing is converted there.
        if str != bytes and isinstance(buf, str):
            buf = buf.encode('utf-8', errors='replace')
        with self.lock:
            try:
                return maybe_decode(magic_buffer(self.cookie, buf))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the contents of the file named `filename`

        Raises IOError (FileNotFoundError on Python 3) if the file cannot
        be opened.
        """
        # Open (and immediately close) the file so that a missing or
        # unreadable file surfaces as the usual Python I/O exception.
        with open(filename):
            pass
        with self.lock:
            try:
                return maybe_decode(magic_file(self.cookie, filename))
            except MagicException as e:
                return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Turn the message-less failure of libmagic 5.09 into a generic
        mimetype when in mime mode; re-raise every other error.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        if e.message is None and (self.flags & MAGIC_MIME):
            return "application/octet-stream"
        else:
            raise e

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # __del__ also runs for objects whose __init__ raised before
        # self.cookie was assigned, so look the attribute up defensively
        # instead of failing with AttributeError.
        cookie = getattr(self, 'cookie', None)

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.
        if cookie and magic_close:
            magic_close(cookie)
        self.cookie = None

112 

# Cache of shared Magic objects used by the module-level helpers, keyed by
# the ``mime`` flag they were created with.
_instances = {}


def _get_magic_type(mime):
    """Return the cached Magic instance for `mime`, creating it on first use."""
    cached = _instances.get(mime)
    if cached is not None:
        return cached

    cached = Magic(mime=mime)
    _instances[mime] = cached
    return cached

120 

def from_file(filename, mime=False):
    """
    Detect the type of the file named `filename`.

    Returns the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    return _get_magic_type(mime).from_file(filename)

132 

def from_buffer(buffer, mime=False):
    """
    Detect the type of the data held in the binary string `buffer`.

    Returns the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_buffer(buffer)

144 

145 

146 

147 

libmagic = None

# First choice: let ctypes' library search locate libmagic under any of the
# names it is commonly installed as (magic, magic1, cygmagic-1).
dll = (ctypes.util.find_library('magic')
       or ctypes.util.find_library('magic1')
       or ctypes.util.find_library('cygmagic-1'))

# find_library returns None when it finds nothing, so only load on a hit.
if dll:
    libmagic = ctypes.CDLL(dll)

if not (libmagic and libmagic._name):
    # Second choice: try a fixed list of candidates for the current platform.
    windows_dlls = ['magic1.dll', 'cygmagic-1.dll']
    platform_to_lib = {
        # The Cellar glob assumes there will only be one version installed.
        'darwin': ['/opt/local/lib/libmagic.dylib',
                   '/usr/local/lib/libmagic.dylib']
                  + glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
        'win32': windows_dlls,
        'cygwin': windows_dlls,
        # fallback for some Linuxes (e.g. Alpine) where library search
        # does not work
        'linux': ['libmagic.so.1'],
    }
    platform = 'linux' if sys.platform.startswith('linux') else sys.platform
    for dll in platform_to_lib.get(platform, []):
        try:
            libmagic = ctypes.CDLL(dll)
        except OSError:
            # This candidate could not be loaded; move on to the next one.
            continue
        break

if not (libmagic and libmagic._name):
    # ImportError is the natural failure here: importing the magic module
    # is what cannot be completed.
    raise ImportError('failed to find libmagic. Check your installation')

177 

# The libmagic cookie is treated as an opaque pointer.
magic_t = ctypes.c_void_p


def errorcheck_null(result, func, args):
    """ctypes errcheck hook: raise MagicException when `result` is None.

    On failure the error text is fetched with magic_error() using the first
    call argument (the cookie).  Any other result is returned unchanged.
    """
    if result is not None:
        return result
    raise MagicException(magic_error(args[0]))

186 

def errorcheck_negative_one(result, func, args):
    """ctypes errcheck hook: raise MagicException when `result` equals -1.

    On failure the error text is fetched with magic_error() using the first
    call argument (the cookie).  Any other result is returned unchanged.
    """
    # Compare by value: ``is -1`` tests object identity, which only works
    # by accident of integer caching and triggers a SyntaxWarning on
    # recent Python versions.
    if result == -1:
        err = magic_error(args[0])
        raise MagicException(err)
    else:
        return result

193 

194 

def maybe_decode(s):
    """Return `s` as the native ``str`` type of the running interpreter.

    On Python 3 the byte string is decoded as UTF-8.  On Python 2 (where
    ``str`` is ``bytes``) it is returned untouched, because decoding
    unconditionally would produce ``unicode`` there.
    """
    if str != bytes:
        return s.decode('utf-8')
    return s

202 

def coerce_filename(filename):
    """Return `filename` in a form suitable for a ``char *`` argument.

    None is passed through, text strings are encoded as UTF-8, and anything
    else (e.g. a byte string) is returned unchanged.
    """
    if filename is None:
        return None

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  Using the filesystem encoding instead would give
    # inconsistent behavior (crashes) depending on the user's LANG
    # environment variable, so the encoding is done explicitly here.
    if sys.version_info[0] >= 3:
        text_type = str
    else:
        text_type = unicode  # only looked up on Python 2

    if isinstance(filename, text_type):
        return filename.encode('utf-8')
    return filename

219 

# Cookie lifecycle and error reporting entry points of libmagic, with the
# C signatures ctypes should use for them.

# magic_open: takes an int, returns a cookie.
magic_open = libmagic.magic_open
magic_open.argtypes = [c_int]
magic_open.restype = magic_t

# magic_close: takes a cookie, returns nothing.
magic_close = libmagic.magic_close
magic_close.argtypes = [magic_t]
magic_close.restype = None

# magic_error: takes a cookie, returns a C string.
magic_error = libmagic.magic_error
magic_error.argtypes = [magic_t]
magic_error.restype = c_char_p

# magic_errno: takes a cookie, returns an int.
magic_errno = libmagic.magic_errno
magic_errno.argtypes = [magic_t]
magic_errno.restype = c_int

235 

# Raw binding: takes a cookie and a C string, returns a C string.  A None
# result is turned into a MagicException by errorcheck_null.
_magic_file = libmagic.magic_file
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.restype = c_char_p
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Call libmagic's magic_file on `filename` after coercing it for C."""
    c_filename = coerce_filename(filename)
    return _magic_file(cookie, c_filename)

243 

# Raw binding: takes a cookie, a pointer and a size, returns a C string.
# A None result is turned into a MagicException by errorcheck_null.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.restype = c_char_p
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """Call libmagic's magic_buffer on `buf`, passing len(buf) as its size."""
    size = len(buf)
    return _magic_buffer(cookie, buf, size)

251 

252 

# Raw binding: takes a cookie and a C string, returns an int that is
# checked by errorcheck_negative_one.
_magic_load = libmagic.magic_load
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.restype = c_int
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """Call libmagic's magic_load with `filename` coerced for C (None allowed)."""
    c_filename = coerce_filename(filename)
    return _magic_load(cookie, c_filename)

260 

# Remaining libmagic entry points, exposed as-is (no errcheck installed).

# magic_setflags: takes a cookie and an int, returns an int.
magic_setflags = libmagic.magic_setflags
magic_setflags.argtypes = [magic_t, c_int]
magic_setflags.restype = c_int

# magic_check: takes a cookie and a C string, returns an int.
magic_check = libmagic.magic_check
magic_check.argtypes = [magic_t, c_char_p]
magic_check.restype = c_int

# magic_compile: takes a cookie and a C string, returns an int.
magic_compile = libmagic.magic_compile
magic_compile.argtypes = [magic_t, c_char_p]
magic_compile.restype = c_int

272 

273 

274 

# Flag bits accepted by magic_open / magic_setflags, listed by value.

# -- general behaviour --
MAGIC_NONE = 0x000000            # no flags set
MAGIC_DEBUG = 0x000001           # turn on debugging
MAGIC_SYMLINK = 0x000002         # follow symlinks
MAGIC_COMPRESS = 0x000004        # check inside compressed files
MAGIC_DEVICES = 0x000008         # look at the contents of devices
MAGIC_MIME = 0x000010            # return a mime string
MAGIC_CONTINUE = 0x000020        # return all matches
MAGIC_CHECK = 0x000040           # print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080  # restore access time on exit
MAGIC_RAW = 0x000100             # don't translate unprintable chars
MAGIC_ERROR = 0x000200           # handle ENOENT etc as real errors
MAGIC_MIME_ENCODING = 0x000400   # return the MIME encoding

# -- switches that disable individual checks --
MAGIC_NO_CHECK_COMPRESS = 0x001000  # skip the compressed-file check
MAGIC_NO_CHECK_TAR = 0x002000       # skip the tar-file check
MAGIC_NO_CHECK_SOFT = 0x004000      # skip magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000   # skip the application-type check
MAGIC_NO_CHECK_ELF = 0x010000       # skip elf details
MAGIC_NO_CHECK_ASCII = 0x020000     # skip the ascii-file check
MAGIC_NO_CHECK_TROFF = 0x040000     # skip ascii/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000   # skip ascii/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000    # skip ascii/tokens