source: trunk/essentials/dev-lang/python/Lib/imputil.py

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 25.1 KB
Line 
1"""
2Import utilities
3
4Exported classes:
5 ImportManager Manage the import process
6
7 Importer Base class for replacing standard import functions
8 BuiltinImporter Emulate the import mechanism for builtin and frozen modules
9
10 DynLoadSuffixImporter
11"""
12
13# note: avoid importing non-builtin modules
14import imp ### not available in JPython?
15import sys
16import __builtin__
17
18# for the DirectoryImporter
19import struct
20import marshal
21
# Names exported by "from imputil import *".
# NOTE(review): the module docstring also lists DynLoadSuffixImporter under
# "Exported classes", but it is not named here -- confirm whether that
# omission is intentional.
__all__ = ["ImportManager","Importer","BuiltinImporter"]

# Type objects used for the isinstance() checks elsewhere in this module:
# _StringType recognises plain-string entries of sys.path, _ModuleType
# recognises module objects.
_StringType = type('')
_ModuleType = type(sys) ### doesn't work in JPython...
26
class ImportManager:
    """Manage the import process.

    An instance replaces the '__import__' entry of a namespace with its own
    hook (see install()).  Top-level modules are located by walking
    sys.path: string entries are handed to the filesystem importer
    (self.fs_imp), any other entry is treated as an Importer-like object
    and asked via its import_top() method.
    """

    def install(self, namespace=vars(__builtin__)):
        """Install this ImportManager into the specified namespace.

        namespace may be a dictionary or a module object (a module is
        replaced by vars(module)).  The previous '__import__' entry is
        remembered so that uninstall() can put it back.
        """

        if isinstance(namespace, _ModuleType):
            namespace = vars(namespace)

        # Note: we have no notion of "chaining"

        # Record the previous import hook, then install our own.
        self.previous_importer = namespace['__import__']
        self.namespace = namespace
        namespace['__import__'] = self._import_hook

        ### fix this
        #namespace['reload'] = self._reload_hook

    def uninstall(self):
        """Restore the previous import mechanism.

        Relies on self.namespace and self.previous_importer, which are only
        set by install(); calling this before install() fails with an
        AttributeError.
        """
        self.namespace['__import__'] = self.previous_importer

    def add_suffix(self, suffix, importFunc):
        """Register importFunc as the handler for files ending in suffix.

        The registration is delegated to the filesystem importer
        (self.fs_imp).  importFunc must be callable.
        """
        assert callable(importFunc)
        self.fs_imp.add_suffix(suffix, importFunc)

    ######################################################################
    #
    # PRIVATE METHODS
    #

    # Optional override point: a class to instantiate as the filesystem
    # importer when __init__ is not handed one explicitly.
    clsFilesystemImporter = None

    def __init__(self, fs_imp=None):
        """Set up the filesystem importer and the default suffix handlers.

        fs_imp is the importer used for string entries of sys.path.  When it
        is None, an instance of self.clsFilesystemImporter is created, or of
        _FilesystemImporter if that class attribute is not set.
        """
        # we're definitely going to be importing something in the future,
        # so let's just load the OS-related facilities.
        if not _os_stat:
            _os_bootstrap()

        # This is the Importer that we use for grabbing stuff from the
        # filesystem. It defines one more method (import_from_dir) for our use.
        if fs_imp is None:
            cls = self.clsFilesystemImporter or _FilesystemImporter
            fs_imp = cls()
        self.fs_imp = fs_imp

        # Initialize the set of suffixes that we recognize and import.
        # The default will import dynamic-load modules first, followed by
        # .py files (or a .py file's cached bytecode)
        for desc in imp.get_suffixes():
            if desc[2] == imp.C_EXTENSION:
                self.add_suffix(desc[0],
                                DynLoadSuffixImporter(desc).import_file)
        self.add_suffix('.py', py_suffix_importer)

    def _import_hook(self, fqname, globals=None, locals=None, fromlist=None):
        """Python calls this hook to locate and import a module.

        fqname is the (possibly dotted) module name, globals is the
        namespace of the importing code and fromlist holds the names of a
        "from ... import" statement.  locals is accepted but not used.

        Raises ImportError when the module cannot be found.
        """

        parts = fqname.split('.')

        # determine the context of this import
        parent = self._determine_import_context(globals)

        # if there is a parent, then its importer should manage this import
        if parent:
            module = parent.__importer__._do_import(parent, parts, fromlist)
            if module:
                return module

        # has the top module already been imported?
        try:
            top_module = sys.modules[parts[0]]
        except KeyError:

            # look for the topmost module
            top_module = self._import_top_module(parts[0])
            if not top_module:
                # the topmost module wasn't found at all.
                raise ImportError, 'No module named ' + fqname

        # fast-path simple imports
        if len(parts) == 1:
            if not fromlist:
                return top_module

            if not top_module.__dict__.get('__ispkg__'):
                # __ispkg__ isn't defined (the module was not imported by us),
                # or it is zero.
                #
                # In the former case, there is no way that we could import
                # sub-modules that occur in the fromlist (but we can't raise an
                # error because it may just be names) because we don't know how
                # to deal with packages that were imported by other systems.
                #
                # In the latter case (__ispkg__ == 0), there can't be any sub-
                # modules present, so we can just return.
                #
                # In both cases, since len(parts) == 1, the top_module is also
                # the "bottom" which is the defined return when a fromlist
                # exists.
                return top_module

        importer = top_module.__dict__.get('__importer__')
        if importer:
            return importer._finish_import(top_module, parts[1:], fromlist)

        # Grrr, some people "import os.path" or do "from os.path import ..."
        if len(parts) == 2 and hasattr(top_module, parts[1]):
            if fromlist:
                return getattr(top_module, parts[1])
            else:
                return top_module

        # If the importer does not exist, then we have to bail. A missing
        # importer means that something else imported the module, and we have
        # no knowledge of how to get sub-modules out of the thing.
        raise ImportError, 'No module named ' + fqname

    def _determine_import_context(self, globals):
        """Returns the context in which a module should be imported.

        The context could be a loaded (package) module and the imported module
        will be looked for within that package. The context could also be None,
        meaning there is no context -- the module should be looked for as a
        "top-level" module.
        """

        if not globals or not globals.get('__importer__'):
            # globals does not refer to one of our modules or packages. That
            # implies there is no relative import context (as far as we are
            # concerned), and it should just pick it off the standard path.
            return None

        # The globals refer to a module or package of ours. It will define
        # the context of the new import. Get the module/package fqname.
        parent_fqname = globals['__name__']

        # if a package is performing the import, then return itself (imports
        # refer to pkg contents)
        if globals['__ispkg__']:
            parent = sys.modules[parent_fqname]
            assert globals is parent.__dict__
            return parent

        i = parent_fqname.rfind('.')

        # a module outside of a package has no particular import context
        if i == -1:
            return None

        # if a module in a package is performing the import, then return the
        # package (imports refer to siblings)
        parent_fqname = parent_fqname[:i]
        parent = sys.modules[parent_fqname]
        assert parent.__name__ == parent_fqname
        return parent

    def _import_top_module(self, name):
        """Import the top-level module called name, or return None.

        The first sys.path entry that yields a module wins.
        """
        # scan sys.path looking for a location in the filesystem that contains
        # the module, or an Importer object that can import the module.
        for item in sys.path:
            if isinstance(item, _StringType):
                module = self.fs_imp.import_from_dir(item, name)
            else:
                module = item.import_top(name)
            if module:
                return module
        return None

    def _reload_hook(self, module):
        """Python calls this hook to reload a module.

        Not implemented: every call ends in SystemError.  install() does not
        currently register this hook (the registration is commented out).
        """

        # reloading of a module may or may not be possible (depending on the
        # importer), but at least we can validate that it's ours to reload
        importer = module.__dict__.get('__importer__')
        if not importer:
            ### oops. now what...
            # (no-op branch: execution falls through to the raise below)
            pass

        # okay. it is using the imputil system, and we must delegate it, but
        # we don't know what to do (yet)
        ### we should blast the module dict and do another get_code(). need to
        ### flesh this out and add proper docco...
        raise SystemError, "reload not yet implemented"
212
213
class Importer:
    """Base class for replacing standard import functions.

    Subclasses override get_code(); the remaining methods implement the
    bookkeeping around it (sys.modules registration, executing code objects,
    walking dotted names and processing "from" lists).
    """

    def import_top(self, name):
        "Import a top-level module."
        return self._import_one(None, name, name)

    ######################################################################
    #
    # PRIVATE METHODS
    #
    def _finish_import(self, top, parts, fromlist):
        """Complete an import whose top-level module is already loaded.

        parts holds the remaining components of the dotted name below top.
        Returns top when fromlist is empty, otherwise the bottom-most module.
        """
        # if "a.b.c" was provided, then load the ".b.c" portion down from
        # below the top-level module.
        bottom = self._load_tail(top, parts)

        # if the form is "import a.b.c", then return "a"
        if not fromlist:
            # no fromlist: return the top of the import tree
            return top

        # the top module was imported by self.
        #
        # this means that the bottom module was also imported by self (just
        # now, or in the past and we fetched it from sys.modules).
        #
        # since we imported/handled the bottom module, this means that we can
        # also handle its fromlist (and reliably use __ispkg__).

        # if the bottom node is a package, then (potentially) import some
        # modules.
        #
        # note: if it is not a package, then "fromlist" refers to names in
        #       the bottom module rather than modules.
        # note: for a mix of names and modules in the fromlist, we will
        #       import all modules and insert those into the namespace of
        #       the package module. Python will pick up all fromlist names
        #       from the bottom (package) module; some will be modules that
        #       we imported and stored in the namespace, others are expected
        #       to be present already.
        if bottom.__ispkg__:
            self._import_fromlist(bottom, fromlist)

        # if the form is "from a.b import c, d" then return "b"
        return bottom

    def _import_one(self, parent, modname, fqname):
        """Import a single module.

        Returns the module, or None when get_code() could not find it.  A
        module already present in sys.modules under fqname is returned as is.
        """

        # has the module already been imported?
        try:
            return sys.modules[fqname]
        except KeyError:
            pass

        # load the module's code, or fetch the module itself
        result = self.get_code(parent, modname, fqname)
        if result is None:
            return None

        module = self._process_result(result, fqname)

        # insert the module into its parent
        if parent:
            setattr(parent, modname, module)
        return module

    def _process_result(self, (ispkg, code, values), fqname):
        """Turn a get_code() result into a module registered in sys.modules.

        The first argument is the 3-tuple (ispkg, code, values) returned by
        get_code().  When code is a code object it is executed in a freshly
        created module; if that raises, the sys.modules entry is removed
        again and the exception is re-raised.
        """
        # did get_code() return an actual module? (rather than a code object)
        is_module = isinstance(code, _ModuleType)

        # use the returned module, or create a new one to exec code into
        if is_module:
            module = code
        else:
            module = imp.new_module(fqname)

        ### record packages a bit differently??
        module.__importer__ = self
        module.__ispkg__ = ispkg

        # insert additional values into the module (before executing the code)
        module.__dict__.update(values)

        # the module is almost ready... make it visible
        sys.modules[fqname] = module

        # execute the code within the module's namespace
        if not is_module:
            try:
                exec code in module.__dict__
            except:
                # bare except is deliberate: whatever went wrong, do not leave
                # a half-initialised module behind, then propagate the error
                if fqname in sys.modules:
                    del sys.modules[fqname]
                raise

        # fetch from sys.modules instead of returning module directly.
        # also make module's __name__ agree with fqname, in case
        # the "exec code in module.__dict__" played games on us.
        module = sys.modules[fqname]
        module.__name__ = fqname
        return module

    def _load_tail(self, m, parts):
        """Import the rest of the modules, down from the top-level module.

        Returns the last module in the dotted list of modules.
        Raises ImportError if any component cannot be imported.
        """
        for part in parts:
            fqname = "%s.%s" % (m.__name__, part)
            m = self._import_one(m, part, fqname)
            if not m:
                raise ImportError, "No module named " + fqname
        return m

    def _import_fromlist(self, package, fromlist):
        'Import any sub-modules in the "from" list.'

        # if '*' is present in the fromlist, then look for the '__all__'
        # variable to find additional items (modules) to import.
        if '*' in fromlist:
            fromlist = list(fromlist) + \
                       list(package.__dict__.get('__all__', []))

        for sub in fromlist:
            # if the name is already present, then don't try to import it (it
            # might not be a module!).
            if sub != '*' and not hasattr(package, sub):
                subname = "%s.%s" % (package.__name__, sub)
                submod = self._import_one(package, sub, subname)
                if not submod:
                    raise ImportError, "cannot import name " + subname

    def _do_import(self, parent, parts, fromlist):
        """Attempt to import the module relative to parent.

        This method is used when the import context specifies that <self>
        imported the parent module.

        Returns None when the first component of parts cannot be found
        below parent.
        """
        top_name = parts[0]
        top_fqname = parent.__name__ + '.' + top_name
        top_module = self._import_one(parent, top_name, top_fqname)
        if not top_module:
            # this importer and parent could not find the module (relatively)
            return None

        return self._finish_import(top_module, parts[1:], fromlist)

    ######################################################################
    #
    # METHODS TO OVERRIDE
    #
    def get_code(self, parent, modname, fqname):
        """Find and retrieve the code for the given module.

        parent specifies a parent module to define a context for importing. It
        may be None, indicating no particular context for the search.

        modname specifies a single module (not dotted) within the parent.

        fqname specifies the fully-qualified module name. This is a
        (potentially) dotted name from the "root" of the module namespace
        down to the modname.
        If there is no parent, then modname==fqname.

        This method should return None, or a 3-tuple.

        * If the module was not found, then None should be returned.

        * The first item of the 3-tuple should be the integer 0 or 1,
            specifying whether the module that was found is a package or not.

        * The second item is the code object for the module (it will be
            executed within the new module's namespace). This item can also
            be a fully-loaded module object (e.g. loaded from a shared lib).

        * The third item is a dictionary of name/value pairs that will be
            inserted into new module before the code object is executed. This
            is provided in case the module's code expects certain values (such
            as where the module was found). When the second item is a module
            object, then these names/values will be inserted *after* the module
            has been loaded/initialized.
        """
        raise RuntimeError, "get_code not implemented"
398
399
400######################################################################
401#
402# Some handy stuff for the Importers
403#
404
# Last character of the byte-compiled suffix: 'c' for a normal run, 'o' when
# the interpreter runs optimized (__debug__ is false).
_suffix_char = 'c' if __debug__ else 'o'

# Complete byte-compiled file suffix ('.pyc' or '.pyo').
_suffix = '.py%s' % _suffix_char
410
def _compile(pathname, timestamp):
    """Compile (and cache) a Python source file.

    The file specified by <pathname> is compiled to a code object and
    returned.

    Presuming the appropriate privileges exist, the bytecodes will be
    saved back to the filesystem for future imports. The source file's
    modification timestamp must be provided as a Long value.

    Both the source file and the cache file are closed explicitly, even
    when reading, marshalling or writing raises.
    """
    source = open(pathname, 'rU')
    try:
        codestring = source.read()
    finally:
        source.close()

    # compile() wants the source to end with a newline
    if codestring and codestring[-1] != '\n':
        codestring = codestring + '\n'
    code = __builtin__.compile(codestring, pathname, 'exec')

    # try to cache the compiled code
    try:
        f = open(pathname + _suffix_char, 'wb')
    except IOError:
        # best effort only: no cache file is not an error
        pass
    else:
        try:
            # Write a zeroed magic number first and the real one last, so a
            # partially written file never starts with a valid magic number.
            f.write('\0\0\0\0')
            f.write(struct.pack('<I', timestamp))
            marshal.dump(code, f)
            f.flush()
            f.seek(0, 0)
            f.write(imp.get_magic())
        finally:
            f.close()

    return code
441
# Filled in by _os_bootstrap(); None until then.
_os_stat = _os_path_join = None
def _os_bootstrap():
    """Bind _os_stat and _os_path_join without importing the 'os' module.

    The stat function is taken from whichever platform module is compiled
    into the interpreter (posix, nt, dos, os2 or mac).  Raises ImportError
    if none of them is available.
    """
    global _os_stat, _os_path_join

    builtin_names = sys.builtin_module_names

    # Only the 'mac' branch supplies its own join; every other platform
    # gets the generic separator-based one defined further down.
    join = None
    if 'posix' in builtin_names:
        from posix import stat
        sep = '/'
    elif 'nt' in builtin_names:
        from nt import stat
        sep = '\\'
    elif 'dos' in builtin_names:
        from dos import stat
        sep = '\\'
    elif 'os2' in builtin_names:
        from os2 import stat
        sep = '\\'
    elif 'mac' in builtin_names:
        from mac import stat
        def join(a, b):
            if a == '':
                return b
            if ':' not in a:
                a = ':' + a
            if a[-1:] != ':':
                a = a + ':'
            return a + b
    else:
        raise ImportError('no os specific module found')

    if join is None:
        def join(a, b, sep=sep):
            if a == '':
                return b
            # a forward slash is accepted as a terminator on every platform
            if a[-1:] in ('/', sep):
                return a + b
            return a + sep + b

    _os_stat = stat
    _os_path_join = join
488
def _os_path_isdir(pathname):
    """Local replacement for os.path.isdir().

    Returns None (not False) when the path cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        return None
    # 0xF000 masks the file-type bits of st_mode; 0x4000 is the directory type
    file_type = info.st_mode & 0xF000
    return file_type == 0x4000
496
def _timestamp(pathname):
    """Return the file modification time as a Long.

    None is returned when the file cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        mtime = None
    else:
        mtime = long(info.st_mtime)
    return mtime
504
505
506######################################################################
507#
508# Emulate the import mechanism for builtin and frozen modules
509#
class BuiltinImporter(Importer):
    "Importer that handles builtin and frozen modules only."

    def get_code(self, parent, modname, fqname):
        """Return (0, module, {}) for a builtin or frozen module, else None.

        Anything requested inside a package context (parent is set) is
        declined, since these modules never live in a package.
        """
        if parent:
            return None

        # work out which kind of module this is, if either
        if imp.is_builtin(modname):
            kind = imp.C_BUILTIN
        elif imp.is_frozen(modname):
            kind = imp.PY_FROZEN
        else:
            # neither builtin nor frozen: not ours
            return None

        # found: load it and hand back the module object itself
        description = ('', '', kind)
        module = imp.load_module(modname, None, modname, description)
        return 0, module, { }
528
529
530######################################################################
531#
532# Internal importer used for importing from the filesystem
533#
class _FilesystemImporter(Importer):
    "Importer for modules and packages found in filesystem directories."

    def __init__(self):
        # (suffix, importFunc) pairs, tried in registration order
        self.suffixes = [ ]

    def add_suffix(self, suffix, importFunc):
        "Register importFunc as the handler for files ending in suffix."
        assert callable(importFunc)
        self.suffixes.append((suffix, importFunc))

    def import_from_dir(self, dir, fqname):
        "Import the top-level module fqname from directory dir, or return None."
        found = self._import_pathname(_os_path_join(dir, fqname), fqname)
        if not found:
            return None
        return self._process_result(found, fqname)

    def get_code(self, parent, modname, fqname):
        # Only ever called for a module inside a package: top-level modules
        # reach this importer through import_from_dir(), which the
        # ImportManager calls directly. Hence parent must be set.
        assert parent

        location = _os_path_join(parent.__pkgdir__, modname)
        return self._import_pathname(location, fqname)

    def _import_pathname(self, pathname, fqname):
        """Try to import whatever lives at pathname (given without suffix).

        Returns a get_code()-style 3-tuple, or None if nothing matched.
        """
        if _os_path_isdir(pathname):
            # a directory is a package only if its __init__ can be imported
            init_result = self._import_pathname(
                _os_path_join(pathname, '__init__'), fqname)
            if not init_result:
                return None
            pkg_values = init_result[2]
            pkg_values['__pkgdir__'] = pathname
            pkg_values['__path__'] = [ pathname ]
            return 1, init_result[1], pkg_values

        # plain module: the first registered suffix whose file exists wins
        for suffix, handler in self.suffixes:
            candidate = pathname + suffix
            try:
                finfo = _os_stat(candidate)
            except OSError:
                continue
            return handler(candidate, finfo, fqname)
        return None
578
579######################################################################
580#
581# SUFFIX-BASED IMPORTERS
582#
583
def py_suffix_importer(filename, finfo, fqname):
    """Suffix importer for '.py' files.

    filename is the path of the source file and finfo its stat result;
    fqname is accepted for the suffix-importer calling convention but not
    used here.

    The cached bytecode file next to the source is used when it is at least
    as new as the source, starts with the current magic number and records
    exactly the source's modification time. Otherwise the source is
    (re)compiled through _compile().

    Returns the 3-tuple (0, code, {'__file__': path actually loaded}).
    """
    file = filename[:-3] + _suffix
    t_py = long(finfo[8])           # index 8 of a stat result is st_mtime
    t_pyc = _timestamp(file)

    code = None
    if t_pyc is not None and t_pyc >= t_py:
        f = open(file, 'rb')
        try:
            if f.read(4) == imp.get_magic():
                stamp = f.read(4)
                # A file truncated inside its header yields fewer than four
                # bytes here; treat it as stale instead of letting
                # struct.unpack() raise.
                if len(stamp) == 4 and struct.unpack('<I', stamp)[0] == t_py:
                    code = marshal.load(f)
        finally:
            # release the handle even if unmarshalling fails
            f.close()
    if code is None:
        file = filename
        code = _compile(file, t_py)

    return 0, code, { '__file__' : file }
602
class DynLoadSuffixImporter:
    """Suffix importer for dynamically loaded modules.

    desc is one entry of imp.get_suffixes(); its second item is used as the
    mode for opening the file and the whole tuple is passed on to
    imp.load_module().
    """

    def __init__(self, desc):
        self.desc = desc

    def import_file(self, filename, finfo, fqname):
        """Load filename as module fqname and return (0, module, {}).

        finfo is accepted for the suffix-importer calling convention but is
        not used.
        """
        fp = open(filename, self.desc[1])
        try:
            module = imp.load_module(fqname, fp, filename, self.desc)
        finally:
            # imp.load_module() leaves closing the file to the caller,
            # including when it raises
            fp.close()
        module.__file__ = filename
        return 0, module, { }
612
613
614######################################################################
615
616def _print_importers():
617 items = sys.modules.items()
618 items.sort()
619 for name, module in items:
620 if module:
621 print name, module.__dict__.get('__importer__', '-- no importer')
622 else:
623 print name, '-- non-existent module'
624
def _test_revamp():
    "Install a fresh ImportManager and put a BuiltinImporter first on sys.path."
    manager = ImportManager()
    manager.install()
    builtin_importer = BuiltinImporter()
    sys.path.insert(0, builtin_importer)
628
629######################################################################
630
631#
632# TODO
633#
634# from Finn Bock:
635# type(sys) is not a module in JPython. what to use instead?
636# imp.C_EXTENSION is not in JPython. same for get_suffixes and new_module
637#
638# given foo.py of:
639# import sys
640# sys.modules['foo'] = sys
641#
642# ---- standard import mechanism
643# >>> import foo
644# >>> foo
645# <module 'sys' (built-in)>
646#
647# ---- revamped import mechanism
648# >>> import imputil
649# >>> imputil._test_revamp()
650# >>> import foo
651# >>> foo
652# <module 'foo' from 'foo.py'>
653#
654#
655# from MAL:
656# should BuiltinImporter exist in sys.path or hard-wired in ImportManager?
657# need __path__ processing
658# performance
659# move chaining to a subclass [gjs: it's been nuked]
660# deinstall should be possible
661# query mechanism needed: is a specific Importer installed?
662# py/pyc/pyo piping hooks to filter/process these files
663# wish list:
664# distutils importer hooked to list of standard Internet repositories
665# module->file location mapper to speed FS-based imports
666# relative imports
667# keep chaining so that it can play nice with other import hooks
668#
669# from Gordon:
670# push MAL's mapper into sys.path[0] as a cache (hard-coded for apps)
671#
672# from Guido:
673# need to change sys.* references for rexec environs
674# need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy
675# watch out for sys.modules[...] is None
676# flag to force absolute imports? (speeds _determine_import_context and
677# checking for a relative module)
678# insert names of archives into sys.path (see quote below)
679# note: reload does NOT blast module dict
680# shift import mechanisms and policies around; provide for hooks, overrides
681# (see quote below)
682# add get_source stuff
683# get_topcode and get_subcode
684# CRLF handling in _compile
685# race condition in _compile
686# refactoring of os.py to deal with _os_bootstrap problem
687# any special handling to do for importing a module with a SyntaxError?
688# (e.g. clean up the traceback)
689# implement "domain" for path-type functionality using pkg namespace
690# (rather than FS-names like __path__)
691# don't use the word "private"... maybe "internal"
692#
693#
694# Guido's comments on sys.path caching:
695#
696# We could cache this in a dictionary: the ImportManager can have a
697# cache dict mapping pathnames to importer objects, and a separate
698# method for coming up with an importer given a pathname that's not yet
699# in the cache. The method should do a stat and/or look at the
700# extension to decide which importer class to use; you can register new
701# importer classes by registering a suffix or a Boolean function, plus a
702# class. If you register a new importer class, the cache is zapped.
703# The cache is independent from sys.path (but maintained per
704# ImportManager instance) so that rearrangements of sys.path do the
705# right thing. If a path is dropped from sys.path the corresponding
706# cache entry is simply no longer used.
707#
708# My/Guido's comments on factoring ImportManager and Importer:
709#
710# > However, we still have a tension occurring here:
711# >
712# > 1) implementing policy in ImportManager assists in single-point policy
713# > changes for app/rexec situations
714# > 2) implementing policy in Importer assists in package-private policy
715# > changes for normal, operating conditions
716# >
717# > I'll see if I can sort out a way to do this. Maybe the Importer class will
718# > implement the methods (which can be overridden to change policy) by
719# > delegating to ImportManager.
720#
721# Maybe also think about what kind of policies an Importer would be
722# likely to want to change. I have a feeling that a lot of the code
723# there is actually not so much policy but a *necessity* to get things
724# working given the calling conventions for the __import__ hook: whether
725# to return the head or tail of a dotted name, or when to do the "finish
726# fromlist" stuff.
727#
Note: See TracBrowser for help on using the repository browser.