source: python/vendor/Python-2.7.6/Tools/i18n/pygettext.py

Last change on this file was 388, checked in by dmik, 11 years ago

python: Update vendor to 2.7.6.

  • Property svn:eol-style set to native
File size: 21.6 KB
Line 
1#! /usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3# Originally written by Barry Warsaw <barry@zope.com>
4#
5# Minimally patched to make it even more xgettext compatible
6# by Peter Funk <pf@artcom-gmbh.de>
7#
8# 2002-11-22 Jürgen Hermann <jh@web.de>
9# Added checks that _() only contains string literals, and
10# command line args are resolved to module lists, i.e. you
11# can now pass a filename, a module or package name, or a
12# directory (including globbing chars, important for Win32).
13# Made docstring fit in 80 chars wide displays using pydoc.
14#
15
# for selftesting: bind _() to fintl.gettext when the fintl module can be
# imported; otherwise fall back to an identity function so that _() simply
# returns its argument unchanged.
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    _ = lambda s: s
22
23__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
24
25Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
26internationalization of C programs. Most of these tools are independent of
27the programming language and can be used from within Python programs.
28Martin von Loewis' work[1] helps considerably in this regard.
29
30There's one problem though; xgettext is the program that scans source code
31looking for message strings, but it groks only C (or C++). Python
32introduces a few wrinkles, such as dual quoting characters, triple quoted
33strings, and raw strings. xgettext understands none of this.
34
35Enter pygettext, which uses Python's standard tokenize module to scan
36Python source code, generating .pot files identical to what GNU xgettext[2]
37generates for C and C++ code. From there, the standard GNU tools can be
38used.
39
40A word about marking Python strings as candidates for translation. GNU
41xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
42and gettext_noop. But those can be a lot of text to include all over your
43code. C and C++ have a trick: they use the C preprocessor. Most
44internationalized C source includes a #define for gettext() to _() so that
45what has to be written in the source is much less. Thus these are both
46translatable strings:
47
48 gettext("Translatable String")
49 _("Translatable String")
50
51Python of course has no preprocessor so this doesn't work so well. Thus,
52pygettext searches only for _() by default, but see the -k/--keyword flag
53below for how to augment this.
54
55 [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
56 [2] http://www.gnu.org/software/gettext/gettext.html
57
58NOTE: pygettext attempts to be option and feature compatible with GNU
59xgettext where ever possible. However some options are still missing or are
60not fully implemented. Also, xgettext's use of command line switches with
61option arguments is broken, and in these cases, pygettext just defines
62additional switches.
63
64Usage: pygettext [options] inputfile ...
65
66Options:
67
68 -a
69 --extract-all
70 Extract all strings.
71
72 -d name
73 --default-domain=name
74 Rename the default output file from messages.pot to name.pot.
75
76 -E
77 --escape
78 Replace non-ASCII characters with octal escape sequences.
79
80 -D
81 --docstrings
82 Extract module, class, method, and function docstrings. These do
83 not need to be wrapped in _() markers, and in fact cannot be for
84 Python to consider them docstrings. (See also the -X option).
85
86 -h
87 --help
88 Print this help message and exit.
89
90 -k word
91 --keyword=word
92 Keywords to look for in addition to the default set, which are:
93 %(DEFAULTKEYWORDS)s
94
95 You can have multiple -k flags on the command line.
96
97 -K
98 --no-default-keywords
99 Disable the default set of keywords (see above). Any keywords
100 explicitly added with the -k/--keyword option are still recognized.
101
102 --no-location
103 Do not write filename/lineno location comments.
104
105 -n
106 --add-location
107 Write filename/lineno location comments indicating where each
108 extracted string is found in the source. These lines appear before
109 each msgid. The style of comments is controlled by the -S/--style
110 option. This is the default.
111
112 -o filename
113 --output=filename
114 Rename the default output file from messages.pot to filename. If
115 filename is `-' then the output is sent to standard out.
116
117 -p dir
118 --output-dir=dir
119 Output files will be placed in directory dir.
120
121 -S stylename
122 --style stylename
123 Specify which style to use for location comments. Two styles are
124 supported:
125
126 Solaris # File: filename, line: line-number
127 GNU #: filename:line
128
129 The style name is case insensitive. GNU style is the default.
130
131 -v
132 --verbose
133 Print the names of the files being processed.
134
135 -V
136 --version
137 Print the version of pygettext and exit.
138
139 -w columns
140 --width=columns
141 Set width of output to columns.
142
143 -x filename
144 --exclude-file=filename
145 Specify a file that contains a list of strings that are not be
146 extracted from the input files. Each string to be excluded must
147 appear on a line by itself in the file.
148
149 -X filename
150 --no-docstrings=filename
151 Specify a file that contains a list of files (one per line) that
152 should not have their docstrings extracted. This is only useful in
153 conjunction with the -D option above.
154
155If `inputfile' is -, standard input is read.
156""")
157
158import os
159import imp
160import sys
161import glob
162import time
163import getopt
164import token
165import tokenize
166import operator
167
# Version string: printed by -V/--version and substituted into the
# "Generated-By" line of the .pot header.
__version__ = '1.5'

# Names treated as translation markers; main() replaces this list with an
# empty one when -K/--no-default-keywords is given.
default_keywords = ['_']
# Comma-separated rendering of the defaults, interpolated into the help text
# through the %(DEFAULTKEYWORDS)s placeholder.
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when joining string fragments.
EMPTYSTRING = ''
174
175
176
177
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.  The %(time)s and %(version)s placeholders are filled in by
# TokenEater.write().  The doubled backslashes keep a literal "\n" escape
# inside each quoted header line of the generated file.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
198
199
200
def usage(code, msg=''):
    """Print the help text to stderr and terminate the process.

    The module docstring is used as the help text; its %(...)s placeholders
    are filled from the module globals.  When 'msg' is truthy it is printed
    on its own line after the help text.  The process then exits with status
    'code'.
    """
    help_text = __doc__ % globals()
    print >> sys.stderr, help_text
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)
206
207
208
209
# Escape table indexed by character code; filled in by make_escapes().
escapes = []


def make_escapes(pass_iso8859):
    """Rebuild the module-level 'escapes' table (256 entries).

    Entry i is the text written to the .po file for the character with
    code i.  Characters outside the printable 32..126 range become octal
    escapes, except that when 'pass_iso8859' is true the codes 128..255 are
    left alone, so that e.g. an o-umlaut in a msgid is written as-is
    instead of as \\366.  Backslash, tab, carriage return, newline and the
    double quote always get their C-style escapes.
    """
    global escapes
    if pass_iso8859:
        limit = 128
    else:
        limit = 256
    table = []
    for code in range(256):
        printable = 32 <= code <= 126
        if code < limit and not printable:
            table.append("\\%03o" % code)
        else:
            table.append(chr(code))
    # C-style escapes take precedence over the octal form.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes = table
230
231
def escape(s):
    """Return 's' with every character replaced by its 'escapes' entry.

    The module-level table is indexed by character code and holds 256
    entries once make_escapes() has run, so a character with a code above
    255 (or any character before the table is built) raises IndexError.
    """
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
238
239
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    NOTE(review): this is eval() with an empty builtins mapping, not a real
    sandbox.  Every caller in this file passes the text of a tokenizer
    STRING token; do not feed it arbitrary expressions.
    """
    # unwrap quotes, safely: the expression sees no builtins and no names
    no_builtins = {'__builtins__': {}}
    no_locals = {}
    return eval(s, no_builtins, no_locals)
243
244
def normalize(s):
    """Render 's' as a quoted, C-style string suitable for a .po file.

    A string without newlines becomes one quoted, escaped line.  A string
    with newlines becomes an empty "" line followed by one quoted line per
    source line, each (except possibly the last) ending in an escaped
    newline.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # 's' ended with a newline: drop the empty tail produced by split()
        # and hand the newline back to the real last line.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece) for piece in pieces]
    # close the quote after each escaped newline and reopen on the next line
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
260
261
262
def containsAny(str, set):
    """Return true when 'str' contains at least one of the chars in 'set'."""
    return any(char in str for char in set)
266
267
def _visit_pyfiles(list, dirname, names):
    """Helper for getFilesForName(); a visitor callback for os.path.walk().

    'list' is the accumulator that receives the paths found, 'dirname' is
    the directory being visited and 'names' its entries.  'names' is
    modified in place to drop 'CVS'.
    """
    # get extension for python source files (computed once, then cached in
    # the module global _py_ext)
    global _py_ext
    if '_py_ext' not in globals():
        _py_ext = [triple[0] for triple in imp.get_suffixes()
                   if triple[2] == imp.PY_SOURCE][0]

    # don't recurse into CVS directories
    if 'CVS' in names:
        names.remove('CVS')

    # add all *.py files to list
    list.extend(
        [os.path.join(dirname, entry) for entry in names
         if os.path.splitext(entry)[1] == _py_ext]
        )
285
286
def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    Return the file system path to a file for a module, and to a directory for
    a package. Return None if the name is not found, or is a builtin or
    extension module.
    """
    # split off top-most name
    pieces = dotted_name.split('.', 1)

    if len(pieces) == 1:
        # plain name: accept only Python source modules and packages
        try:
            fobj, location, info = imp.find_module(dotted_name, pathlist)
        except ImportError:
            return None
        if fobj:
            fobj.close()
        if info[2] in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
            return location
        return None

    # we have a dotted path, locate the top-level package first
    head, remainder = pieces
    try:
        fobj, location, info = imp.find_module(head, pathlist)
    except ImportError:
        return None
    if fobj:
        fobj.close()

    # only a package can contain the remaining name parts
    if info[2] != imp.PKG_DIRECTORY:
        return None
    # recursively handle the remaining name parts inside the package dir
    return _get_modpkg_path(remainder, [location])
324
325
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is first tried as a glob pattern
    (each match is resolved recursively) and then as a dotted module or
    package name.  An empty list is returned when nothing can be resolved.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            matched = []
            for hit in glob.glob(name):
                matched.extend(getFilesForName(hit))
            return matched

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        collected = []
        os.path.walk(name, _visit_pyfiles, collected)
        return collected

    if os.path.exists(name):
        # a single file
        return [name]

    return []
354
355
356
class TokenEater:
    """Token-driven state machine that collects translatable strings.

    An instance is called once per token with the five values produced by
    the tokenize module.  The current state is a bound method stored in
    self.__state; each state looks at one token and may switch state.

    Collected messages are kept in a dict that maps the message text to a
    dict of {(filename, lineno): isdocstring}.  write() emits them in .pot
    format.

    'options' is the option holder built by main(); the attributes read
    here are docstrings, nodocstrings, keywords, toexclude, writelocations,
    locationstyle, GNU, SOLARIS and width.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None
        # nesting depth of (), [] and {} while scanning a def/class header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        """Dispatch one token to the current state handler.

        Only the token type, its text and the starting row (stup[0]) are
        passed on.
        """
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for docstring contexts and keyword names."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL):
                    # still in the leading comments / blank lines
                    return
                # The module has no docstring.  Fall through instead of
                # returning so that this first real token (which may be
                # 'def', 'class' or a keyword such as '_') is still examined.
                self.__freshmodule = 0
            # class or function docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__enclosurecount = 0
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """Skip a def/class header up to the colon that ends it.

        Brackets are counted so that a colon inside (), [] or {} -- a dict
        or lambda default value, a slice -- is not mistaken for the end of
        the header.
        """
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                self.__state = self.__suitedocstring
            elif tstring in ('(', '[', '{'):
                self.__enclosurecount += 1
            elif tstring in (')', ']', '}'):
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """Record the first string after a def/class header as a docstring."""
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """A keyword name was seen; it only counts if '(' follows directly."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Collect the string literals between keyword( and the closing )."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Remember 'msg' at (current file, lineno) unless it is excluded.

        'lineno' defaults to the line on which the current keyword call
        was opened.
        """
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file; the module-docstring check is re-armed."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the .pot header and every collected message to 'fp'."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        for rkey in sorted(reverse.keys()):
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >> fp, _(
                            '# File: %(filename)s, line: %(lineno)d') % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'
506
507
508
509
def main():
    """Run pygettext from the command line.

    Parses sys.argv, builds the option holder read by TokenEater, expands
    the positional arguments into a list of source files ('-' stands for
    standard input), tokenizes each of them and finally writes the
    collected messages to the selected output.

    The process exits through sys.exit() for -h and -V, for option errors
    (via usage()) and when the --exclude-file cannot be read.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # takes a filename, like its short form -X
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            with open(arg) as fp:
                for line in fp:
                    # Strip only the line terminator; slicing off the last
                    # character would damage a final line that has none.
                    options.nodocstrings[line.rstrip('\r\n')] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                # One string per line.  The terminator is removed so that an
                # entry can actually compare equal to an extracted message.
                options.toexclude = [line.rstrip('\r\n') for line in fp]
        except IOError:
            print >> sys.stderr, _(
                "Can't read --exclude-file: %s") % options.excludefilename
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError as e:
                # args is (message, (row, column)); report and carry on
                # with the next file
                print >> sys.stderr, '%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1])
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
666
667
668
# Script entry point.  The _() calls after main() are only evaluated and
# discarded; presumably they serve as sample input when pygettext is run
# over its own source -- confirm before removing or reformatting them.
if __name__ == '__main__':
    main()
    # some more test strings
    _(u'a unicode string')
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')
Note: See TracBrowser for help on using the repository browser.