1 | #! /usr/bin/env python
|
---|
2 | # -*- coding: iso-8859-1 -*-
|
---|
3 | # Originally written by Barry Warsaw <barry@zope.com>
|
---|
4 | #
|
---|
5 | # Minimally patched to make it even more xgettext compatible
|
---|
6 | # by Peter Funk <pf@artcom-gmbh.de>
|
---|
7 | #
|
---|
8 | # 2002-11-22 Jürgen Hermann <jh@web.de>
|
---|
9 | # Added checks that _() only contains string literals, and
|
---|
10 | # command line args are resolved to module lists, i.e. you
|
---|
11 | # can now pass a filename, a module or package name, or a
|
---|
12 | # directory (including globbing chars, important for Win32).
|
---|
13 | # Made docstring fit in 80 chars wide displays using pydoc.
|
---|
14 | #
|
---|
15 |
|
---|
# For self-testing: use fintl's gettext() as the translation marker when the
# fintl module can be imported; otherwise fall back to a marker that hands
# its argument back unchanged.
try:
    import fintl
except ImportError:
    def _(s):
        return s
else:
    _ = fintl.gettext
|
---|
22 |
|
---|
23 | __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
---|
24 |
|
---|
25 | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
---|
26 | internationalization of C programs. Most of these tools are independent of
|
---|
27 | the programming language and can be used from within Python programs.
|
---|
28 | Martin von Loewis' work[1] helps considerably in this regard.
|
---|
29 |
|
---|
30 | There's one problem though; xgettext is the program that scans source code
|
---|
31 | looking for message strings, but it groks only C (or C++). Python
|
---|
32 | introduces a few wrinkles, such as dual quoting characters, triple quoted
|
---|
33 | strings, and raw strings. xgettext understands none of this.
|
---|
34 |
|
---|
35 | Enter pygettext, which uses Python's standard tokenize module to scan
|
---|
36 | Python source code, generating .pot files identical to what GNU xgettext[2]
|
---|
37 | generates for C and C++ code. From there, the standard GNU tools can be
|
---|
38 | used.
|
---|
39 |
|
---|
40 | A word about marking Python strings as candidates for translation. GNU
|
---|
41 | xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
---|
42 | and gettext_noop. But those can be a lot of text to include all over your
|
---|
43 | code. C and C++ have a trick: they use the C preprocessor. Most
|
---|
44 | internationalized C source includes a #define for gettext() to _() so that
|
---|
45 | what has to be written in the source is much less. Thus these are both
|
---|
46 | translatable strings:
|
---|
47 |
|
---|
48 | gettext("Translatable String")
|
---|
49 | _("Translatable String")
|
---|
50 |
|
---|
51 | Python of course has no preprocessor so this doesn't work so well. Thus,
|
---|
52 | pygettext searches only for _() by default, but see the -k/--keyword flag
|
---|
53 | below for how to augment this.
|
---|
54 |
|
---|
55 | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
---|
56 | [2] http://www.gnu.org/software/gettext/gettext.html
|
---|
57 |
|
---|
58 | NOTE: pygettext attempts to be option and feature compatible with GNU
|
---|
59 | xgettext wherever possible. However some options are still missing or are
|
---|
60 | not fully implemented. Also, xgettext's use of command line switches with
|
---|
61 | option arguments is broken, and in these cases, pygettext just defines
|
---|
62 | additional switches.
|
---|
63 |
|
---|
64 | Usage: pygettext [options] inputfile ...
|
---|
65 |
|
---|
66 | Options:
|
---|
67 |
|
---|
68 | -a
|
---|
69 | --extract-all
|
---|
70 | Extract all strings.
|
---|
71 |
|
---|
72 | -d name
|
---|
73 | --default-domain=name
|
---|
74 | Rename the default output file from messages.pot to name.pot.
|
---|
75 |
|
---|
76 | -E
|
---|
77 | --escape
|
---|
78 | Replace non-ASCII characters with octal escape sequences.
|
---|
79 |
|
---|
80 | -D
|
---|
81 | --docstrings
|
---|
82 | Extract module, class, method, and function docstrings. These do
|
---|
83 | not need to be wrapped in _() markers, and in fact cannot be for
|
---|
84 | Python to consider them docstrings. (See also the -X option).
|
---|
85 |
|
---|
86 | -h
|
---|
87 | --help
|
---|
88 | Print this help message and exit.
|
---|
89 |
|
---|
90 | -k word
|
---|
91 | --keyword=word
|
---|
92 | Keywords to look for in addition to the default set, which are:
|
---|
93 | %(DEFAULTKEYWORDS)s
|
---|
94 |
|
---|
95 | You can have multiple -k flags on the command line.
|
---|
96 |
|
---|
97 | -K
|
---|
98 | --no-default-keywords
|
---|
99 | Disable the default set of keywords (see above). Any keywords
|
---|
100 | explicitly added with the -k/--keyword option are still recognized.
|
---|
101 |
|
---|
102 | --no-location
|
---|
103 | Do not write filename/lineno location comments.
|
---|
104 |
|
---|
105 | -n
|
---|
106 | --add-location
|
---|
107 | Write filename/lineno location comments indicating where each
|
---|
108 | extracted string is found in the source. These lines appear before
|
---|
109 | each msgid. The style of comments is controlled by the -S/--style
|
---|
110 | option. This is the default.
|
---|
111 |
|
---|
112 | -o filename
|
---|
113 | --output=filename
|
---|
114 | Rename the default output file from messages.pot to filename. If
|
---|
115 | filename is `-' then the output is sent to standard out.
|
---|
116 |
|
---|
117 | -p dir
|
---|
118 | --output-dir=dir
|
---|
119 | Output files will be placed in directory dir.
|
---|
120 |
|
---|
121 | -S stylename
|
---|
122 | --style stylename
|
---|
123 | Specify which style to use for location comments. Two styles are
|
---|
124 | supported:
|
---|
125 |
|
---|
126 | Solaris # File: filename, line: line-number
|
---|
127 | GNU #: filename:line
|
---|
128 |
|
---|
129 | The style name is case insensitive. GNU style is the default.
|
---|
130 |
|
---|
131 | -v
|
---|
132 | --verbose
|
---|
133 | Print the names of the files being processed.
|
---|
134 |
|
---|
135 | -V
|
---|
136 | --version
|
---|
137 | Print the version of pygettext and exit.
|
---|
138 |
|
---|
139 | -w columns
|
---|
140 | --width=columns
|
---|
141 | Set width of output to columns.
|
---|
142 |
|
---|
143 | -x filename
|
---|
144 | --exclude-file=filename
|
---|
145 | Specify a file that contains a list of strings that are not to be
|
---|
146 | extracted from the input files. Each string to be excluded must
|
---|
147 | appear on a line by itself in the file.
|
---|
148 |
|
---|
149 | -X filename
|
---|
150 | --no-docstrings=filename
|
---|
151 | Specify a file that contains a list of files (one per line) that
|
---|
152 | should not have their docstrings extracted. This is only useful in
|
---|
153 | conjunction with the -D option above.
|
---|
154 |
|
---|
155 | If `inputfile' is -, standard input is read.
|
---|
156 | """)
|
---|
157 |
|
---|
158 | import os
|
---|
159 | import imp
|
---|
160 | import sys
|
---|
161 | import glob
|
---|
162 | import time
|
---|
163 | import getopt
|
---|
164 | import token
|
---|
165 | import tokenize
|
---|
166 | import operator
|
---|
167 |
|
---|
# Version string; interpolated into the "Generated-By" header line and
# printed by -V/--version.
__version__ = '1.5'

# Keywords recognized without any -k flag.  DEFAULTKEYWORDS is the
# comma-separated form substituted into the help text.
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when gluing string fragments together.
EMPTYSTRING = ''
|
---|
174 |
|
---|
175 |
|
---|
176 | |
---|
177 |
|
---|
# Standard .pot file header, written once at the top of the output.
# msgmerge and Emacs's po-mode work better when it is present.  The
# %(time)s and %(version)s placeholders are filled in when the file is
# written.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
|
---|
198 |
|
---|
199 | |
---|
200 |
|
---|
def usage(code, msg=''):
    """Print the help text to stderr and exit.

    code is handed to sys.exit() as the exit status.  msg, when true, is
    printed on its own line after the help text.
    """
    # The module docstring contains %(NAME)s placeholders (for instance
    # %(DEFAULTKEYWORDS)s) that are resolved against the module globals.
    help_text = __doc__ % globals()
    print >> sys.stderr, help_text
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)
|
---|
206 |
|
---|
207 |
|
---|
208 | |
---|
209 |
|
---|
# Per-character replacement table indexed by ord(); empty until
# make_escapes() has been called.
escapes = []

def make_escapes(pass_iso8859):
    """Rebuild the module-level escapes table.

    Afterwards escapes[i] is the text that stands for the character with
    ordinal i (0..255).  Characters outside 32..126 become three-digit octal
    escapes; when pass_iso8859 is true, only ordinals below 128 are treated
    that way and 128..255 are left as they are.  Backslash, tab, carriage
    return, newline and the double quote always get their C-style escapes.
    """
    global escapes
    if pass_iso8859:
        # Let iso-8859 characters through, so that a msgid containing an
        # o-umlaut is written as that character and not as \366.
        limit = 128
    else:
        limit = 256
    table = []
    for code in range(256):
        if code < limit and (code < 32 or code > 126):
            table.append("\\%03o" % code)
        else:
            table.append(chr(code))
    # Named escapes take precedence over the octal form.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes = table
|
---|
230 |
|
---|
231 |
|
---|
def escape(s):
    """Return s with each character replaced by its escapes[] entry.

    The lookup is by ord() of the character, so the escapes table must
    already have been filled in by make_escapes().
    """
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
|
---|
238 |
|
---|
239 |
|
---|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The text is evaluated with an empty __builtins__ mapping and empty
    locals, which is how the surrounding quotes get unwrapped.
    """
    # NOTE(review): an empty __builtins__ is not a complete sandbox; within
    # this file the function is only handed STRING tokens from tokenize.
    restricted_globals = {'__builtins__': {}}
    restricted_locals = {}
    return eval(s, restricted_globals, restricted_locals)
|
---|
243 |
|
---|
244 |
|
---|
def normalize(s):
    """Render s as the quoted, C-style form used after msgid in a .po file.

    A string without newlines becomes one quoted, escaped string.  Anything
    else becomes an empty "" line followed by one quoted line per source
    line, each (except possibly the last) ending in an escaped newline.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # s ended with a newline: drop the empty tail and put the newline
        # back on the real last line so escape() turns it into \n.
        del pieces[-1]
        pieces[-1] = pieces[-1] + '\n'
    quoted = [escape(piece) for piece in pieces]
    # Each join point closes the current quoted line with an escaped newline
    # and opens the next one.
    return '""\n"' + '\\n"\n"'.join(quoted) + '"'
|
---|
260 |
|
---|
261 | |
---|
262 |
|
---|
def containsAny(str, set):
    """Tell whether 'str' contains at least one of the chars in 'set'."""
    # Every member of 'set' is tested; there is no early exit.
    present = [ch for ch in set if ch in str]
    return len(present) > 0
|
---|
266 |
|
---|
267 |
|
---|
def _visit_pyfiles(list, dirname, names):
    """Directory-walk callback used by getFilesForName().

    Appends the joined path of every entry of 'names' whose extension is the
    Python source suffix to 'list'.  A 'CVS' entry is removed from 'names'
    in place so the walk does not descend into it.
    """
    global _py_ext
    # Look up the extension for python source files once and cache it in a
    # module global.
    if '_py_ext' not in globals():
        source_suffixes = [triple[0] for triple in imp.get_suffixes()
                           if triple[2] == imp.PY_SOURCE]
        _py_ext = source_suffixes[0]

    # don't recurse into CVS directories
    try:
        names.remove('CVS')
    except ValueError:
        pass

    # add all python source files to the list
    for entry in names:
        if os.path.splitext(entry)[1] == _py_ext:
            list.append(os.path.join(dirname, entry))
|
---|
285 |
|
---|
286 |
|
---|
def _get_modpkg_path(dotted_name, pathlist=None):
    """Locate a module or package on disk.

    Returns the path of the source file for a module and the path of the
    directory for a package.  Returns None when the name cannot be found or
    is not a Python source module or package directory.  pathlist is passed
    on to imp.find_module() as the search path.
    """
    # split off top-most name
    pieces = dotted_name.split('.', 1)

    if len(pieces) == 1:
        # plain name: it must resolve to a source file or a package directory
        try:
            fobj, location, info = imp.find_module(dotted_name, pathlist)
        except ImportError:
            return None
        if fobj:
            fobj.close()
        if info[2] in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
            return location
        return None

    # dotted path: find the top-level name first
    head, rest = pieces
    try:
        fobj, location, info = imp.find_module(head, pathlist)
    except ImportError:
        return None
    if fobj:
        fobj.close()

    # only a package can contain the remaining name parts
    if info[2] != imp.PKG_DIRECTORY:
        return None
    # recursively handle the remaining name parts inside the package
    return _get_modpkg_path(rest, [location])
|
---|
324 |
|
---|
325 |
|
---|
def getFilesForName(name):
    """Resolve one command-line argument to a list of files.

    name may be an existing file or directory, a glob pattern, or a module
    or package name.  A directory is walked for Python source files.  An
    empty list is returned when nothing can be found.
    """
    if not os.path.exists(name):
        # not a path as given; it may be a pattern to expand
        if containsAny(name, "*?[]"):
            expanded = []
            for match in glob.glob(name):
                expanded.extend(getFilesForName(match))
            return expanded

        # otherwise try it as a module or package name
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # collect all python files below the directory
        collected = []
        os.path.walk(name, _visit_pyfiles, collected)
        return collected

    if os.path.exists(name):
        # a single file
        return [name]

    return []
|
---|
354 |
|
---|
355 | |
---|
356 |
|
---|
class TokenEater:
    """Token callback that collects translatable strings.

    Instances are called once per token.  Each token is forwarded to the
    handler for the current state.  Strings found inside keyword calls, and
    docstrings when that option is on, are recorded together with the file
    name and line number where they were seen.  write() emits everything
    collected in .pot format.
    """

    def __init__(self, options):
        self.__options = options
        # message text -> {(filename, lineno): isdocstring flag}
        self.__messages = {}
        # string fragments gathered inside the keyword call being scanned
        self.__data = []
        # line of the opening parenthesis of that call
        self.__lineno = -1
        # set again by set_filename(); cleared by __waiting while it looks
        # for a module docstring
        self.__freshmodule = 1
        self.__curfile = None
        self.__state = self.__waiting

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch to the current state; only the start row is passed on.
        start_row = stup[0]
        self.__state(ttype, tstring, start_row)

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for docstring positions and keyword names."""
        opts = self.__options
        is_name = (ttype == tokenize.NAME)
        # Do docstring extractions, if enabled for this file
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            if self.__freshmodule:
                # module docstring?
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype != tokenize.COMMENT and ttype != tokenize.NL:
                    self.__freshmodule = 0
                return
            # class or function docstring?
            if is_name and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if is_name and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """After 'class'/'def': skip everything up to the colon."""
        if ttype != tokenize.OP or tstring != ':':
            return
        self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        """After the colon: record a docstring if one comes first."""
        # ignore any intervening noise
        if ttype in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT):
            return
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
        # either the docstring was recorded or there was none
        self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """After a keyword name: only an opening parenthesis continues."""
        if ttype != tokenize.OP or tstring != '(':
            self.__state = self.__waiting
            return
        self.__data = []
        self.__lineno = lineno
        self.__state = self.__openseen

    def __openseen(self, ttype, tstring, lineno):
        """Inside keyword(...): gather string literals until ')'."""
        if ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
            return
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record them
            # under the line number of the opening parenthesis and reset for
            # the next batch.  An empty call is simply ignored.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
            return
        if ttype in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                     token.NEWLINE, tokenize.NL]:
            return
        # warn if we see anything else than STRING or whitespace, and give
        # up on this call
        print >> sys.stderr, _(
            '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
            ) % {
            'token': tstring,
            'file': self.__curfile,
            'lineno': self.__lineno
            }
        self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record one occurrence of msg unless it is on the exclude list."""
        if lineno is None:
            lineno = self.__lineno
        if msg in self.__options.toexclude:
            return
        occurrences = self.__messages.setdefault(msg, {})
        occurrences[(self.__curfile, lineno)] = isdocstring

    def set_filename(self, filename):
        """Start a new input file; the next tokens may be its docstring."""
        self.__curfile = filename
        self.__freshmodule = 1

    def __writelocations(self, fp, spots):
        """Write the location comments for one entry in the chosen style."""
        options = self.__options
        # location comments are different b/w Solaris and GNU:
        if options.locationstyle == options.SOLARIS:
            for filename, lineno in spots:
                d = {'filename': filename, 'lineno': lineno}
                print >>fp, _(
                    '# File: %(filename)s, line: %(lineno)d') % d
        elif options.locationstyle == options.GNU:
            # fit as many locations on one line as possible without the
            # line growing longer than options.width
            locline = '#:'
            for filename, lineno in spots:
                d = {'filename': filename, 'lineno': lineno}
                s = _(' %(filename)s:%(lineno)d') % d
                if len(locline) + len(s) <= options.width:
                    locline = locline + s
                else:
                    print >> fp, locline
                    locline = "#:" + s
            if len(locline) > 2:
                print >> fp, locline

    def write(self, fp):
        """Write the pot header and one entry per collected message to fp."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Group the messages by their sorted tuple of locations, then emit
        # the groups in sorted order of that tuple.
        grouped = {}
        for text, places in self.__messages.items():
            ordered_places = places.keys()
            ordered_places.sort()
            grouped.setdefault(tuple(ordered_places), []).append(
                (text, places))
        group_keys = grouped.keys()
        group_keys.sort()
        for group_key in group_keys:
            members = grouped[group_key]
            members.sort()
            for text, places in members:
                # Nonzero when at least one occurrence came from a
                # docstring; such entries get a comment so translators may
                # choose to skip unimportant docstrings.
                isdocstring = reduce(operator.__add__, places.values())
                # (filename, lineno) pairs, by file name then line number
                spots = places.keys()
                spots.sort()
                if options.writelocations:
                    self.__writelocations(fp, spots)
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(text)
                print >> fp, 'msgstr ""\n'
|
---|
506 |
|
---|
507 |
|
---|
508 | |
---|
509 |
|
---|
def main():
    """Command-line entry point.

    Parses sys.argv, resolves every input argument to a list of files with
    getFilesForName(), runs each file through a TokenEater and finally
    writes the collected messages to the chosen output.  Exits through
    usage() on bad options and with status 1 when the exclude file cannot
    be read.  The -K option rebinds the module-level default_keywords.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # takes a file name, like -X, so it needs the trailing '='
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error:
        usage(1, sys.exc_info()[1])

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            fp = open(arg)
            try:
                for line in fp.readlines():
                    # Strip only the line terminator, so a last line
                    # without one keeps its final character.
                    options.nodocstrings[line.rstrip('\n')] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            try:
                # Drop the line terminators: extracted messages are compared
                # against these entries for equality.
                options.toexclude = [line.rstrip('\n')
                                     for line in fp.readlines()]
            finally:
                fp.close()
        except IOError:
            print >> sys.stderr, _(
                "Can't read --exclude-file: %s") % options.excludefilename
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists; '-' (standard input) is kept as is
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError:
                # report the tokenizer error and carry on with the next file
                e = sys.exc_info()[1]
                print >> sys.stderr, '%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1])
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
|
---|
666 |
|
---|
667 | |
---|
668 |
|
---|
if __name__ == '__main__':
    main()
    # The expressions below have no useful effect when executed; they are
    # extra test strings in this file's own source.
    # a unicode literal as the marked string
    _(u'a unicode string')
    # original note: "this one creates a warning"
    # NOTE(review): the '%' operand sits outside the _() call here, so it is
    # unclear that the scanner in this file would warn about it -- confirm.
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    # several adjacent literals inside one call
    _('more' 'than' 'one' 'string')
|
---|