1 | #! /usr/bin/env python
|
---|
2 | # -*- coding: iso-8859-1 -*-
|
---|
3 | # Originally written by Barry Warsaw <barry@zope.com>
|
---|
4 | #
|
---|
5 | # Minimally patched to make it even more xgettext compatible
|
---|
6 | # by Peter Funk <pf@artcom-gmbh.de>
|
---|
7 | #
|
---|
8 | # 2002-11-22 Jürgen Hermann <jh@web.de>
|
---|
9 | # Added checks that _() only contains string literals, and
|
---|
10 | # command line args are resolved to module lists, i.e. you
|
---|
11 | # can now pass a filename, a module or package name, or a
|
---|
12 | # directory (including globbing chars, important for Win32).
|
---|
13 | # Made docstring fit in 80 chars wide displays using pydoc.
|
---|
14 | #
|
---|
15 |
|
---|
16 | # for selftesting
|
---|
17 | try:
|
---|
18 | import fintl
|
---|
19 | _ = fintl.gettext
|
---|
20 | except ImportError:
|
---|
21 | _ = lambda s: s
|
---|
22 |
|
---|
23 | __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
---|
24 |
|
---|
25 | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
---|
26 | internationalization of C programs. Most of these tools are independent of
|
---|
27 | the programming language and can be used from within Python programs.
|
---|
28 | Martin von Loewis' work[1] helps considerably in this regard.
|
---|
29 |
|
---|
30 | There's one problem though; xgettext is the program that scans source code
|
---|
31 | looking for message strings, but it groks only C (or C++). Python
|
---|
32 | introduces a few wrinkles, such as dual quoting characters, triple quoted
|
---|
33 | strings, and raw strings. xgettext understands none of this.
|
---|
34 |
|
---|
35 | Enter pygettext, which uses Python's standard tokenize module to scan
|
---|
36 | Python source code, generating .pot files identical to what GNU xgettext[2]
|
---|
37 | generates for C and C++ code. From there, the standard GNU tools can be
|
---|
38 | used.
|
---|
39 |
|
---|
40 | A word about marking Python strings as candidates for translation. GNU
|
---|
41 | xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
---|
42 | and gettext_noop. But those can be a lot of text to include all over your
|
---|
43 | code. C and C++ have a trick: they use the C preprocessor. Most
|
---|
44 | internationalized C source includes a #define for gettext() to _() so that
|
---|
45 | what has to be written in the source is much less. Thus these are both
|
---|
46 | translatable strings:
|
---|
47 |
|
---|
48 | gettext("Translatable String")
|
---|
49 | _("Translatable String")
|
---|
50 |
|
---|
51 | Python of course has no preprocessor so this doesn't work so well. Thus,
|
---|
52 | pygettext searches only for _() by default, but see the -k/--keyword flag
|
---|
53 | below for how to augment this.
|
---|
54 |
|
---|
55 | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
---|
56 | [2] http://www.gnu.org/software/gettext/gettext.html
|
---|
57 |
|
---|
58 | NOTE: pygettext attempts to be option and feature compatible with GNU
|
---|
59 | xgettext where ever possible. However some options are still missing or are
|
---|
60 | not fully implemented. Also, xgettext's use of command line switches with
|
---|
61 | option arguments is broken, and in these cases, pygettext just defines
|
---|
62 | additional switches.
|
---|
63 |
|
---|
64 | Usage: pygettext [options] inputfile ...
|
---|
65 |
|
---|
66 | Options:
|
---|
67 |
|
---|
68 | -a
|
---|
69 | --extract-all
|
---|
70 | Extract all strings.
|
---|
71 |
|
---|
72 | -d name
|
---|
73 | --default-domain=name
|
---|
74 | Rename the default output file from messages.pot to name.pot.
|
---|
75 |
|
---|
76 | -E
|
---|
77 | --escape
|
---|
78 | Replace non-ASCII characters with octal escape sequences.
|
---|
79 |
|
---|
80 | -D
|
---|
81 | --docstrings
|
---|
82 | Extract module, class, method, and function docstrings. These do
|
---|
83 | not need to be wrapped in _() markers, and in fact cannot be for
|
---|
84 | Python to consider them docstrings. (See also the -X option).
|
---|
85 |
|
---|
86 | -h
|
---|
87 | --help
|
---|
88 | Print this help message and exit.
|
---|
89 |
|
---|
90 | -k word
|
---|
91 | --keyword=word
|
---|
92 | Keywords to look for in addition to the default set, which are:
|
---|
93 | %(DEFAULTKEYWORDS)s
|
---|
94 |
|
---|
95 | You can have multiple -k flags on the command line.
|
---|
96 |
|
---|
97 | -K
|
---|
98 | --no-default-keywords
|
---|
99 | Disable the default set of keywords (see above). Any keywords
|
---|
100 | explicitly added with the -k/--keyword option are still recognized.
|
---|
101 |
|
---|
102 | --no-location
|
---|
103 | Do not write filename/lineno location comments.
|
---|
104 |
|
---|
105 | -n
|
---|
106 | --add-location
|
---|
107 | Write filename/lineno location comments indicating where each
|
---|
108 | extracted string is found in the source. These lines appear before
|
---|
109 | each msgid. The style of comments is controlled by the -S/--style
|
---|
110 | option. This is the default.
|
---|
111 |
|
---|
112 | -o filename
|
---|
113 | --output=filename
|
---|
114 | Rename the default output file from messages.pot to filename. If
|
---|
115 | filename is `-' then the output is sent to standard out.
|
---|
116 |
|
---|
117 | -p dir
|
---|
118 | --output-dir=dir
|
---|
119 | Output files will be placed in directory dir.
|
---|
120 |
|
---|
121 | -S stylename
|
---|
122 | --style stylename
|
---|
123 | Specify which style to use for location comments. Two styles are
|
---|
124 | supported:
|
---|
125 |
|
---|
126 | Solaris # File: filename, line: line-number
|
---|
127 | GNU #: filename:line
|
---|
128 |
|
---|
129 | The style name is case insensitive. GNU style is the default.
|
---|
130 |
|
---|
131 | -v
|
---|
132 | --verbose
|
---|
133 | Print the names of the files being processed.
|
---|
134 |
|
---|
135 | -V
|
---|
136 | --version
|
---|
137 | Print the version of pygettext and exit.
|
---|
138 |
|
---|
139 | -w columns
|
---|
140 | --width=columns
|
---|
141 | Set width of output to columns.
|
---|
142 |
|
---|
143 | -x filename
|
---|
144 | --exclude-file=filename
|
---|
145 | Specify a file that contains a list of strings that are not be
|
---|
146 | extracted from the input files. Each string to be excluded must
|
---|
147 | appear on a line by itself in the file.
|
---|
148 |
|
---|
149 | -X filename
|
---|
150 | --no-docstrings=filename
|
---|
151 | Specify a file that contains a list of files (one per line) that
|
---|
152 | should not have their docstrings extracted. This is only useful in
|
---|
153 | conjunction with the -D option above.
|
---|
154 |
|
---|
155 | If `inputfile' is -, standard input is read.
|
---|
156 | """)
|
---|
157 |
|
---|
158 | import os
|
---|
159 | import imp
|
---|
160 | import sys
|
---|
161 | import glob
|
---|
162 | import time
|
---|
163 | import getopt
|
---|
164 | import token
|
---|
165 | import tokenize
|
---|
166 | import operator
|
---|
167 |
|
---|
# Version of this script; reported by -V/--version and written into the
# "Generated-By" line of the .pot header.
__version__ = '1.5'

# Names treated as translation markers unless -K/--no-default-keywords is
# given.
default_keywords = ['_']
# The same list as display text, interpolated into the usage message.
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when joining string fragments.
EMPTYSTRING = ''
|
---|
174 |
|
---|
175 |
|
---|
176 | |
---|
177 |
|
---|
# Template for the standard .pot file header; msgmerge and Emacs's po-mode
# work better when it is present.  The %(time)s and %(version)s placeholders
# are filled in when the output file is written.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
|
---|
198 |
|
---|
199 | |
---|
200 |
|
---|
def usage(code, msg=''):
    """Print the help text to stderr and terminate the program.

    code -- exit status handed to sys.exit()
    msg  -- optional extra message, printed after the help text when it is
            non-empty
    """
    # The module help text contains %(NAME)s placeholders that are resolved
    # against the module globals.
    help_text = __doc__ % globals()
    print >> sys.stderr, help_text
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)
|
---|
206 |
|
---|
207 |
|
---|
208 | |
---|
209 |
|
---|
# Character escape table: escapes[n] is the text written to the .po file for
# the character with ordinal n.  It is empty until make_escapes() is called.
escapes = []


def make_escapes(pass_iso8859):
    """(Re)build the global 'escapes' table used by escape().

    pass_iso8859 -- when true, characters with ordinals 160..254 are passed
                    through unchanged; when false, every character outside
                    the printable ASCII range 32..126 is replaced by a
                    three-digit octal escape.

    Backslash, tab, carriage return, newline and the double quote always get
    their C-style escapes.  The table is replaced in place, so the function
    may be called more than once and the list always holds exactly 256
    entries.
    """
    if pass_iso8859:
        # Let iso-8859 characters pass through, so that e.g. an o-umlaut in
        # a msgid is written as is instead of as the octal escape \366.
        # Folding the ordinal modulo 128 makes the upper half of the table
        # follow the same printable/non-printable pattern as the lower half.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Replace the contents rather than appending: appending would leave the
    # entries of an earlier call at indices 0..255, where lookups happen.
    escapes[:] = table
|
---|
231 |
|
---|
232 |
|
---|
def escape(s):
    """Return s with each character replaced by its 'escapes' table entry.

    The table is indexed by character ordinal, so make_escapes() must have
    been called first.
    """
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
|
---|
239 |
|
---|
240 |
|
---|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The expression is evaluated with an empty builtins mapping and empty
    local namespace, so no builtin or module-level name can be resolved.
    """
    restricted_globals = {'__builtins__': {}}
    return eval(s, restricted_globals, {})
|
---|
244 |
|
---|
245 |
|
---|
def normalize(s):
    """Convert a Python string value into quoted .po file syntax.

    A string without newlines becomes one double-quoted, escaped string.
    A string with newlines becomes an empty "" followed by one quoted string
    per source line, each ending in an escaped newline where the original
    line did.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # The text ended with a newline: drop the empty tail that split()
        # produced and give the newline back to the real last line, because
        # the join below only puts newlines between lines.
        pieces.pop()
        pieces[-1] += '\n'
    quoted = [escape(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(quoted) + '"'
|
---|
261 |
|
---|
262 | |
---|
263 |
|
---|
def containsAny(str, set):
    """Return true if 'str' contains at least one of the chars in 'set'."""
    for ch in set:
        if ch in str:
            return True
    return False
|
---|
267 |
|
---|
268 |
|
---|
269 | def _visit_pyfiles(list, dirname, names):
|
---|
270 | """Helper for getFilesForName()."""
|
---|
271 | # get extension for python source files
|
---|
272 | if not globals().has_key('_py_ext'):
|
---|
273 | global _py_ext
|
---|
274 | _py_ext = [triple[0] for triple in imp.get_suffixes()
|
---|
275 | if triple[2] == imp.PY_SOURCE][0]
|
---|
276 |
|
---|
277 | # don't recurse into CVS directories
|
---|
278 | if 'CVS' in names:
|
---|
279 | names.remove('CVS')
|
---|
280 |
|
---|
281 | # add all *.py files to list
|
---|
282 | list.extend(
|
---|
283 | [os.path.join(dirname, file) for file in names
|
---|
284 | if os.path.splitext(file)[1] == _py_ext]
|
---|
285 | )
|
---|
286 |
|
---|
287 |
|
---|
288 | def _get_modpkg_path(dotted_name, pathlist=None):
|
---|
289 | """Get the filesystem path for a module or a package.
|
---|
290 |
|
---|
291 | Return the file system path to a file for a module, and to a directory for
|
---|
292 | a package. Return None if the name is not found, or is a builtin or
|
---|
293 | extension module.
|
---|
294 | """
|
---|
295 | # split off top-most name
|
---|
296 | parts = dotted_name.split('.', 1)
|
---|
297 |
|
---|
298 | if len(parts) > 1:
|
---|
299 | # we have a dotted path, import top-level package
|
---|
300 | try:
|
---|
301 | file, pathname, description = imp.find_module(parts[0], pathlist)
|
---|
302 | if file: file.close()
|
---|
303 | except ImportError:
|
---|
304 | return None
|
---|
305 |
|
---|
306 | # check if it's indeed a package
|
---|
307 | if description[2] == imp.PKG_DIRECTORY:
|
---|
308 | # recursively handle the remaining name parts
|
---|
309 | pathname = _get_modpkg_path(parts[1], [pathname])
|
---|
310 | else:
|
---|
311 | pathname = None
|
---|
312 | else:
|
---|
313 | # plain name
|
---|
314 | try:
|
---|
315 | file, pathname, description = imp.find_module(
|
---|
316 | dotted_name, pathlist)
|
---|
317 | if file:
|
---|
318 | file.close()
|
---|
319 | if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
|
---|
320 | pathname = None
|
---|
321 | except ImportError:
|
---|
322 | pathname = None
|
---|
323 |
|
---|
324 | return pathname
|
---|
325 |
|
---|
326 |
|
---|
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    A name that does not exist on disk is treated as a glob pattern if it
    contains any of the characters *?[] and as a module or package name
    otherwise.  An empty list is returned when nothing can be found.
    """
    if not os.path.exists(name):
        if containsAny(name, "*?[]"):
            # expand the pattern and resolve every match on its own
            found = []
            for match in glob.glob(name):
                found.extend(getFilesForName(match))
            return found

        # not a path on disk: try to find a module or package of that name
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # walk the directory tree and collect the Python files in it
        found = []
        os.path.walk(name, _visit_pyfiles, found)
        return found
    if os.path.exists(name):
        # a single file
        return [name]
    return []
|
---|
355 |
|
---|
356 | |
---|
357 |
|
---|
class TokenEater:
    """Token callback for tokenize.tokenize() that collects messages.

    An instance is a small state machine.  It records the strings found
    inside calls to one of the configured keywords and, when docstring
    extraction is enabled, module, class and function docstrings.  Every
    message is stored together with the (filename, lineno) pairs at which it
    was seen; write() emits the collected messages in .pot format.

    The options object must provide the attributes docstrings,
    nodocstrings, keywords, toexclude, writelocations, locationstyle, GNU,
    SOLARIS and width.
    """

    def __init__(self, options):
        self.__options = options
        # message -> {(filename, lineno): isdocstring flag}
        self.__messages = {}
        # handler for the next token
        self.__state = self.__waiting
        # string fragments collected inside the current keyword call
        self.__data = []
        self.__lineno = -1
        # true until the first significant token of the current file
        self.__freshmodule = 1
        self.__curfile = None
        # nesting depth of (), [] and {} inside a class/def header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch to the current state; stup[0] is the token's start row
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL):
                    # comments and blank lines may precede the docstring
                    return
                # Any other token means the module has no docstring.  Fall
                # through so that this token is still examined below instead
                # of being swallowed (it may be 'class', 'def' or a keyword).
                self.__freshmodule = 0
            # class or function docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__enclosurecount = 0
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # Skip the class/def header up to the colon that ends it.  Colons
        # inside brackets (dict displays, slices, lambdas in default
        # values) belong to the header, so the bracket depth is tracked.
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                self.__state = self.__suitedocstring
            elif tstring in ('(', '[', '{'):
                self.__enclosurecount = self.__enclosurecount + 1
            elif tstring in (')', ']', '}'):
                self.__enclosurecount = self.__enclosurecount - 1

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT, tokenize.NL):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        # a keyword only starts a message when an open paren follows it
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        # lineno defaults to the line of the keyword call being processed
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Set the file name recorded for the tokens that follow."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the header and all collected messages to the file fp."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >>fp, _(
                            '# File: %(filename)s, line: %(lineno)d') % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'
|
---|
507 |
|
---|
508 |
|
---|
509 | |
---|
510 |
|
---|
511 | def main():
|
---|
512 | global default_keywords
|
---|
513 | try:
|
---|
514 | opts, args = getopt.getopt(
|
---|
515 | sys.argv[1:],
|
---|
516 | 'ad:DEhk:Kno:p:S:Vvw:x:X:',
|
---|
517 | ['extract-all', 'default-domain=', 'escape', 'help',
|
---|
518 | 'keyword=', 'no-default-keywords',
|
---|
519 | 'add-location', 'no-location', 'output=', 'output-dir=',
|
---|
520 | 'style=', 'verbose', 'version', 'width=', 'exclude-file=',
|
---|
521 | 'docstrings', 'no-docstrings',
|
---|
522 | ])
|
---|
523 | except getopt.error, msg:
|
---|
524 | usage(1, msg)
|
---|
525 |
|
---|
526 | # for holding option values
|
---|
527 | class Options:
|
---|
528 | # constants
|
---|
529 | GNU = 1
|
---|
530 | SOLARIS = 2
|
---|
531 | # defaults
|
---|
532 | extractall = 0 # FIXME: currently this option has no effect at all.
|
---|
533 | escape = 0
|
---|
534 | keywords = []
|
---|
535 | outpath = ''
|
---|
536 | outfile = 'messages.pot'
|
---|
537 | writelocations = 1
|
---|
538 | locationstyle = GNU
|
---|
539 | verbose = 0
|
---|
540 | width = 78
|
---|
541 | excludefilename = ''
|
---|
542 | docstrings = 0
|
---|
543 | nodocstrings = {}
|
---|
544 |
|
---|
545 | options = Options()
|
---|
546 | locations = {'gnu' : options.GNU,
|
---|
547 | 'solaris' : options.SOLARIS,
|
---|
548 | }
|
---|
549 |
|
---|
550 | # parse options
|
---|
551 | for opt, arg in opts:
|
---|
552 | if opt in ('-h', '--help'):
|
---|
553 | usage(0)
|
---|
554 | elif opt in ('-a', '--extract-all'):
|
---|
555 | options.extractall = 1
|
---|
556 | elif opt in ('-d', '--default-domain'):
|
---|
557 | options.outfile = arg + '.pot'
|
---|
558 | elif opt in ('-E', '--escape'):
|
---|
559 | options.escape = 1
|
---|
560 | elif opt in ('-D', '--docstrings'):
|
---|
561 | options.docstrings = 1
|
---|
562 | elif opt in ('-k', '--keyword'):
|
---|
563 | options.keywords.append(arg)
|
---|
564 | elif opt in ('-K', '--no-default-keywords'):
|
---|
565 | default_keywords = []
|
---|
566 | elif opt in ('-n', '--add-location'):
|
---|
567 | options.writelocations = 1
|
---|
568 | elif opt in ('--no-location',):
|
---|
569 | options.writelocations = 0
|
---|
570 | elif opt in ('-S', '--style'):
|
---|
571 | options.locationstyle = locations.get(arg.lower())
|
---|
572 | if options.locationstyle is None:
|
---|
573 | usage(1, _('Invalid value for --style: %s') % arg)
|
---|
574 | elif opt in ('-o', '--output'):
|
---|
575 | options.outfile = arg
|
---|
576 | elif opt in ('-p', '--output-dir'):
|
---|
577 | options.outpath = arg
|
---|
578 | elif opt in ('-v', '--verbose'):
|
---|
579 | options.verbose = 1
|
---|
580 | elif opt in ('-V', '--version'):
|
---|
581 | print _('pygettext.py (xgettext for Python) %s') % __version__
|
---|
582 | sys.exit(0)
|
---|
583 | elif opt in ('-w', '--width'):
|
---|
584 | try:
|
---|
585 | options.width = int(arg)
|
---|
586 | except ValueError:
|
---|
587 | usage(1, _('--width argument must be an integer: %s') % arg)
|
---|
588 | elif opt in ('-x', '--exclude-file'):
|
---|
589 | options.excludefilename = arg
|
---|
590 | elif opt in ('-X', '--no-docstrings'):
|
---|
591 | fp = open(arg)
|
---|
592 | try:
|
---|
593 | while 1:
|
---|
594 | line = fp.readline()
|
---|
595 | if not line:
|
---|
596 | break
|
---|
597 | options.nodocstrings[line[:-1]] = 1
|
---|
598 | finally:
|
---|
599 | fp.close()
|
---|
600 |
|
---|
601 | # calculate escapes
|
---|
602 | make_escapes(options.escape)
|
---|
603 |
|
---|
604 | # calculate all keywords
|
---|
605 | options.keywords.extend(default_keywords)
|
---|
606 |
|
---|
607 | # initialize list of strings to exclude
|
---|
608 | if options.excludefilename:
|
---|
609 | try:
|
---|
610 | fp = open(options.excludefilename)
|
---|
611 | options.toexclude = fp.readlines()
|
---|
612 | fp.close()
|
---|
613 | except IOError:
|
---|
614 | print >> sys.stderr, _(
|
---|
615 | "Can't read --exclude-file: %s") % options.excludefilename
|
---|
616 | sys.exit(1)
|
---|
617 | else:
|
---|
618 | options.toexclude = []
|
---|
619 |
|
---|
620 | # resolve args to module lists
|
---|
621 | expanded = []
|
---|
622 | for arg in args:
|
---|
623 | if arg == '-':
|
---|
624 | expanded.append(arg)
|
---|
625 | else:
|
---|
626 | expanded.extend(getFilesForName(arg))
|
---|
627 | args = expanded
|
---|
628 |
|
---|
629 | # slurp through all the files
|
---|
630 | eater = TokenEater(options)
|
---|
631 | for filename in args:
|
---|
632 | if filename == '-':
|
---|
633 | if options.verbose:
|
---|
634 | print _('Reading standard input')
|
---|
635 | fp = sys.stdin
|
---|
636 | closep = 0
|
---|
637 | else:
|
---|
638 | if options.verbose:
|
---|
639 | print _('Working on %s') % filename
|
---|
640 | fp = open(filename)
|
---|
641 | closep = 1
|
---|
642 | try:
|
---|
643 | eater.set_filename(filename)
|
---|
644 | try:
|
---|
645 | tokenize.tokenize(fp.readline, eater)
|
---|
646 | except tokenize.TokenError, e:
|
---|
647 | print >> sys.stderr, '%s: %s, line %d, column %d' % (
|
---|
648 | e[0], filename, e[1][0], e[1][1])
|
---|
649 | finally:
|
---|
650 | if closep:
|
---|
651 | fp.close()
|
---|
652 |
|
---|
653 | # write the output
|
---|
654 | if options.outfile == '-':
|
---|
655 | fp = sys.stdout
|
---|
656 | closep = 0
|
---|
657 | else:
|
---|
658 | if options.outpath:
|
---|
659 | options.outfile = os.path.join(options.outpath, options.outfile)
|
---|
660 | fp = open(options.outfile, 'w')
|
---|
661 | closep = 1
|
---|
662 | try:
|
---|
663 | eater.write(fp)
|
---|
664 | finally:
|
---|
665 | if closep:
|
---|
666 | fp.close()
|
---|
667 |
|
---|
668 | |
---|
669 |
|
---|
if __name__ == '__main__':
    main()
    # The calls below do nothing useful at run time; they are sample
    # messages for running this script on its own source.
    # some more test strings
    _(u'a unicode string')
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')
|
---|