[2] | 1 | #! /usr/bin/env python
|
---|
| 2 | # -*- coding: iso-8859-1 -*-
|
---|
| 3 | # Originally written by Barry Warsaw <barry@zope.com>
|
---|
| 4 | #
|
---|
| 5 | # Minimally patched to make it even more xgettext compatible
|
---|
| 6 | # by Peter Funk <pf@artcom-gmbh.de>
|
---|
| 7 | #
|
---|
| 8 | # 2002-11-22 Jürgen Hermann <jh@web.de>
|
---|
| 9 | # Added checks that _() only contains string literals, and
|
---|
| 10 | # command line args are resolved to module lists, i.e. you
|
---|
| 11 | # can now pass a filename, a module or package name, or a
|
---|
| 12 | # directory (including globbing chars, important for Win32).
|
---|
| 13 | # Made docstring fit in 80 chars wide displays using pydoc.
|
---|
| 14 | #
|
---|
| 15 |
|
---|
| 16 | # for selftesting
|
---|
| 17 | try:
|
---|
| 18 | import fintl
|
---|
| 19 | _ = fintl.gettext
|
---|
| 20 | except ImportError:
|
---|
| 21 | _ = lambda s: s
|
---|
| 22 |
|
---|
| 23 | __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
---|
| 24 |
|
---|
| 25 | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
---|
| 26 | internationalization of C programs. Most of these tools are independent of
|
---|
| 27 | the programming language and can be used from within Python programs.
|
---|
| 28 | Martin von Loewis' work[1] helps considerably in this regard.
|
---|
| 29 |
|
---|
| 30 | There's one problem though; xgettext is the program that scans source code
|
---|
| 31 | looking for message strings, but it groks only C (or C++). Python
|
---|
| 32 | introduces a few wrinkles, such as dual quoting characters, triple quoted
|
---|
| 33 | strings, and raw strings. xgettext understands none of this.
|
---|
| 34 |
|
---|
| 35 | Enter pygettext, which uses Python's standard tokenize module to scan
|
---|
| 36 | Python source code, generating .pot files identical to what GNU xgettext[2]
|
---|
| 37 | generates for C and C++ code. From there, the standard GNU tools can be
|
---|
| 38 | used.
|
---|
| 39 |
|
---|
| 40 | A word about marking Python strings as candidates for translation. GNU
|
---|
| 41 | xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
---|
| 42 | and gettext_noop. But those can be a lot of text to include all over your
|
---|
| 43 | code. C and C++ have a trick: they use the C preprocessor. Most
|
---|
| 44 | internationalized C source includes a #define for gettext() to _() so that
|
---|
| 45 | what has to be written in the source is much less. Thus these are both
|
---|
| 46 | translatable strings:
|
---|
| 47 |
|
---|
| 48 | gettext("Translatable String")
|
---|
| 49 | _("Translatable String")
|
---|
| 50 |
|
---|
| 51 | Python of course has no preprocessor so this doesn't work so well. Thus,
|
---|
| 52 | pygettext searches only for _() by default, but see the -k/--keyword flag
|
---|
| 53 | below for how to augment this.
|
---|
| 54 |
|
---|
| 55 | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
---|
| 56 | [2] http://www.gnu.org/software/gettext/gettext.html
|
---|
| 57 |
|
---|
| 58 | NOTE: pygettext attempts to be option and feature compatible with GNU
|
---|
| 59 | xgettext wherever possible. However, some options are still missing or are
|
---|
| 60 | not fully implemented. Also, xgettext's use of command line switches with
|
---|
| 61 | option arguments is broken, and in these cases, pygettext just defines
|
---|
| 62 | additional switches.
|
---|
| 63 |
|
---|
| 64 | Usage: pygettext [options] inputfile ...
|
---|
| 65 |
|
---|
| 66 | Options:
|
---|
| 67 |
|
---|
| 68 | -a
|
---|
| 69 | --extract-all
|
---|
| 70 | Extract all strings.
|
---|
| 71 |
|
---|
| 72 | -d name
|
---|
| 73 | --default-domain=name
|
---|
| 74 | Rename the default output file from messages.pot to name.pot.
|
---|
| 75 |
|
---|
| 76 | -E
|
---|
| 77 | --escape
|
---|
| 78 | Replace non-ASCII characters with octal escape sequences.
|
---|
| 79 |
|
---|
| 80 | -D
|
---|
| 81 | --docstrings
|
---|
| 82 | Extract module, class, method, and function docstrings. These do
|
---|
| 83 | not need to be wrapped in _() markers, and in fact cannot be for
|
---|
| 84 | Python to consider them docstrings. (See also the -X option).
|
---|
| 85 |
|
---|
| 86 | -h
|
---|
| 87 | --help
|
---|
| 88 | Print this help message and exit.
|
---|
| 89 |
|
---|
| 90 | -k word
|
---|
| 91 | --keyword=word
|
---|
| 92 | Keywords to look for in addition to the default set, which are:
|
---|
| 93 | %(DEFAULTKEYWORDS)s
|
---|
| 94 |
|
---|
| 95 | You can have multiple -k flags on the command line.
|
---|
| 96 |
|
---|
| 97 | -K
|
---|
| 98 | --no-default-keywords
|
---|
| 99 | Disable the default set of keywords (see above). Any keywords
|
---|
| 100 | explicitly added with the -k/--keyword option are still recognized.
|
---|
| 101 |
|
---|
| 102 | --no-location
|
---|
| 103 | Do not write filename/lineno location comments.
|
---|
| 104 |
|
---|
| 105 | -n
|
---|
| 106 | --add-location
|
---|
| 107 | Write filename/lineno location comments indicating where each
|
---|
| 108 | extracted string is found in the source. These lines appear before
|
---|
| 109 | each msgid. The style of comments is controlled by the -S/--style
|
---|
| 110 | option. This is the default.
|
---|
| 111 |
|
---|
| 112 | -o filename
|
---|
| 113 | --output=filename
|
---|
| 114 | Rename the default output file from messages.pot to filename. If
|
---|
| 115 | filename is `-' then the output is sent to standard out.
|
---|
| 116 |
|
---|
| 117 | -p dir
|
---|
| 118 | --output-dir=dir
|
---|
| 119 | Output files will be placed in directory dir.
|
---|
| 120 |
|
---|
| 121 | -S stylename
|
---|
| 122 | --style stylename
|
---|
| 123 | Specify which style to use for location comments. Two styles are
|
---|
| 124 | supported:
|
---|
| 125 |
|
---|
| 126 | Solaris # File: filename, line: line-number
|
---|
| 127 | GNU #: filename:line
|
---|
| 128 |
|
---|
| 129 | The style name is case insensitive. GNU style is the default.
|
---|
| 130 |
|
---|
| 131 | -v
|
---|
| 132 | --verbose
|
---|
| 133 | Print the names of the files being processed.
|
---|
| 134 |
|
---|
| 135 | -V
|
---|
| 136 | --version
|
---|
| 137 | Print the version of pygettext and exit.
|
---|
| 138 |
|
---|
| 139 | -w columns
|
---|
| 140 | --width=columns
|
---|
| 141 | Set width of output to columns.
|
---|
| 142 |
|
---|
| 143 | -x filename
|
---|
| 144 | --exclude-file=filename
|
---|
| 145 | Specify a file that contains a list of strings that are not to be
|
---|
| 146 | extracted from the input files. Each string to be excluded must
|
---|
| 147 | appear on a line by itself in the file.
|
---|
| 148 |
|
---|
| 149 | -X filename
|
---|
| 150 | --no-docstrings=filename
|
---|
| 151 | Specify a file that contains a list of files (one per line) that
|
---|
| 152 | should not have their docstrings extracted. This is only useful in
|
---|
| 153 | conjunction with the -D option above.
|
---|
| 154 |
|
---|
| 155 | If `inputfile' is -, standard input is read.
|
---|
| 156 | """)
|
---|
| 157 |
|
---|
| 158 | import os
|
---|
| 159 | import imp
|
---|
| 160 | import sys
|
---|
| 161 | import glob
|
---|
| 162 | import time
|
---|
| 163 | import getopt
|
---|
| 164 | import token
|
---|
| 165 | import tokenize
|
---|
| 166 | import operator
|
---|
| 167 |
|
---|
# Version string reported by -V/--version and written into the .pot header.
__version__ = '1.5'

# Keywords treated as translation markers; main() replaces this list with an
# empty one when -K/--no-default-keywords is given.  DEFAULTKEYWORDS is the
# same list rendered as text for the %(DEFAULTKEYWORDS)s slot of the help.
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when joining string fragments.
EMPTYSTRING = ''
| 174 |
|
---|
| 175 |
|
---|
| 176 | |
---|
| 177 |
|
---|
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
#
# This is a %-format template: TokenEater.write() fills in %(time)s and
# %(version)s.  The doubled backslashes keep a literal "\n" in the emitted
# header lines, as the .po syntax requires.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
| 198 |
|
---|
| 199 | |
---|
| 200 |
|
---|
def usage(code, msg=''):
    """Print the help text to stderr and terminate the program.

    The help text is the module __doc__ with its %(name)s placeholders
    filled in from the module globals.  When msg is non-empty it is printed
    on stderr after the help text.  The function never returns: it calls
    sys.exit(code).
    """
    print >> sys.stderr, __doc__ % globals()
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)
| 206 |
|
---|
| 207 |
|
---|
| 208 | |
---|
| 209 |
|
---|
# Table indexed by character code (0..255) giving the text that represents
# that character inside a .po string.  Rebuilt by make_escapes().
escapes = []


def make_escapes(pass_iso8859):
    """(Re)build the global `escapes` table.

    pass_iso8859 -- when true, only codes 0..127 are candidates for octal
        escaping, so characters 128..255 are emitted unchanged; when false,
        every code outside the printable range 32..126 is replaced by a
        three-digit octal escape.

    Backslash, tab, carriage return, newline and the double quote always
    get their C-style escapes.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that a message such
        # as "H<o-umlaut>he" is written as is and not as "H\366he".
        # Otherwise we escape any character outside the 32..126 range.
        limit = 128
    else:
        limit = 256
    table = [chr(i) for i in range(256)]
    for i in range(limit):
        if i < 32 or i > 126:
            table[i] = "\\%03o" % i
    # These must come last: they override the octal form chosen above.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    escapes = table


def escape(s):
    """Return s with every character replaced by its `escapes` entry.

    make_escapes() must have been called first; otherwise the table is
    empty and an IndexError is raised for any non-empty input.

    Characters whose code point is 256 or above (only possible for unicode
    input) have no table entry.  They are passed through unchanged instead
    of raising IndexError; no output charset is known here, so they cannot
    be turned into byte escapes.
    """
    table = escapes
    out = []
    for ch in s:
        code = ord(ch)
        if code < 256:
            out.append(table[code])
        else:
            out.append(ch)
    return ''.join(out)
| 238 |
|
---|
| 239 |
|
---|
def safe_eval(s):
    """Return the value of the Python expression text s.

    Within this file the callers pass the source text of a tokenizer STRING
    token, so the result is the string with its quotes and prefixes
    unwrapped.
    """
    # NOTE(review): eval() with emptied builtins is not a real sandbox.
    # It is only acceptable here because the input is a string-literal
    # token; do not reuse this helper for arbitrary external text.
    restricted_globals = {'__builtins__': {}}
    no_locals = {}
    return eval(s, restricted_globals, no_locals)
| 243 |
|
---|
| 244 |
|
---|
def normalize(s):
    """Render the message s as a quoted .po string (close to C style).

    A message without newlines becomes one quoted, escaped string.  A
    message with newlines becomes an empty "" first line followed by one
    quoted string per source line, each but the last ending in a literal
    backslash-n.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"%s"' % (escape(s),)

    if not pieces[-1]:
        # The message ended with a newline: drop the empty tail and put the
        # newline back on the real last line so escape() renders it.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece) for piece in pieces]
    separator = '\\n"\n"'
    return '""\n"' + separator.join(escaped) + '"'
| 260 |
|
---|
| 261 | |
---|
| 262 |
|
---|
def containsAny(str, set):
    """Tell whether 'str' contains at least one of the chars in 'set'."""
    for candidate in set:
        if candidate in str:
            return True
    return False
| 266 |
|
---|
| 267 |
|
---|
| 268 | def _visit_pyfiles(list, dirname, names):
|
---|
| 269 | """Helper for getFilesForName()."""
|
---|
| 270 | # get extension for python source files
|
---|
| 271 | if not globals().has_key('_py_ext'):
|
---|
| 272 | global _py_ext
|
---|
| 273 | _py_ext = [triple[0] for triple in imp.get_suffixes()
|
---|
| 274 | if triple[2] == imp.PY_SOURCE][0]
|
---|
| 275 |
|
---|
| 276 | # don't recurse into CVS directories
|
---|
| 277 | if 'CVS' in names:
|
---|
| 278 | names.remove('CVS')
|
---|
| 279 |
|
---|
| 280 | # add all *.py files to list
|
---|
| 281 | list.extend(
|
---|
| 282 | [os.path.join(dirname, file) for file in names
|
---|
| 283 | if os.path.splitext(file)[1] == _py_ext]
|
---|
| 284 | )
|
---|
| 285 |
|
---|
| 286 |
|
---|
| 287 | def _get_modpkg_path(dotted_name, pathlist=None):
|
---|
| 288 | """Get the filesystem path for a module or a package.
|
---|
| 289 |
|
---|
| 290 | Return the file system path to a file for a module, and to a directory for
|
---|
| 291 | a package. Return None if the name is not found, or is a builtin or
|
---|
| 292 | extension module.
|
---|
| 293 | """
|
---|
| 294 | # split off top-most name
|
---|
| 295 | parts = dotted_name.split('.', 1)
|
---|
| 296 |
|
---|
| 297 | if len(parts) > 1:
|
---|
| 298 | # we have a dotted path, import top-level package
|
---|
| 299 | try:
|
---|
| 300 | file, pathname, description = imp.find_module(parts[0], pathlist)
|
---|
| 301 | if file: file.close()
|
---|
| 302 | except ImportError:
|
---|
| 303 | return None
|
---|
| 304 |
|
---|
| 305 | # check if it's indeed a package
|
---|
| 306 | if description[2] == imp.PKG_DIRECTORY:
|
---|
| 307 | # recursively handle the remaining name parts
|
---|
| 308 | pathname = _get_modpkg_path(parts[1], [pathname])
|
---|
| 309 | else:
|
---|
| 310 | pathname = None
|
---|
| 311 | else:
|
---|
| 312 | # plain name
|
---|
| 313 | try:
|
---|
| 314 | file, pathname, description = imp.find_module(
|
---|
| 315 | dotted_name, pathlist)
|
---|
| 316 | if file:
|
---|
| 317 | file.close()
|
---|
| 318 | if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
|
---|
| 319 | pathname = None
|
---|
| 320 | except ImportError:
|
---|
| 321 | pathname = None
|
---|
| 322 |
|
---|
| 323 | return pathname
|
---|
| 324 |
|
---|
| 325 |
|
---|
def getFilesForName(name):
    """Resolve `name` to a list of module files.

    `name` may be a filename, a directory, a glob pattern, or a module or
    package name.  An empty list is returned when nothing matches.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            collected = []
            for match in glob.glob(name):
                collected.extend(getFilesForName(match))
            return collected

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        collected = []
        os.path.walk(name, _visit_pyfiles, collected)
        return collected

    if os.path.exists(name):
        # a single file
        return [name]

    return []
| 354 |
|
---|
| 355 | |
---|
| 356 |
|
---|
class TokenEater:
    """Token consumer for tokenize.tokenize() that collects messages.

    An instance is called once per token.  It is a small state machine:
    self.__state holds the bound method that handles the next token.
    Collected messages are kept in self.__messages, a dictionary mapping
    each message string to a dictionary {(filename, lineno): isdocstring}.
    write() emits everything collected as a .pot file.
    """

    def __init__(self, options):
        """Create an eater configured by `options` (the Options instance
        built in main())."""
        self.__options = options
        self.__messages = {}
        # current state handler; starts out waiting for a keyword
        self.__state = self.__waiting
        # string fragments seen inside the current keyword(...) call
        self.__data = []
        # line number of the opening parenthesis of the current call
        self.__lineno = -1
        # true until the first significant token of a file has been seen
        self.__freshmodule = 1
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        """Handle one token; only the type, the text and the start row
        (stup[0]) are passed on to the current state handler."""
        # dispatch
##        import token
##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
##              'tstring:', tstring
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for docstring positions and keywords."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    # any other real token means there is no module docstring
                    self.__freshmodule = 0
                return
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """After 'class' or 'def': skip tokens up to the colon."""
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        """After the suite's colon: record a docstring if one follows."""
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """After a keyword: only an opening parenthesis starts a call."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Inside keyword(...): gather string literals until ')'."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            # (the reported line is that of the opening parenthesis)
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record msg at (current file, lineno) unless it is excluded.

        lineno defaults to the line of the current call's opening
        parenthesis.
        """
        if lineno is None:
            lineno = self.__lineno
        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file: remember its name and re-arm the
        module-docstring detection."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the header and all collected messages to the file fp."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                # (the sum of the flags is non-zero if any location is one)
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >>fp, _(
                            '# File: %(filename)s, line: %(lineno)d') % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceeds 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    # flush the last, partially filled location line
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'
| 506 |
|
---|
| 507 |
|
---|
| 508 | |
---|
| 509 |
|
---|
| 510 | def main():
|
---|
| 511 | global default_keywords
|
---|
| 512 | try:
|
---|
| 513 | opts, args = getopt.getopt(
|
---|
| 514 | sys.argv[1:],
|
---|
| 515 | 'ad:DEhk:Kno:p:S:Vvw:x:X:',
|
---|
| 516 | ['extract-all', 'default-domain=', 'escape', 'help',
|
---|
| 517 | 'keyword=', 'no-default-keywords',
|
---|
| 518 | 'add-location', 'no-location', 'output=', 'output-dir=',
|
---|
| 519 | 'style=', 'verbose', 'version', 'width=', 'exclude-file=',
|
---|
| 520 | 'docstrings', 'no-docstrings',
|
---|
| 521 | ])
|
---|
| 522 | except getopt.error, msg:
|
---|
| 523 | usage(1, msg)
|
---|
| 524 |
|
---|
| 525 | # for holding option values
|
---|
| 526 | class Options:
|
---|
| 527 | # constants
|
---|
| 528 | GNU = 1
|
---|
| 529 | SOLARIS = 2
|
---|
| 530 | # defaults
|
---|
| 531 | extractall = 0 # FIXME: currently this option has no effect at all.
|
---|
| 532 | escape = 0
|
---|
| 533 | keywords = []
|
---|
| 534 | outpath = ''
|
---|
| 535 | outfile = 'messages.pot'
|
---|
| 536 | writelocations = 1
|
---|
| 537 | locationstyle = GNU
|
---|
| 538 | verbose = 0
|
---|
| 539 | width = 78
|
---|
| 540 | excludefilename = ''
|
---|
| 541 | docstrings = 0
|
---|
| 542 | nodocstrings = {}
|
---|
| 543 |
|
---|
| 544 | options = Options()
|
---|
| 545 | locations = {'gnu' : options.GNU,
|
---|
| 546 | 'solaris' : options.SOLARIS,
|
---|
| 547 | }
|
---|
| 548 |
|
---|
| 549 | # parse options
|
---|
| 550 | for opt, arg in opts:
|
---|
| 551 | if opt in ('-h', '--help'):
|
---|
| 552 | usage(0)
|
---|
| 553 | elif opt in ('-a', '--extract-all'):
|
---|
| 554 | options.extractall = 1
|
---|
| 555 | elif opt in ('-d', '--default-domain'):
|
---|
| 556 | options.outfile = arg + '.pot'
|
---|
| 557 | elif opt in ('-E', '--escape'):
|
---|
| 558 | options.escape = 1
|
---|
| 559 | elif opt in ('-D', '--docstrings'):
|
---|
| 560 | options.docstrings = 1
|
---|
| 561 | elif opt in ('-k', '--keyword'):
|
---|
| 562 | options.keywords.append(arg)
|
---|
| 563 | elif opt in ('-K', '--no-default-keywords'):
|
---|
| 564 | default_keywords = []
|
---|
| 565 | elif opt in ('-n', '--add-location'):
|
---|
| 566 | options.writelocations = 1
|
---|
| 567 | elif opt in ('--no-location',):
|
---|
| 568 | options.writelocations = 0
|
---|
| 569 | elif opt in ('-S', '--style'):
|
---|
| 570 | options.locationstyle = locations.get(arg.lower())
|
---|
| 571 | if options.locationstyle is None:
|
---|
| 572 | usage(1, _('Invalid value for --style: %s') % arg)
|
---|
| 573 | elif opt in ('-o', '--output'):
|
---|
| 574 | options.outfile = arg
|
---|
| 575 | elif opt in ('-p', '--output-dir'):
|
---|
| 576 | options.outpath = arg
|
---|
| 577 | elif opt in ('-v', '--verbose'):
|
---|
| 578 | options.verbose = 1
|
---|
| 579 | elif opt in ('-V', '--version'):
|
---|
| 580 | print _('pygettext.py (xgettext for Python) %s') % __version__
|
---|
| 581 | sys.exit(0)
|
---|
| 582 | elif opt in ('-w', '--width'):
|
---|
| 583 | try:
|
---|
| 584 | options.width = int(arg)
|
---|
| 585 | except ValueError:
|
---|
| 586 | usage(1, _('--width argument must be an integer: %s') % arg)
|
---|
| 587 | elif opt in ('-x', '--exclude-file'):
|
---|
| 588 | options.excludefilename = arg
|
---|
| 589 | elif opt in ('-X', '--no-docstrings'):
|
---|
| 590 | fp = open(arg)
|
---|
| 591 | try:
|
---|
| 592 | while 1:
|
---|
| 593 | line = fp.readline()
|
---|
| 594 | if not line:
|
---|
[391] | 595 | break
|
---|
[2] | 596 | options.nodocstrings[line[:-1]] = 1
|
---|
| 597 | finally:
|
---|
| 598 | fp.close()
|
---|
| 599 |
|
---|
| 600 | # calculate escapes
|
---|
| 601 | make_escapes(not options.escape)
|
---|
| 602 |
|
---|
| 603 | # calculate all keywords
|
---|
| 604 | options.keywords.extend(default_keywords)
|
---|
| 605 |
|
---|
| 606 | # initialize list of strings to exclude
|
---|
| 607 | if options.excludefilename:
|
---|
| 608 | try:
|
---|
| 609 | fp = open(options.excludefilename)
|
---|
| 610 | options.toexclude = fp.readlines()
|
---|
| 611 | fp.close()
|
---|
| 612 | except IOError:
|
---|
| 613 | print >> sys.stderr, _(
|
---|
| 614 | "Can't read --exclude-file: %s") % options.excludefilename
|
---|
| 615 | sys.exit(1)
|
---|
| 616 | else:
|
---|
| 617 | options.toexclude = []
|
---|
| 618 |
|
---|
| 619 | # resolve args to module lists
|
---|
| 620 | expanded = []
|
---|
| 621 | for arg in args:
|
---|
| 622 | if arg == '-':
|
---|
| 623 | expanded.append(arg)
|
---|
| 624 | else:
|
---|
| 625 | expanded.extend(getFilesForName(arg))
|
---|
| 626 | args = expanded
|
---|
| 627 |
|
---|
| 628 | # slurp through all the files
|
---|
| 629 | eater = TokenEater(options)
|
---|
| 630 | for filename in args:
|
---|
| 631 | if filename == '-':
|
---|
| 632 | if options.verbose:
|
---|
| 633 | print _('Reading standard input')
|
---|
| 634 | fp = sys.stdin
|
---|
| 635 | closep = 0
|
---|
| 636 | else:
|
---|
| 637 | if options.verbose:
|
---|
| 638 | print _('Working on %s') % filename
|
---|
| 639 | fp = open(filename)
|
---|
| 640 | closep = 1
|
---|
| 641 | try:
|
---|
| 642 | eater.set_filename(filename)
|
---|
| 643 | try:
|
---|
| 644 | tokenize.tokenize(fp.readline, eater)
|
---|
| 645 | except tokenize.TokenError, e:
|
---|
| 646 | print >> sys.stderr, '%s: %s, line %d, column %d' % (
|
---|
| 647 | e[0], filename, e[1][0], e[1][1])
|
---|
| 648 | finally:
|
---|
| 649 | if closep:
|
---|
| 650 | fp.close()
|
---|
| 651 |
|
---|
| 652 | # write the output
|
---|
| 653 | if options.outfile == '-':
|
---|
| 654 | fp = sys.stdout
|
---|
| 655 | closep = 0
|
---|
| 656 | else:
|
---|
| 657 | if options.outpath:
|
---|
| 658 | options.outfile = os.path.join(options.outpath, options.outfile)
|
---|
| 659 | fp = open(options.outfile, 'w')
|
---|
| 660 | closep = 1
|
---|
| 661 | try:
|
---|
| 662 | eater.write(fp)
|
---|
| 663 | finally:
|
---|
| 664 | if closep:
|
---|
| 665 | fp.close()
|
---|
| 666 |
|
---|
| 667 | |
---|
| 668 |
|
---|
# Script entry point.  The expressions after main() are never used for
# their value: they are sample _() calls that this program finds when it
# is run over its own source.
if __name__ == '__main__':
    main()
    # some more test strings
    _(u'a unicode string')
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    # adjacent literals are joined into a single message
    _('more' 'than' 'one' 'string')