| 1 | #! /usr/bin/env python
|
|---|
| 2 | # -*- coding: iso-8859-1 -*-
|
|---|
| 3 | # Originally written by Barry Warsaw <barry@zope.com>
|
|---|
| 4 | #
|
|---|
| 5 | # Minimally patched to make it even more xgettext compatible
|
|---|
| 6 | # by Peter Funk <pf@artcom-gmbh.de>
|
|---|
| 7 | #
|
|---|
| 8 | # 2002-11-22 Jürgen Hermann <jh@web.de>
|
|---|
| 9 | # Added checks that _() only contains string literals, and
|
|---|
| 10 | # command line args are resolved to module lists, i.e. you
|
|---|
| 11 | # can now pass a filename, a module or package name, or a
|
|---|
| 12 | # directory (including globbing chars, important for Win32).
|
|---|
| 13 | # Made docstring fit in 80 chars wide displays using pydoc.
|
|---|
| 14 | #
|
|---|
| 15 |
|
|---|
# for selftesting
# Bind the translation marker `_`: use fintl.gettext when the optional fintl
# module can be imported, otherwise fall back to an identity function so that
# every _() call in this file returns its argument unchanged.
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    _ = lambda s: s
|
|---|
| 22 |
|
|---|
| 23 | __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
|---|
| 24 |
|
|---|
| 25 | Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
|---|
| 26 | internationalization of C programs. Most of these tools are independent of
|
|---|
| 27 | the programming language and can be used from within Python programs.
|
|---|
| 28 | Martin von Loewis' work[1] helps considerably in this regard.
|
|---|
| 29 |
|
|---|
| 30 | There's one problem though; xgettext is the program that scans source code
|
|---|
| 31 | looking for message strings, but it groks only C (or C++). Python
|
|---|
| 32 | introduces a few wrinkles, such as dual quoting characters, triple quoted
|
|---|
| 33 | strings, and raw strings. xgettext understands none of this.
|
|---|
| 34 |
|
|---|
| 35 | Enter pygettext, which uses Python's standard tokenize module to scan
|
|---|
| 36 | Python source code, generating .pot files identical to what GNU xgettext[2]
|
|---|
| 37 | generates for C and C++ code. From there, the standard GNU tools can be
|
|---|
| 38 | used.
|
|---|
| 39 |
|
|---|
| 40 | A word about marking Python strings as candidates for translation. GNU
|
|---|
| 41 | xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
|---|
| 42 | and gettext_noop. But those can be a lot of text to include all over your
|
|---|
| 43 | code. C and C++ have a trick: they use the C preprocessor. Most
|
|---|
| 44 | internationalized C source includes a #define for gettext() to _() so that
|
|---|
| 45 | what has to be written in the source is much less. Thus these are both
|
|---|
| 46 | translatable strings:
|
|---|
| 47 |
|
|---|
| 48 | gettext("Translatable String")
|
|---|
| 49 | _("Translatable String")
|
|---|
| 50 |
|
|---|
| 51 | Python of course has no preprocessor so this doesn't work so well. Thus,
|
|---|
| 52 | pygettext searches only for _() by default, but see the -k/--keyword flag
|
|---|
| 53 | below for how to augment this.
|
|---|
| 54 |
|
|---|
| 55 | [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
|---|
| 56 | [2] http://www.gnu.org/software/gettext/gettext.html
|
|---|
| 57 |
|
|---|
| 58 | NOTE: pygettext attempts to be option and feature compatible with GNU
|
|---|
| 59 | xgettext where ever possible. However some options are still missing or are
|
|---|
| 60 | not fully implemented. Also, xgettext's use of command line switches with
|
|---|
| 61 | option arguments is broken, and in these cases, pygettext just defines
|
|---|
| 62 | additional switches.
|
|---|
| 63 |
|
|---|
| 64 | Usage: pygettext [options] inputfile ...
|
|---|
| 65 |
|
|---|
| 66 | Options:
|
|---|
| 67 |
|
|---|
| 68 | -a
|
|---|
| 69 | --extract-all
|
|---|
| 70 | Extract all strings.
|
|---|
| 71 |
|
|---|
| 72 | -d name
|
|---|
| 73 | --default-domain=name
|
|---|
| 74 | Rename the default output file from messages.pot to name.pot.
|
|---|
| 75 |
|
|---|
| 76 | -E
|
|---|
| 77 | --escape
|
|---|
| 78 | Replace non-ASCII characters with octal escape sequences.
|
|---|
| 79 |
|
|---|
| 80 | -D
|
|---|
| 81 | --docstrings
|
|---|
| 82 | Extract module, class, method, and function docstrings. These do
|
|---|
| 83 | not need to be wrapped in _() markers, and in fact cannot be for
|
|---|
| 84 | Python to consider them docstrings. (See also the -X option).
|
|---|
| 85 |
|
|---|
| 86 | -h
|
|---|
| 87 | --help
|
|---|
| 88 | Print this help message and exit.
|
|---|
| 89 |
|
|---|
| 90 | -k word
|
|---|
| 91 | --keyword=word
|
|---|
| 92 | Keywords to look for in addition to the default set, which are:
|
|---|
| 93 | %(DEFAULTKEYWORDS)s
|
|---|
| 94 |
|
|---|
| 95 | You can have multiple -k flags on the command line.
|
|---|
| 96 |
|
|---|
| 97 | -K
|
|---|
| 98 | --no-default-keywords
|
|---|
| 99 | Disable the default set of keywords (see above). Any keywords
|
|---|
| 100 | explicitly added with the -k/--keyword option are still recognized.
|
|---|
| 101 |
|
|---|
| 102 | --no-location
|
|---|
| 103 | Do not write filename/lineno location comments.
|
|---|
| 104 |
|
|---|
| 105 | -n
|
|---|
| 106 | --add-location
|
|---|
| 107 | Write filename/lineno location comments indicating where each
|
|---|
| 108 | extracted string is found in the source. These lines appear before
|
|---|
| 109 | each msgid. The style of comments is controlled by the -S/--style
|
|---|
| 110 | option. This is the default.
|
|---|
| 111 |
|
|---|
| 112 | -o filename
|
|---|
| 113 | --output=filename
|
|---|
| 114 | Rename the default output file from messages.pot to filename. If
|
|---|
| 115 | filename is `-' then the output is sent to standard out.
|
|---|
| 116 |
|
|---|
| 117 | -p dir
|
|---|
| 118 | --output-dir=dir
|
|---|
| 119 | Output files will be placed in directory dir.
|
|---|
| 120 |
|
|---|
| 121 | -S stylename
|
|---|
| 122 | --style stylename
|
|---|
| 123 | Specify which style to use for location comments. Two styles are
|
|---|
| 124 | supported:
|
|---|
| 125 |
|
|---|
| 126 | Solaris # File: filename, line: line-number
|
|---|
| 127 | GNU #: filename:line
|
|---|
| 128 |
|
|---|
| 129 | The style name is case insensitive. GNU style is the default.
|
|---|
| 130 |
|
|---|
| 131 | -v
|
|---|
| 132 | --verbose
|
|---|
| 133 | Print the names of the files being processed.
|
|---|
| 134 |
|
|---|
| 135 | -V
|
|---|
| 136 | --version
|
|---|
| 137 | Print the version of pygettext and exit.
|
|---|
| 138 |
|
|---|
| 139 | -w columns
|
|---|
| 140 | --width=columns
|
|---|
| 141 | Set width of output to columns.
|
|---|
| 142 |
|
|---|
| 143 | -x filename
|
|---|
| 144 | --exclude-file=filename
|
|---|
| 145 | Specify a file that contains a list of strings that are not be
|
|---|
| 146 | extracted from the input files. Each string to be excluded must
|
|---|
| 147 | appear on a line by itself in the file.
|
|---|
| 148 |
|
|---|
| 149 | -X filename
|
|---|
| 150 | --no-docstrings=filename
|
|---|
| 151 | Specify a file that contains a list of files (one per line) that
|
|---|
| 152 | should not have their docstrings extracted. This is only useful in
|
|---|
| 153 | conjunction with the -D option above.
|
|---|
| 154 |
|
|---|
| 155 | If `inputfile' is -, standard input is read.
|
|---|
| 156 | """)
|
|---|
| 157 |
|
|---|
| 158 | import os
|
|---|
| 159 | import imp
|
|---|
| 160 | import sys
|
|---|
| 161 | import glob
|
|---|
| 162 | import time
|
|---|
| 163 | import getopt
|
|---|
| 164 | import token
|
|---|
| 165 | import tokenize
|
|---|
| 166 | import operator
|
|---|
| 167 |
|
|---|
# Version string: interpolated into the "Generated-By" line of the .pot header
# and printed for -V/--version.
__version__ = '1.5'

# Names treated as translation markers by default; main() rebinds this to an
# empty list when -K/--no-default-keywords is given.
default_keywords = ['_']
# Comma-separated form of the defaults, interpolated into the usage text
# through its %(DEFAULTKEYWORDS)s placeholder.
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Separator used when joining string fragments.
EMPTYSTRING = ''
|
|---|
| 174 |
|
|---|
| 175 |
|
|---|
| 176 | |
|---|
| 177 |
|
|---|
# The normal pot-file header.  msgmerge and Emacs's po-mode work better if it's
# there.
# The %(time)s and %(version)s placeholders are filled in by
# TokenEater.write().  The doubled backslashes keep a literal backslash-n
# sequence at the end of each quoted header line in the output.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
|
|---|
| 198 |
|
|---|
| 199 | |
|---|
| 200 |
|
|---|
def usage(code, msg=''):
    """Print the help text, and msg if given, to stderr; then exit.

    code -- exit status handed to sys.exit()
    msg  -- extra text printed after the help text; skipped when empty
    """
    # The module docstring carries %(NAME)s placeholders that are resolved
    # against the module globals.
    help_text = __doc__ % globals()
    print >> sys.stderr, help_text
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)
|
|---|
| 206 |
|
|---|
| 207 |
|
|---|
| 208 | |
|---|
| 209 |
|
|---|
# Per-character replacement table indexed by ord(); empty until
# make_escapes() has been called.
escapes = []

def make_escapes(pass_iso8859):
    """(Re)build the global 256-entry 'escapes' table.

    pass_iso8859 -- when true, characters 128..255 map to themselves; when
                    false they are replaced by octal escape sequences too.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. 'msgid
        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
        # escape any character outside the 32..126 range.
        limit = 128
    else:
        limit = 256
    table = []
    for code in range(256):
        if code < limit and (code < 32 or code > 126):
            table.append("\\%03o" % code)
        else:
            table.append(chr(code))
    # These characters get symbolic escapes instead of octal / literal ones.
    for char, replacement in (('\\', '\\\\'),
                              ('\t', '\\t'),
                              ('\r', '\\r'),
                              ('\n', '\\n'),
                              ('\"', '\\"')):
        table[ord(char)] = replacement
    escapes = table
|
|---|
| 230 |
|
|---|
| 231 |
|
|---|
def escape(s):
    """Return s with every character replaced by its 'escapes' table entry.

    The table is indexed by ord() of each character, so make_escapes() must
    have filled it before this is called.
    """
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
|
|---|
| 238 |
|
|---|
| 239 |
|
|---|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The expression is evaluated with an empty __builtins__ mapping and no
    other names in scope.
    """
    # NOTE(review): emptying __builtins__ narrows what eval() can name, but it
    # is not a real sandbox -- callers here only pass STRING tokens.
    restricted_globals = {'__builtins__': {}}
    restricted_locals = {}
    return eval(s, restricted_globals, restricted_locals)
|
|---|
| 243 |
|
|---|
| 244 |
|
|---|
def normalize(s):
    """Render s in the quoted, C-like string syntax used by .po files.

    A string without newlines becomes one quoted, escaped string.  Anything
    else becomes an empty "" line followed by one quoted line per source
    line, each (except possibly the last) ending in an escaped newline.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # s ended with a newline: drop the empty tail and hand the newline
        # back to the real last line so that it is escaped along with it.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'
|
|---|
| 260 |
|
|---|
| 261 | |
|---|
| 262 |
|
|---|
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'.

    Returns True as soon as one element of 'set' is found in 'str', and
    False when none is found (including when 'set' is empty).
    """
    # The parameter names shadow builtins; they are kept because they are
    # part of the function's signature.
    for c in set:
        if c in str:
            # stop at the first hit instead of testing every element
            return True
    return False
|
|---|
| 266 |
|
|---|
| 267 |
|
|---|
| 268 | def _visit_pyfiles(list, dirname, names):
|
|---|
| 269 | """Helper for getFilesForName()."""
|
|---|
| 270 | # get extension for python source files
|
|---|
| 271 | if not globals().has_key('_py_ext'):
|
|---|
| 272 | global _py_ext
|
|---|
| 273 | _py_ext = [triple[0] for triple in imp.get_suffixes()
|
|---|
| 274 | if triple[2] == imp.PY_SOURCE][0]
|
|---|
| 275 |
|
|---|
| 276 | # don't recurse into CVS directories
|
|---|
| 277 | if 'CVS' in names:
|
|---|
| 278 | names.remove('CVS')
|
|---|
| 279 |
|
|---|
| 280 | # add all *.py files to list
|
|---|
| 281 | list.extend(
|
|---|
| 282 | [os.path.join(dirname, file) for file in names
|
|---|
| 283 | if os.path.splitext(file)[1] == _py_ext]
|
|---|
| 284 | )
|
|---|
| 285 |
|
|---|
| 286 |
|
|---|
| 287 | def _get_modpkg_path(dotted_name, pathlist=None):
|
|---|
| 288 | """Get the filesystem path for a module or a package.
|
|---|
| 289 |
|
|---|
| 290 | Return the file system path to a file for a module, and to a directory for
|
|---|
| 291 | a package. Return None if the name is not found, or is a builtin or
|
|---|
| 292 | extension module.
|
|---|
| 293 | """
|
|---|
| 294 | # split off top-most name
|
|---|
| 295 | parts = dotted_name.split('.', 1)
|
|---|
| 296 |
|
|---|
| 297 | if len(parts) > 1:
|
|---|
| 298 | # we have a dotted path, import top-level package
|
|---|
| 299 | try:
|
|---|
| 300 | file, pathname, description = imp.find_module(parts[0], pathlist)
|
|---|
| 301 | if file: file.close()
|
|---|
| 302 | except ImportError:
|
|---|
| 303 | return None
|
|---|
| 304 |
|
|---|
| 305 | # check if it's indeed a package
|
|---|
| 306 | if description[2] == imp.PKG_DIRECTORY:
|
|---|
| 307 | # recursively handle the remaining name parts
|
|---|
| 308 | pathname = _get_modpkg_path(parts[1], [pathname])
|
|---|
| 309 | else:
|
|---|
| 310 | pathname = None
|
|---|
| 311 | else:
|
|---|
| 312 | # plain name
|
|---|
| 313 | try:
|
|---|
| 314 | file, pathname, description = imp.find_module(
|
|---|
| 315 | dotted_name, pathlist)
|
|---|
| 316 | if file:
|
|---|
| 317 | file.close()
|
|---|
| 318 | if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
|
|---|
| 319 | pathname = None
|
|---|
| 320 | except ImportError:
|
|---|
| 321 | pathname = None
|
|---|
| 322 |
|
|---|
| 323 | return pathname
|
|---|
| 324 |
|
|---|
| 325 |
|
|---|
def getFilesForName(name):
    """Return the list of source files denoted by 'name'.

    'name' may be an existing file or directory, a glob pattern, or a
    module/package name.  An empty list is returned when nothing matches.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            # resolve every match of the pattern recursively
            collected = []
            for match in glob.glob(name):
                collected.extend(getFilesForName(match))
            return collected

        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        sources = []
        os.path.walk(name, _visit_pyfiles, sources)
        return sources
    if os.path.exists(name):
        # a single file
        return [name]
    return []
|
|---|
| 354 |
|
|---|
| 355 | |
|---|
| 356 |
|
|---|
class TokenEater:
    """Token callback that collects translatable strings and docstrings.

    An instance is called once per token with the five-argument tokenize
    callback signature.  It runs a small state machine (the current state is
    the bound method stored in self.__state) and records every message
    together with the (filename, lineno) pairs where it was seen.  write()
    emits the collected messages in .pot format.
    """

    def __init__(self, options):
        self.__options = options
        # message -> {(filename, lineno): isdocstring flag}
        self.__messages = {}
        self.__state = self.__waiting
        # string fragments seen inside the current keyword(...) call
        self.__data = []
        self.__lineno = -1
        # true until the first significant token of the current file
        self.__freshmodule = 1
        self.__curfile = None
        # bracket nesting depth while scanning a def/class header
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
##        import token
##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
##              'tstring:', tstring
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for docstring positions and keyword names."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL):
                    # noise before the first statement; keep waiting
                    return
                # The first real token is not a string, so the module has no
                # docstring.  Fall through instead of returning, so that a
                # leading 'class'/'def' or keyword is still examined below.
                self.__freshmodule = 0
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                self.__enclosurecount = 0
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """Inside a def/class header: wait for the colon that ends it."""
        # Colons nested in brackets (dict displays, lambdas or slices in
        # default values) do not end the header, so track the bracket depth
        # and only react to a colon at depth zero.
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                self.__state = self.__suitedocstring
            elif tstring in ('(', '[', '{'):
                self.__enclosurecount = self.__enclosurecount + 1
            elif tstring in (')', ']', '}'):
                self.__enclosurecount = self.__enclosurecount - 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """Just after a def/class header: the next string is the docstring."""
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT, tokenize.NL):
            # tokenize.NL (blank or comment-only line) is skipped as well, in
            # line with the module-docstring check in __waiting.
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """A keyword name was seen: it only counts if '(' follows directly."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Inside keyword(...): gather string literals until ')'."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record msg at (current file, lineno) unless it is excluded.

        lineno defaults to the line stored when the keyword's '(' was seen.
        """
        if lineno is None:
            lineno = self.__lineno
        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file: remember its name, re-arm the module
        docstring check."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the header and all collected messages to fp in .pot format."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >>fp, _(
                            '# File: %(filename)s, line: %(lineno)d') % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceeds 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'
|
|---|
| 506 |
|
|---|
| 507 |
|
|---|
| 508 | |
|---|
| 509 |
|
|---|
def main():
    """Command-line entry point: parse options, scan inputs, write output.

    Reads sys.argv, resolves each argument to source files through
    getFilesForName() ('-' stands for standard input), feeds every file
    through a TokenEater and writes the collected messages to the selected
    output file ('-' stands for standard output).  Option errors are
    reported through usage(), which exits.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # --no-docstrings takes a filename just like its short form -X,
             # so it needs the trailing '='.
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error:
        usage(1, sys.exc_info()[1])

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print >> sys.stdout, \
                  _('pygettext.py (xgettext for Python) %s') % __version__
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            fp = open(arg)
            try:
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    # Strip only the line terminator, so a final line that
                    # lacks a trailing newline keeps its last character.
                    if line.endswith('\n'):
                        line = line[:-1]
                    options.nodocstrings[line] = 1
            finally:
                fp.close()

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            try:
                # NOTE(review): entries keep their trailing newline, so only
                # messages that end in '\n' can match one -- confirm whether
                # the lines should be stripped.
                options.toexclude = fp.readlines()
            finally:
                # close the file even when reading fails
                fp.close()
        except IOError:
            print >> sys.stderr, _(
                "Can't read --exclude-file: %s") % options.excludefilename
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print >> sys.stdout, _('Reading standard input')
            fp = sys.stdin
            closep = 0
        else:
            if options.verbose:
                print >> sys.stdout, _('Working on %s') % filename
            fp = open(filename)
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokenize.tokenize(fp.readline, eater)
            except tokenize.TokenError:
                # report the tokenizer error and carry on with the next file
                e = sys.exc_info()[1]
                print >> sys.stderr, '%s: %s, line %d, column %d' % (
                    e[0], filename, e[1][0], e[1][1])
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()
|
|---|
| 666 |
|
|---|
| 667 | |
|---|
| 668 |
|
|---|
# Script entry point.  The _() calls after main() are sample marker calls;
# presumably they serve as input when the tool is run on its own source.
if __name__ == '__main__':
    main()
    # some more test strings
    _(u'a unicode string')
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')
|
|---|