| 1 | #ifndef Py_UNICODEOBJECT_H
|
|---|
| 2 | #define Py_UNICODEOBJECT_H
|
|---|
| 3 |
|
|---|
| 4 | #include <stdarg.h>
|
|---|
| 5 |
|
|---|
| 6 | /*
|
|---|
| 7 |
|
|---|
| 8 | Unicode implementation based on original code by Fredrik Lundh,
|
|---|
| 9 | modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
|
|---|
| 10 | Unicode Integration Proposal (see file Misc/unicode.txt).
|
|---|
| 11 |
|
|---|
| 12 | Copyright (c) Corporation for National Research Initiatives.
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 | Original header:
|
|---|
| 16 | --------------------------------------------------------------------
|
|---|
| 17 |
|
|---|
| 18 | * Yet another Unicode string type for Python. This type supports the
|
|---|
| 19 | * 16-bit Basic Multilingual Plane (BMP) only.
|
|---|
| 20 | *
|
|---|
| 21 | * Written by Fredrik Lundh, January 1999.
|
|---|
| 22 | *
|
|---|
| 23 | * Copyright (c) 1999 by Secret Labs AB.
|
|---|
| 24 | * Copyright (c) 1999 by Fredrik Lundh.
|
|---|
| 25 | *
|
|---|
| 26 | * fredrik@pythonware.com
|
|---|
| 27 | * http://www.pythonware.com
|
|---|
| 28 | *
|
|---|
| 29 | * --------------------------------------------------------------------
|
|---|
| 30 | * This Unicode String Type is
|
|---|
| 31 | *
|
|---|
| 32 | * Copyright (c) 1999 by Secret Labs AB
|
|---|
| 33 | * Copyright (c) 1999 by Fredrik Lundh
|
|---|
| 34 | *
|
|---|
| 35 | * By obtaining, using, and/or copying this software and/or its
|
|---|
| 36 | * associated documentation, you agree that you have read, understood,
|
|---|
| 37 | * and will comply with the following terms and conditions:
|
|---|
| 38 | *
|
|---|
| 39 | * Permission to use, copy, modify, and distribute this software and its
|
|---|
| 40 | * associated documentation for any purpose and without fee is hereby
|
|---|
| 41 | * granted, provided that the above copyright notice appears in all
|
|---|
| 42 | * copies, and that both that copyright notice and this permission notice
|
|---|
| 43 | * appear in supporting documentation, and that the name of Secret Labs
|
|---|
| 44 | * AB or the author not be used in advertising or publicity pertaining to
|
|---|
| 45 | * distribution of the software without specific, written prior
|
|---|
| 46 | * permission.
|
|---|
| 47 | *
|
|---|
| 48 | * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
|---|
| 49 | * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|---|
| 50 | * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
|
|---|
| 51 | * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|---|
| 52 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|---|
| 53 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
|---|
| 54 | * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|---|
| 55 | * -------------------------------------------------------------------- */
|
|---|
| 56 |
|
|---|
| 57 | #include <ctype.h>
|
|---|
| 58 |
|
|---|
| 59 | /* === Internal API ======================================================= */
|
|---|
| 60 |
|
|---|
| 61 | /* --- Internal Unicode Format -------------------------------------------- */
|
|---|
| 62 |
|
|---|
| 63 | #ifndef Py_USING_UNICODE
|
|---|
| 64 |
|
|---|
| 65 | #define PyUnicode_Check(op) 0
|
|---|
| 66 | #define PyUnicode_CheckExact(op) 0
|
|---|
| 67 |
|
|---|
| 68 | #else
|
|---|
| 69 |
|
|---|
| 70 | /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
|
|---|
| 71 | properly set, but the default rules below don't set it. I'll
|
|---|
| 72 | sort this out some other day -- fredrik@pythonware.com */
|
|---|
| 73 |
|
|---|
| 74 | #ifndef Py_UNICODE_SIZE
|
|---|
| 75 | #error Must define Py_UNICODE_SIZE
|
|---|
| 76 | #endif
|
|---|
| 77 |
|
|---|
| 78 | /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
|
|---|
| 79 | strings are stored as UCS-2 (with limited support for UTF-16) */
|
|---|
| 80 |
|
|---|
| 81 | #if Py_UNICODE_SIZE >= 4
|
|---|
| 82 | #define Py_UNICODE_WIDE
|
|---|
| 83 | #endif
|
|---|
| 84 |
|
|---|
| 85 | /* Set these flags if the platform has "wchar.h", "wctype.h" and the
|
|---|
| 86 | wchar_t type is a 16-bit unsigned type */
|
|---|
| 87 | /* #define HAVE_WCHAR_H */
|
|---|
| 88 | /* #define HAVE_USABLE_WCHAR_T */
|
|---|
| 89 |
|
|---|
| 90 | /* Defaults for various platforms */
|
|---|
| 91 | #ifndef PY_UNICODE_TYPE
|
|---|
| 92 |
|
|---|
| 93 | /* Windows has a usable wchar_t type (unless we're using UCS-4) */
|
|---|
| 94 | # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
|
|---|
| 95 | # define HAVE_USABLE_WCHAR_T
|
|---|
| 96 | # define PY_UNICODE_TYPE wchar_t
|
|---|
| 97 | # endif
|
|---|
| 98 |
|
|---|
| 99 | # if defined(Py_UNICODE_WIDE)
|
|---|
| 100 | # define PY_UNICODE_TYPE Py_UCS4
|
|---|
| 101 | # endif
|
|---|
| 102 |
|
|---|
| 103 | #endif
|
|---|
| 104 |
|
|---|
| 105 | /* If the compiler provides a wchar_t type we try to support it
|
|---|
| 106 | through the interface functions PyUnicode_FromWideChar() and
|
|---|
| 107 | PyUnicode_AsWideChar(). */
|
|---|
| 108 |
|
|---|
| 109 | #ifdef HAVE_USABLE_WCHAR_T
|
|---|
| 110 | # ifndef HAVE_WCHAR_H
|
|---|
| 111 | # define HAVE_WCHAR_H
|
|---|
| 112 | # endif
|
|---|
| 113 | #endif
|
|---|
| 114 |
|
|---|
| 115 | #ifdef HAVE_WCHAR_H
|
|---|
| 116 | /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
|
|---|
| 117 | # ifdef _HAVE_BSDI
|
|---|
| 118 | # include <time.h>
|
|---|
| 119 | # endif
|
|---|
| 120 | # include <wchar.h>
|
|---|
| 121 | #endif
|
|---|
| 122 |
|
|---|
| 123 | /*
|
|---|
| 124 | * Use this typedef when you need to represent a UTF-16 surrogate pair
|
|---|
| 125 | * as a single unsigned integer.
|
|---|
| 126 | */
|
|---|
| 127 | #if SIZEOF_INT >= 4
|
|---|
| 128 | typedef unsigned int Py_UCS4;
|
|---|
| 129 | #elif SIZEOF_LONG >= 4
|
|---|
| 130 | typedef unsigned long Py_UCS4;
|
|---|
| 131 | #endif
|
|---|
| 132 |
|
|---|
| 133 | /* Py_UNICODE is the native Unicode storage format (code unit) used by
|
|---|
| 134 | Python and represents a single Unicode element in the Unicode
|
|---|
| 135 | type. */
|
|---|
| 136 |
|
|---|
| 137 | typedef PY_UNICODE_TYPE Py_UNICODE;
|
|---|
| 138 |
|
|---|
| 139 | /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
|
|---|
| 140 |
|
|---|
| 141 | /* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
|
|---|
| 142 | produce different external names and thus cause import errors in
|
|---|
| 143 | case Python interpreters and extensions with mixed compiled-in
|
|---|
| 144 | Unicode width assumptions are combined. */
|
|---|
| 145 |
|
|---|
| 146 | #ifndef Py_UNICODE_WIDE
|
|---|
| 147 |
|
|---|
| 148 | # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
|
|---|
| 149 | # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
|
|---|
| 150 | # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
|
|---|
| 151 | # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
|
|---|
| 152 | # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
|
|---|
| 153 | # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
|
|---|
| 154 | # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
|
|---|
| 155 | # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
|
|---|
| 156 | # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
|
|---|
| 157 | # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
|
|---|
| 158 | # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
|
|---|
| 159 | # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
|
|---|
| 160 | # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
|
|---|
| 161 | # define PyUnicode_Compare PyUnicodeUCS2_Compare
|
|---|
| 162 | # define PyUnicode_Concat PyUnicodeUCS2_Concat
|
|---|
| 163 | # define PyUnicode_Contains PyUnicodeUCS2_Contains
|
|---|
| 164 | # define PyUnicode_Count PyUnicodeUCS2_Count
|
|---|
| 165 | # define PyUnicode_Decode PyUnicodeUCS2_Decode
|
|---|
| 166 | # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
|
|---|
| 167 | # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
|
|---|
| 168 | # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
|---|
| 169 | # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
|---|
| 170 | # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
|
|---|
| 171 | # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
|
|---|
| 172 | # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
|---|
| 173 | # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
|---|
| 174 | # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
|---|
| 175 | # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
|
|---|
| 176 | # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
|---|
| 177 | # define PyUnicode_Encode PyUnicodeUCS2_Encode
|
|---|
| 178 | # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
|---|
| 179 | # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
|
|---|
| 180 | # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
|
|---|
| 181 | # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
|
|---|
| 182 | # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
|
|---|
| 183 | # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
|
|---|
| 184 | # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
|
|---|
| 185 | # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
|
|---|
| 186 | # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
|
|---|
| 187 | # define PyUnicode_Find PyUnicodeUCS2_Find
|
|---|
| 188 | # define PyUnicode_Format PyUnicodeUCS2_Format
|
|---|
| 189 | # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
|
|---|
| 190 | # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
|
|---|
| 191 | # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
|
|---|
| 192 | # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
|
|---|
| 193 | # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
|
|---|
| 194 | # define PyUnicode_FromString PyUnicodeUCS2_FromString
|
|---|
| 195 | # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
|
|---|
| 196 | # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
|
|---|
| 197 | # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
|
|---|
| 198 | # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
|
|---|
| 199 | # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
|
|---|
| 200 | # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
|
|---|
| 201 | # define PyUnicode_Join PyUnicodeUCS2_Join
|
|---|
| 202 | # define PyUnicode_Partition PyUnicodeUCS2_Partition
|
|---|
| 203 | # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
|
|---|
| 204 | # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
|
|---|
| 205 | # define PyUnicode_Replace PyUnicodeUCS2_Replace
|
|---|
| 206 | # define PyUnicode_Resize PyUnicodeUCS2_Resize
|
|---|
| 207 | # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
|
|---|
| 208 | # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
|
|---|
| 209 | # define PyUnicode_Split PyUnicodeUCS2_Split
|
|---|
| 210 | # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
|
|---|
| 211 | # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
|
|---|
| 212 | # define PyUnicode_Translate PyUnicodeUCS2_Translate
|
|---|
| 213 | # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
|
|---|
| 214 | # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
|
|---|
| 215 | # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
|
|---|
| 216 | # define _PyUnicode_Init _PyUnicodeUCS2_Init
|
|---|
| 217 | # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
|
|---|
| 218 | # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
|
|---|
| 219 | # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
|
|---|
| 220 | # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
|
|---|
| 221 | # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
|
|---|
| 222 | # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
|
|---|
| 223 | # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
|
|---|
| 224 | # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
|
|---|
| 225 | # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
|
|---|
| 226 | # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
|
|---|
| 227 | # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
|
|---|
| 228 | # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
|
|---|
| 229 | # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
|
|---|
| 230 | # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
|
|---|
| 231 | # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
|
|---|
| 232 |
|
|---|
| 233 | #else
|
|---|
| 234 |
|
|---|
| 235 | # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
|
|---|
| 236 | # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
|
|---|
| 237 | # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
|
|---|
| 238 | # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
|
|---|
| 239 | # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
|
|---|
| 240 | # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
|
|---|
| 241 | # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
|
|---|
| 242 | # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
|
|---|
| 243 | # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
|
|---|
| 244 | # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
|
|---|
| 245 | # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
|
|---|
| 246 | # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
|
|---|
| 247 | # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
|
|---|
| 248 | # define PyUnicode_Compare PyUnicodeUCS4_Compare
|
|---|
| 249 | # define PyUnicode_Concat PyUnicodeUCS4_Concat
|
|---|
| 250 | # define PyUnicode_Contains PyUnicodeUCS4_Contains
|
|---|
| 251 | # define PyUnicode_Count PyUnicodeUCS4_Count
|
|---|
| 252 | # define PyUnicode_Decode PyUnicodeUCS4_Decode
|
|---|
| 253 | # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
|
|---|
| 254 | # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
|
|---|
| 255 | # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
|---|
| 256 | # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
|---|
| 257 | # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
|
|---|
| 258 | # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
|
|---|
| 259 | # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
|---|
| 260 | # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
|---|
| 261 | # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
|---|
| 262 | # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
|
|---|
| 263 | # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
|---|
| 264 | # define PyUnicode_Encode PyUnicodeUCS4_Encode
|
|---|
| 265 | # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
|---|
| 266 | # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
|
|---|
| 267 | # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
|
|---|
| 268 | # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
|
|---|
| 269 | # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
|
|---|
| 270 | # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
|
|---|
| 271 | # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
|
|---|
| 272 | # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
|
|---|
| 273 | # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
|
|---|
| 274 | # define PyUnicode_Find PyUnicodeUCS4_Find
|
|---|
| 275 | # define PyUnicode_Format PyUnicodeUCS4_Format
|
|---|
| 276 | # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
|
|---|
| 277 | # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
|
|---|
| 278 | # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
|
|---|
| 279 | # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
|
|---|
| 280 | # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
|
|---|
| 281 | # define PyUnicode_FromString PyUnicodeUCS4_FromString
|
|---|
| 282 | # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
|
|---|
| 283 | # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
|
|---|
| 284 | # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
|
|---|
| 285 | # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
|
|---|
| 286 | # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
|
|---|
| 287 | # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
|
|---|
| 288 | # define PyUnicode_Join PyUnicodeUCS4_Join
|
|---|
| 289 | # define PyUnicode_Partition PyUnicodeUCS4_Partition
|
|---|
| 290 | # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
|
|---|
| 291 | # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
|
|---|
| 292 | # define PyUnicode_Replace PyUnicodeUCS4_Replace
|
|---|
| 293 | # define PyUnicode_Resize PyUnicodeUCS4_Resize
|
|---|
| 294 | # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
|
|---|
| 295 | # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
|
|---|
| 296 | # define PyUnicode_Split PyUnicodeUCS4_Split
|
|---|
| 297 | # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
|
|---|
| 298 | # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
|
|---|
| 299 | # define PyUnicode_Translate PyUnicodeUCS4_Translate
|
|---|
| 300 | # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
|
|---|
| 301 | # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
|
|---|
| 302 | # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
|
|---|
| 303 | # define _PyUnicode_Init _PyUnicodeUCS4_Init
|
|---|
| 304 | # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
|
|---|
| 305 | # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
|
|---|
| 306 | # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
|
|---|
| 307 | # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
|
|---|
| 308 | # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
|
|---|
| 309 | # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
|
|---|
| 310 | # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
|
|---|
| 311 | # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
|
|---|
| 312 | # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
|
|---|
| 313 | # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
|
|---|
| 314 | # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
|
|---|
| 315 | # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
|
|---|
| 316 | # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
|
|---|
| 317 | # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
|
|---|
| 318 | # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
|
|---|
| 319 |
|
|---|
| 320 |
|
|---|
| 321 | #endif
|
|---|
| 322 |
|
|---|
| 323 | /* --- Internal Unicode Operations ---------------------------------------- */
|
|---|
| 324 |
|
|---|
| 325 | /* If you want Python to use the compiler's wctype.h functions instead
|
|---|
| 326 | of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
|
|---|
| 327 | configure Python using --with-wctype-functions. This reduces the
|
|---|
| 328 | interpreter's code size. */
|
|---|
| 329 |
|
|---|
| 330 | #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
|
|---|
| 331 |
|
|---|
| 332 | #include <wctype.h>
|
|---|
| 333 |
|
|---|
| 334 | #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
|
|---|
| 335 |
|
|---|
| 336 | #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
|
|---|
| 337 | #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
|
|---|
| 338 | #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
|
|---|
| 339 | #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
|
|---|
| 340 |
|
|---|
| 341 | #define Py_UNICODE_TOLOWER(ch) towlower(ch)
|
|---|
| 342 | #define Py_UNICODE_TOUPPER(ch) towupper(ch)
|
|---|
| 343 | #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
|
|---|
| 344 |
|
|---|
| 345 | #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
|
|---|
| 346 | #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
|
|---|
| 347 | #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
|
|---|
| 348 |
|
|---|
| 349 | #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
|
|---|
| 350 | #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
|
|---|
| 351 | #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
|
|---|
| 352 |
|
|---|
| 353 | #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
|
|---|
| 354 |
|
|---|
| 355 | #else
|
|---|
| 356 |
|
|---|
| 357 | /* Since splitting on whitespace is an important use case, and
|
|---|
| 358 | whitespace in most situations is solely ASCII whitespace, we
|
|---|
| 359 | optimize for the common case by using a quick look-up table
|
|---|
| 360 | _Py_ascii_whitespace (see below) with an inlined check.
|
|---|
| 361 |
|
|---|
| 362 | */
|
|---|
| 363 | #define Py_UNICODE_ISSPACE(ch) \
|
|---|
| 364 | ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
|
|---|
| 365 |
|
|---|
| 366 | #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
|
|---|
| 367 | #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
|
|---|
| 368 | #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
|
|---|
| 369 | #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
|
|---|
| 370 |
|
|---|
| 371 | #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
|
|---|
| 372 | #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
|
|---|
| 373 | #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
|
|---|
| 374 |
|
|---|
| 375 | #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
|
|---|
| 376 | #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
|
|---|
| 377 | #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
|
|---|
| 378 |
|
|---|
| 379 | #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
|
|---|
| 380 | #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
|
|---|
| 381 | #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
|
|---|
| 382 |
|
|---|
| 383 | #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
|
|---|
| 384 |
|
|---|
| 385 | #endif
|
|---|
| 386 | /* Nonzero if ch is alphabetic, decimal, digit or numeric according to the Py_UNICODE_IS* predicates defined above. ch appears four times in the expansion, so pass an expression without side effects. */
|
|---|
| 387 | #define Py_UNICODE_ISALNUM(ch) \
|
|---|
| 388 | (Py_UNICODE_ISALPHA(ch) || \
|
|---|
| 389 | Py_UNICODE_ISDECIMAL(ch) || \
|
|---|
| 390 | Py_UNICODE_ISDIGIT(ch) || \
|
|---|
| 391 | Py_UNICODE_ISNUMERIC(ch))
|
|---|
| 392 | /* Copy length Py_UNICODE code units from source to target using Py_MEMCPY; the byte count passed is length*sizeof(Py_UNICODE). */
|
|---|
| 393 | #define Py_UNICODE_COPY(target, source, length) \
|
|---|
| 394 | Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
|
|---|
| 395 | /* Store value into elements 0 .. length-1 of target. target and value are each evaluated once (captured in t_ and v_), but length is re-evaluated on every loop iteration, so it must not have side effects. Wrapped in do { } while (0) so it behaves as a single statement. */
|
|---|
| 396 | #define Py_UNICODE_FILL(target, value, length) \
|
|---|
| 397 | do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
|
|---|
| 398 | for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
|
|---|
| 399 | } while (0)
|
|---|
| 400 |
|
|---|
| 401 | /* Check whether substring matches string at the given offset. The offset must be
|
|---|
| 402 | valid, and the substring must not be empty. string and substring are pointers to objects with str and length members. The first and last code units are compared before falling back to memcmp() over the whole substring; every argument is expanded more than once, so pass expressions without side effects. */
|
|---|
| 403 |
|
|---|
| 404 | #define Py_UNICODE_MATCH(string, offset, substring) \
|
|---|
| 405 | ((*((string)->str + (offset)) == *((substring)->str)) && \
|
|---|
| 406 | ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
|
|---|
| 407 | !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
|
|---|
| 408 |
|
|---|
| 409 | #ifdef __cplusplus
|
|---|
| 410 | extern "C" {
|
|---|
| 411 | #endif
|
|---|
| 412 |
|
|---|
| 413 | /* --- Unicode Type ------------------------------------------------------- */
|
|---|
| 414 |
|
|---|
| 415 | typedef struct {
|
|---|
| 416 | PyObject_HEAD
|
|---|
| 417 | Py_ssize_t length; /* Length of raw Unicode data in buffer */
|
|---|
| 418 | Py_UNICODE *str; /* Raw Unicode buffer */
|
|---|
| 419 | long hash; /* Hash value; -1 if not set */
|
|---|
| 420 | PyObject *defenc; /* (Default) Encoded version as Python
|
|---|
| 421 | string, or NULL; this is used for
|
|---|
| 422 | implementing the buffer protocol */
|
|---|
| 423 | } PyUnicodeObject;
|
|---|
| 424 |
|
|---|
| 425 | PyAPI_DATA(PyTypeObject) PyUnicode_Type;
|
|---|
| 426 |
|
|---|
| 427 | #define PyUnicode_Check(op) \
|
|---|
| 428 | PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
|
|---|
| 429 | #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
|
|---|
| 430 |
|
|---|
| 431 | /* Fast access macros */
|
|---|
| 432 | #define PyUnicode_GET_SIZE(op) \
|
|---|
| 433 | (((PyUnicodeObject *)(op))->length)
|
|---|
| 434 | #define PyUnicode_GET_DATA_SIZE(op) \
|
|---|
| 435 | (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
|
|---|
| 436 | #define PyUnicode_AS_UNICODE(op) \
|
|---|
| 437 | (((PyUnicodeObject *)(op))->str)
|
|---|
| 438 | #define PyUnicode_AS_DATA(op) \
|
|---|
| 439 | ((const char *)((PyUnicodeObject *)(op))->str)
|
|---|
| 440 |
|
|---|
| 441 | /* --- Constants ---------------------------------------------------------- */
|
|---|
| 442 |
|
|---|
| 443 | /* This Unicode character will be used as replacement character during
|
|---|
| 444 | decoding if the errors argument is set to "replace". Note: the
|
|---|
| 445 | Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
|
|---|
| 446 | Unicode 3.0. */
|
|---|
| 447 |
|
|---|
| 448 | #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
|
|---|
| 449 |
|
|---|
| 450 | /* === Public API ========================================================= */
|
|---|
| 451 |
|
|---|
| 452 | /* --- Plain Py_UNICODE --------------------------------------------------- */
|
|---|
| 453 |
|
|---|
| 454 | /* Create a Unicode Object from the Py_UNICODE buffer u of the given
|
|---|
| 455 | size.
|
|---|
| 456 |
|
|---|
| 457 | u may be NULL which causes the contents to be undefined. It is the
|
|---|
| 458 | user's responsibility to fill in the needed data afterwards. Note
|
|---|
| 459 | that modifying the Unicode object contents after construction is
|
|---|
| 460 | only allowed if u was set to NULL.
|
|---|
| 461 |
|
|---|
| 462 | The buffer is copied into the new object. */
|
|---|
| 463 |
|
|---|
| 464 | PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
|
|---|
| 465 | const Py_UNICODE *u, /* Unicode buffer */
|
|---|
| 466 | Py_ssize_t size /* size of buffer */
|
|---|
| 467 | );
|
|---|
| 468 |
|
|---|
| 469 | /* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */
|
|---|
| 470 | PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
|
|---|
| 471 | const char *u, /* char buffer */
|
|---|
| 472 | Py_ssize_t size /* size of buffer */
|
|---|
| 473 | );
|
|---|
| 474 |
|
|---|
| 475 | /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
|
|---|
| 476 | Latin-1 encoded bytes */
|
|---|
| 477 | PyAPI_FUNC(PyObject*) PyUnicode_FromString(
|
|---|
| 478 | const char *u /* string */
|
|---|
| 479 | );
|
|---|
| 480 |
|
|---|
| 481 | /* Return a read-only pointer to the Unicode object's internal
|
|---|
| 482 | Py_UNICODE buffer. */
|
|---|
| 483 |
|
|---|
| 484 | PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
|
|---|
| 485 | PyObject *unicode /* Unicode object */
|
|---|
| 486 | );
|
|---|
| 487 |
|
|---|
| 488 | /* Get the length of the Unicode object. */
|
|---|
| 489 |
|
|---|
| 490 | PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
|
|---|
| 491 | PyObject *unicode /* Unicode object */
|
|---|
| 492 | );
|
|---|
| 493 |
|
|---|
| 494 | /* Get the maximum ordinal for a Unicode character. */
|
|---|
| 495 | PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
|
|---|
| 496 |
|
|---|
| 497 | /* Resize an already allocated Unicode object to the new size length.
|
|---|
| 498 |
|
|---|
| 499 | *unicode is modified to point to the new (resized) object and 0
|
|---|
| 500 | returned on success.
|
|---|
| 501 |
|
|---|
| 502 | This API may only be called by the function which also called the
|
|---|
| 503 | Unicode constructor. The refcount on the object must be 1. Otherwise,
|
|---|
| 504 | an error is returned.
|
|---|
| 505 |
|
|---|
| 506 | Error handling is implemented as follows: an exception is set, -1
|
|---|
| 507 | is returned and *unicode left untouched.
|
|---|
| 508 |
|
|---|
| 509 | */
|
|---|
| 510 |
|
|---|
| 511 | PyAPI_FUNC(int) PyUnicode_Resize(
|
|---|
| 512 | PyObject **unicode, /* Pointer to the Unicode object */
|
|---|
| 513 | Py_ssize_t length /* New length */
|
|---|
| 514 | );
|
|---|
| 515 |
|
|---|
| 516 | /* Coerce obj to a Unicode object and return a reference with
|
|---|
| 517 | *incremented* refcount.
|
|---|
| 518 |
|
|---|
| 519 | Coercion is done in the following way:
|
|---|
| 520 |
|
|---|
| 521 | 1. String and other char buffer compatible objects are decoded
|
|---|
| 522 | under the assumptions that they contain data using the current
|
|---|
| 523 | default encoding. Decoding is done in "strict" mode.
|
|---|
| 524 |
|
|---|
| 525 | 2. All other objects (including Unicode objects) raise an
|
|---|
| 526 | exception.
|
|---|
| 527 |
|
|---|
| 528 | The API returns NULL in case of an error. The caller is responsible
|
|---|
| 529 | for decref'ing the returned objects.
|
|---|
| 530 |
|
|---|
| 531 | */
|
|---|
| 532 |
|
|---|
| 533 | PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
|
|---|
| 534 | register PyObject *obj, /* Object */
|
|---|
| 535 | const char *encoding, /* encoding */
|
|---|
| 536 | const char *errors /* error handling */
|
|---|
| 537 | );
|
|---|
| 538 |
|
|---|
| 539 | /* Coerce obj to a Unicode object and return a reference with
|
|---|
| 540 | *incremented* refcount.
|
|---|
| 541 |
|
|---|
| 542 | Unicode objects are passed back as-is (subclasses are converted to
|
|---|
| 543 | true Unicode objects), all other objects are delegated to
|
|---|
| 544 | PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
|
|---|
| 545 | using the default encoding as basis for decoding the object.
|
|---|
| 546 |
|
|---|
| 547 | The API returns NULL in case of an error. The caller is responsible
|
|---|
| 548 | for decref'ing the returned objects.
|
|---|
| 549 |
|
|---|
| 550 | */
|
|---|
| 551 |
|
|---|
| 552 | PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
|
|---|
| 553 | register PyObject *obj /* Object */
|
|---|
| 554 | );
|
|---|
| 555 |
|
|---|
| 556 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
|
|---|
| 557 | PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
|
|---|
| 558 |
|
|---|
| 559 | /* Format the object based on the format_spec, as defined in PEP 3101
|
|---|
| 560 | (Advanced String Formatting). */
|
|---|
| 561 | PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
|
|---|
| 562 | Py_UNICODE *format_spec,
|
|---|
| 563 | Py_ssize_t format_spec_len);
|
|---|
| 564 |
|
|---|
| 565 | /* --- wchar_t support for platforms which support it --------------------- */
|
|---|
| 566 |
|
|---|
| 567 | #ifdef HAVE_WCHAR_H
|
|---|
| 568 |
|
|---|
| 569 | /* Create a Unicode Object from the wchar_t buffer w of the given
|
|---|
| 570 | size.
|
|---|
| 571 |
|
|---|
| 572 | The buffer is copied into the new object. */
|
|---|
| 573 |
|
|---|
| 574 | PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
|
|---|
| 575 | register const wchar_t *w, /* wchar_t buffer */
|
|---|
| 576 | Py_ssize_t size /* size of buffer */
|
|---|
| 577 | );
|
|---|
| 578 |
|
|---|
| 579 | /* Copies the Unicode Object contents into the wchar_t buffer w. At
|
|---|
| 580 | most size wchar_t characters are copied.
|
|---|
| 581 |
|
|---|
| 582 | Note that the resulting wchar_t string may or may not be
|
|---|
| 583 | 0-terminated. It is the responsibility of the caller to make sure
|
|---|
| 584 | that the wchar_t string is 0-terminated in case this is required by
|
|---|
| 585 | the application.
|
|---|
| 586 |
|
|---|
| 587 | Returns the number of wchar_t characters copied (excluding a
|
|---|
| 588 | possibly trailing 0-termination character) or -1 in case of an
|
|---|
| 589 | error. */
|
|---|
| 590 |
|
|---|
| 591 | PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
|
|---|
| 592 | PyUnicodeObject *unicode, /* Unicode object */
|
|---|
| 593 | register wchar_t *w, /* wchar_t buffer */
|
|---|
| 594 | Py_ssize_t size /* size of buffer */
|
|---|
| 595 | );
|
|---|
| 596 |
|
|---|
| 597 | #endif
|
|---|
| 598 |
|
|---|
| 599 | /* --- Unicode ordinals --------------------------------------------------- */
|
|---|
| 600 |
|
|---|
| 601 | /* Create a Unicode Object from the given Unicode code point ordinal.
|
|---|
| 602 |
|
|---|
| 603 | The ordinal must be in range(0x10000) on narrow Python builds
|
|---|
| 604 | (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
|
|---|
| 605 | raised in case it is not.
|
|---|
| 606 |
|
|---|
| 607 | */
|
|---|
| 608 |
|
|---|
| 609 | PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
|
|---|
| 610 |
|
|---|
| 611 | /* --- Free-list management ----------------------------------------------- */
|
|---|
| 612 |
|
|---|
| 613 | /* Clear the free list used by the Unicode implementation.
|
|---|
| 614 |
|
|---|
| 615 | This can be used to release memory used for objects on the free
|
|---|
| 616 | list back to the Python memory allocator.
|
|---|
| 617 |
|
|---|
| 618 | */
|
|---|
| 619 |
|
|---|
| 620 | PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
|
|---|
| 621 |
|
|---|
| 622 | /* === Builtin Codecs =====================================================
|
|---|
| 623 |
|
|---|
| 624 | Many of these APIs take two arguments encoding and errors. These
|
|---|
| 625 | parameters encoding and errors have the same semantics as the ones
|
|---|
| 626 | of the builtin unicode() API.
|
|---|
| 627 |
|
|---|
| 628 | Setting encoding to NULL causes the default encoding to be used.
|
|---|
| 629 |
|
|---|
| 630 | Error handling is set by errors which may also be set to NULL
|
|---|
| 631 | meaning to use the default handling defined for the codec. Default
|
|---|
| 632 | error handling for all builtin codecs is "strict" (ValueErrors are
|
|---|
| 633 | raised).
|
|---|
| 634 |
|
|---|
| 635 | The codecs all use a similar interface. Only deviations from the
|
|---|
| 636 | generic ones are documented.
|
|---|
| 637 |
|
|---|
| 638 | */
|
|---|
| 639 |
|
|---|
| 640 | /* --- Manage the default encoding ---------------------------------------- */
|
|---|
| 641 |
|
|---|
| 642 | /* Return a Python string holding the default encoded value of the
|
|---|
| 643 | Unicode object.
|
|---|
| 644 |
|
|---|
| 645 | The resulting string is cached in the Unicode object for subsequent
|
|---|
| 646 | usage by this function. The cached version is needed to implement
|
|---|
| 647 | the character buffer interface and will live (at least) as long as
|
|---|
| 648 | the Unicode object itself.
|
|---|
| 649 |
|
|---|
| 650 | The refcount of the string is *not* incremented.
|
|---|
| 651 |
|
|---|
| 652 | *** Exported for internal use by the interpreter only !!! ***
|
|---|
| 653 |
|
|---|
| 654 | */
|
|---|
| 655 |
|
|---|
| 656 | PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
|
|---|
| 657 | PyObject *, const char *);
|
|---|
| 658 |
|
|---|
| 659 | /* Returns the currently active default encoding.
|
|---|
| 660 |
|
|---|
| 661 | The default encoding is currently implemented as run-time settable
|
|---|
| 662 | process global. This may change in future versions of the
|
|---|
| 663 | interpreter to become a parameter which is managed on a per-thread
|
|---|
| 664 | basis.
|
|---|
| 665 |
|
|---|
| 666 | */
|
|---|
| 667 |
|
|---|
| 668 | PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
|
|---|
| 669 |
|
|---|
| 670 | /* Sets the currently active default encoding.
|
|---|
| 671 |
|
|---|
| 672 | Returns 0 on success, -1 in case of an error.
|
|---|
| 673 |
|
|---|
| 674 | */
|
|---|
| 675 |
|
|---|
| 676 | PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
|
|---|
| 677 | const char *encoding /* Encoding name in standard form */
|
|---|
| 678 | );
|
|---|
| 679 |
|
|---|
| 680 | /* --- Generic Codecs ----------------------------------------------------- */
|
|---|
| 681 |
|
|---|
| 682 | /* Create a Unicode object by decoding the encoded string s of the
|
|---|
| 683 | given size. */
|
|---|
| 684 |
|
|---|
| 685 | PyAPI_FUNC(PyObject*) PyUnicode_Decode(
|
|---|
| 686 | const char *s, /* encoded string */
|
|---|
| 687 | Py_ssize_t size, /* size of buffer */
|
|---|
| 688 | const char *encoding, /* encoding */
|
|---|
| 689 | const char *errors /* error handling */
|
|---|
| 690 | );
|
|---|
| 691 |
|
|---|
| 692 | /* Encodes a Py_UNICODE buffer of the given size and returns a
|
|---|
| 693 | Python string object. */
|
|---|
| 694 |
|
|---|
| 695 | PyAPI_FUNC(PyObject*) PyUnicode_Encode(
|
|---|
| 696 | const Py_UNICODE *s, /* Unicode char buffer */
|
|---|
| 697 | Py_ssize_t size, /* number of Py_UNICODE chars to encode */
|
|---|
| 698 | const char *encoding, /* encoding */
|
|---|
| 699 | const char *errors /* error handling */
|
|---|
| 700 | );
|
|---|
| 701 |
|
|---|
| 702 | /* Encodes a Unicode object and returns the result as Python
|
|---|
| 703 | object. */
|
|---|
| 704 |
|
|---|
| 705 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
|
|---|
| 706 | PyObject *unicode, /* Unicode object */
|
|---|
| 707 | const char *encoding, /* encoding */
|
|---|
| 708 | const char *errors /* error handling */
|
|---|
| 709 | );
|
|---|
| 710 |
|
|---|
| 711 | /* Encodes a Unicode object and returns the result as Python string
|
|---|
| 712 | object. */
|
|---|
| 713 |
|
|---|
| 714 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
|
|---|
| 715 | PyObject *unicode, /* Unicode object */
|
|---|
| 716 | const char *encoding, /* encoding */
|
|---|
| 717 | const char *errors /* error handling */
|
|---|
| 718 | );
|
|---|
| 719 |
|
|---|
| 720 | PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
|
|---|
| 721 | PyObject* string /* 256 character map */
|
|---|
| 722 | );
|
|---|
| 723 |
|
|---|
| 724 |
|
|---|
| 725 | /* --- UTF-7 Codecs ------------------------------------------------------- */
|
|---|
| 726 |
|
|---|
| 727 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
|
|---|
| 728 | const char *string, /* UTF-7 encoded string */
|
|---|
| 729 | Py_ssize_t length, /* size of string */
|
|---|
| 730 | const char *errors /* error handling */
|
|---|
| 731 | );
|
|---|
| 732 |
|
|---|
| 733 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
|
|---|
| 734 | const char *string, /* UTF-7 encoded string */
|
|---|
| 735 | Py_ssize_t length, /* size of string */
|
|---|
| 736 | const char *errors, /* error handling */
|
|---|
| 737 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 738 | );
|
|---|
| 739 |
|
|---|
| 740 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
|
|---|
| 741 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 742 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 743 | int base64SetO, /* Encode RFC2152 Set O characters in base64 */
|
|---|
| 744 | int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
|
|---|
| 745 | const char *errors /* error handling */
|
|---|
| 746 | );
|
|---|
| 747 |
|
|---|
| 748 | /* --- UTF-8 Codecs ------------------------------------------------------- */
|
|---|
| 749 |
|
|---|
| 750 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
|
|---|
| 751 | const char *string, /* UTF-8 encoded string */
|
|---|
| 752 | Py_ssize_t length, /* size of string */
|
|---|
| 753 | const char *errors /* error handling */
|
|---|
| 754 | );
|
|---|
| 755 |
|
|---|
| 756 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
|
|---|
| 757 | const char *string, /* UTF-8 encoded string */
|
|---|
| 758 | Py_ssize_t length, /* size of string */
|
|---|
| 759 | const char *errors, /* error handling */
|
|---|
| 760 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 761 | );
|
|---|
| 762 |
|
|---|
| 763 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
|---|
| 764 | PyObject *unicode /* Unicode object */
|
|---|
| 765 | );
|
|---|
| 766 |
|
|---|
| 767 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
|
|---|
| 768 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 769 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 770 | const char *errors /* error handling */
|
|---|
| 771 | );
|
|---|
| 772 |
|
|---|
| 773 | /* --- UTF-32 Codecs ------------------------------------------------------ */
|
|---|
| 774 |
|
|---|
| 775 | /* Decodes length bytes from a UTF-32 encoded buffer string and returns
|
|---|
| 776 | the corresponding Unicode object.
|
|---|
| 777 |
|
|---|
| 778 | errors (if non-NULL) defines the error handling. It defaults
|
|---|
| 779 | to "strict".
|
|---|
| 780 |
|
|---|
| 781 | If byteorder is non-NULL, the decoder starts decoding using the
|
|---|
| 782 | given byte order:
|
|---|
| 783 |
|
|---|
| 784 | *byteorder == -1: little endian
|
|---|
| 785 | *byteorder == 0: native order
|
|---|
| 786 | *byteorder == 1: big endian
|
|---|
| 787 |
|
|---|
| 788 | In native mode, the first four bytes of the stream are checked for a
|
|---|
| 789 | BOM mark. If found, the BOM mark is analysed, the byte order
|
|---|
| 790 | adjusted and the BOM skipped. In the other modes, no BOM mark
|
|---|
| 791 | interpretation is done. After completion, *byteorder is set to the
|
|---|
| 792 | current byte order at the end of input data.
|
|---|
| 793 |
|
|---|
| 794 | If byteorder is NULL, the codec starts in native order mode.
|
|---|
| 795 |
|
|---|
| 796 | */
|
|---|
| 797 |
|
|---|
| 798 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
|
|---|
| 799 | const char *string, /* UTF-32 encoded string */
|
|---|
| 800 | Py_ssize_t length, /* size of string */
|
|---|
| 801 | const char *errors, /* error handling */
|
|---|
| 802 | int *byteorder /* pointer to byteorder to use
|
|---|
| 803 | 0=native;-1=LE,1=BE; updated on
|
|---|
| 804 | exit */
|
|---|
| 805 | );
|
|---|
| 806 |
|
|---|
| 807 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
|
|---|
| 808 | const char *string, /* UTF-32 encoded string */
|
|---|
| 809 | Py_ssize_t length, /* size of string */
|
|---|
| 810 | const char *errors, /* error handling */
|
|---|
| 811 | int *byteorder, /* pointer to byteorder to use
|
|---|
| 812 | 0=native;-1=LE,1=BE; updated on
|
|---|
| 813 | exit */
|
|---|
| 814 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 815 | );
|
|---|
| 816 |
|
|---|
| 817 | /* Returns a Python string using the UTF-32 encoding in native byte
|
|---|
| 818 | order. The string always starts with a BOM mark. */
|
|---|
| 819 |
|
|---|
| 820 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
|
|---|
| 821 | PyObject *unicode /* Unicode object */
|
|---|
| 822 | );
|
|---|
| 823 |
|
|---|
| 824 | /* Returns a Python string object holding the UTF-32 encoded value of
|
|---|
| 825 | the Unicode data.
|
|---|
| 826 |
|
|---|
| 827 | If byteorder is not 0, output is written according to the following
|
|---|
| 828 | byte order:
|
|---|
| 829 |
|
|---|
| 830 | byteorder == -1: little endian
|
|---|
| 831 | byteorder == 0: native byte order (writes a BOM mark)
|
|---|
| 832 | byteorder == 1: big endian
|
|---|
| 833 |
|
|---|
| 834 | If byteorder is 0, the output string will always start with the
|
|---|
| 835 | Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
|
|---|
| 836 | prepended.
|
|---|
| 837 |
|
|---|
| 838 | */
|
|---|
| 839 |
|
|---|
| 840 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
|
|---|
| 841 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 842 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 843 | const char *errors, /* error handling */
|
|---|
| 844 | int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
|
|---|
| 845 | );
|
|---|
| 846 |
|
|---|
| 847 | /* --- UTF-16 Codecs ------------------------------------------------------ */
|
|---|
| 848 |
|
|---|
| 849 | /* Decodes length bytes from a UTF-16 encoded buffer string and returns
|
|---|
| 850 | the corresponding Unicode object.
|
|---|
| 851 |
|
|---|
| 852 | errors (if non-NULL) defines the error handling. It defaults
|
|---|
| 853 | to "strict".
|
|---|
| 854 |
|
|---|
| 855 | If byteorder is non-NULL, the decoder starts decoding using the
|
|---|
| 856 | given byte order:
|
|---|
| 857 |
|
|---|
| 858 | *byteorder == -1: little endian
|
|---|
| 859 | *byteorder == 0: native order
|
|---|
| 860 | *byteorder == 1: big endian
|
|---|
| 861 |
|
|---|
| 862 | In native mode, the first two bytes of the stream are checked for a
|
|---|
| 863 | BOM mark. If found, the BOM mark is analysed, the byte order
|
|---|
| 864 | adjusted and the BOM skipped. In the other modes, no BOM mark
|
|---|
| 865 | interpretation is done. After completion, *byteorder is set to the
|
|---|
| 866 | current byte order at the end of input data.
|
|---|
| 867 |
|
|---|
| 868 | If byteorder is NULL, the codec starts in native order mode.
|
|---|
| 869 |
|
|---|
| 870 | */
|
|---|
| 871 |
|
|---|
| 872 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
|
|---|
| 873 | const char *string, /* UTF-16 encoded string */
|
|---|
| 874 | Py_ssize_t length, /* size of string */
|
|---|
| 875 | const char *errors, /* error handling */
|
|---|
| 876 | int *byteorder /* pointer to byteorder to use
|
|---|
| 877 | 0=native;-1=LE,1=BE; updated on
|
|---|
| 878 | exit */
|
|---|
| 879 | );
|
|---|
| 880 |
|
|---|
| 881 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
|
|---|
| 882 | const char *string, /* UTF-16 encoded string */
|
|---|
| 883 | Py_ssize_t length, /* size of string */
|
|---|
| 884 | const char *errors, /* error handling */
|
|---|
| 885 | int *byteorder, /* pointer to byteorder to use
|
|---|
| 886 | 0=native;-1=LE,1=BE; updated on
|
|---|
| 887 | exit */
|
|---|
| 888 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 889 | );
|
|---|
| 890 |
|
|---|
| 891 | /* Returns a Python string using the UTF-16 encoding in native byte
|
|---|
| 892 | order. The string always starts with a BOM mark. */
|
|---|
| 893 |
|
|---|
| 894 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
|
|---|
| 895 | PyObject *unicode /* Unicode object */
|
|---|
| 896 | );
|
|---|
| 897 |
|
|---|
| 898 | /* Returns a Python string object holding the UTF-16 encoded value of
|
|---|
| 899 | the Unicode data.
|
|---|
| 900 |
|
|---|
| 901 | If byteorder is not 0, output is written according to the following
|
|---|
| 902 | byte order:
|
|---|
| 903 |
|
|---|
| 904 | byteorder == -1: little endian
|
|---|
| 905 | byteorder == 0: native byte order (writes a BOM mark)
|
|---|
| 906 | byteorder == 1: big endian
|
|---|
| 907 |
|
|---|
| 908 | If byteorder is 0, the output string will always start with the
|
|---|
| 909 | Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
|
|---|
| 910 | prepended.
|
|---|
| 911 |
|
|---|
| 912 | Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
|
|---|
| 913 | UCS-2. This trick makes it possible to add full UTF-16 capabilities
|
|---|
| 914 | at a later point without compromising the APIs.
|
|---|
| 915 |
|
|---|
| 916 | */
|
|---|
| 917 |
|
|---|
| 918 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
|
|---|
| 919 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 920 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 921 | const char *errors, /* error handling */
|
|---|
| 922 | int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
|
|---|
| 923 | );
|
|---|
| 924 |
|
|---|
| 925 | /* --- Unicode-Escape Codecs ---------------------------------------------- */
|
|---|
| 926 |
|
|---|
| 927 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
|
|---|
| 928 | const char *string, /* Unicode-Escape encoded string */
|
|---|
| 929 | Py_ssize_t length, /* size of string */
|
|---|
| 930 | const char *errors /* error handling */
|
|---|
| 931 | );
|
|---|
| 932 |
|
|---|
| 933 | PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
|
|---|
| 934 | PyObject *unicode /* Unicode object */
|
|---|
| 935 | );
|
|---|
| 936 |
|
|---|
| 937 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
|
|---|
| 938 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 939 | Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
|---|
| 940 | );
|
|---|
| 941 |
|
|---|
| 942 | /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
|
|---|
| 943 |
|
|---|
| 944 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
|
|---|
| 945 | const char *string, /* Raw-Unicode-Escape encoded string */
|
|---|
| 946 | Py_ssize_t length, /* size of string */
|
|---|
| 947 | const char *errors /* error handling */
|
|---|
| 948 | );
|
|---|
| 949 |
|
|---|
| 950 | PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
|
|---|
| 951 | PyObject *unicode /* Unicode object */
|
|---|
| 952 | );
|
|---|
| 953 |
|
|---|
| 954 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
|
|---|
| 955 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 956 | Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
|---|
| 957 | );
|
|---|
| 958 |
|
|---|
| 959 | /* --- Unicode Internal Codec ---------------------------------------------
|
|---|
| 960 |
|
|---|
| 961 | Only for internal use in _codecsmodule.c */
|
|---|
| 962 |
|
|---|
| 963 | PyObject *_PyUnicode_DecodeUnicodeInternal(
|
|---|
| 964 | const char *string,
|
|---|
| 965 | Py_ssize_t length,
|
|---|
| 966 | const char *errors
|
|---|
| 967 | );
|
|---|
| 968 |
|
|---|
| 969 | /* --- Latin-1 Codecs -----------------------------------------------------
|
|---|
| 970 |
|
|---|
| 971 | Note: Latin-1 corresponds to the first 256 Unicode ordinals.
|
|---|
| 972 |
|
|---|
| 973 | */
|
|---|
| 974 |
|
|---|
| 975 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
|
|---|
| 976 | const char *string, /* Latin-1 encoded string */
|
|---|
| 977 | Py_ssize_t length, /* size of string */
|
|---|
| 978 | const char *errors /* error handling */
|
|---|
| 979 | );
|
|---|
| 980 |
|
|---|
| 981 | PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
|
|---|
| 982 | PyObject *unicode /* Unicode object */
|
|---|
| 983 | );
|
|---|
| 984 |
|
|---|
| 985 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
|
|---|
| 986 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 987 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 988 | const char *errors /* error handling */
|
|---|
| 989 | );
|
|---|
| 990 |
|
|---|
| 991 | /* --- ASCII Codecs -------------------------------------------------------
|
|---|
| 992 |
|
|---|
| 993 | Only 7-bit ASCII data is accepted. All other codes generate errors.
|
|---|
| 994 |
|
|---|
| 995 | */
|
|---|
| 996 |
|
|---|
| 997 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
|
|---|
| 998 | const char *string, /* ASCII encoded string */
|
|---|
| 999 | Py_ssize_t length, /* size of string */
|
|---|
| 1000 | const char *errors /* error handling */
|
|---|
| 1001 | );
|
|---|
| 1002 |
|
|---|
| 1003 | PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
|
|---|
| 1004 | PyObject *unicode /* Unicode object */
|
|---|
| 1005 | );
|
|---|
| 1006 |
|
|---|
| 1007 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
|
|---|
| 1008 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 1009 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 1010 | const char *errors /* error handling */
|
|---|
| 1011 | );
|
|---|
| 1012 |
|
|---|
| 1013 | /* --- Character Map Codecs -----------------------------------------------
|
|---|
| 1014 |
|
|---|
| 1015 | This codec uses mappings to encode and decode characters.
|
|---|
| 1016 |
|
|---|
| 1017 | Decoding mappings must map single string characters to single
|
|---|
| 1018 | Unicode characters, integers (which are then interpreted as Unicode
|
|---|
| 1019 | ordinals) or None (meaning "undefined mapping" and causing an
|
|---|
| 1020 | error).
|
|---|
| 1021 |
|
|---|
| 1022 | Encoding mappings must map single Unicode characters to single
|
|---|
| 1023 | string characters, integers (which are then interpreted as Latin-1
|
|---|
| 1024 | ordinals) or None (meaning "undefined mapping" and causing an
|
|---|
| 1025 | error).
|
|---|
| 1026 |
|
|---|
| 1027 | If a character lookup fails with a LookupError, the character is
|
|---|
| 1028 | copied as-is meaning that its ordinal value will be interpreted as
|
|---|
| 1029 | Unicode or Latin-1 ordinal resp. Because of this, mappings only need
|
|---|
| 1030 | to contain those mappings which map characters to different code
|
|---|
| 1031 | points.
|
|---|
| 1032 |
|
|---|
| 1033 | */
|
|---|
| 1034 |
|
|---|
| 1035 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
|
|---|
| 1036 | const char *string, /* Encoded string */
|
|---|
| 1037 | Py_ssize_t length, /* size of string */
|
|---|
| 1038 | PyObject *mapping, /* character mapping
|
|---|
| 1039 | (char ordinal -> unicode ordinal) */
|
|---|
| 1040 | const char *errors /* error handling */
|
|---|
| 1041 | );
|
|---|
| 1042 |
|
|---|
| 1043 | PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
|
|---|
| 1044 | PyObject *unicode, /* Unicode object */
|
|---|
| 1045 | PyObject *mapping /* character mapping
|
|---|
| 1046 | (unicode ordinal -> char ordinal) */
|
|---|
| 1047 | );
|
|---|
| 1048 |
|
|---|
| 1049 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
|
|---|
| 1050 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 1051 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 1052 | PyObject *mapping, /* character mapping
|
|---|
| 1053 | (unicode ordinal -> char ordinal) */
|
|---|
| 1054 | const char *errors /* error handling */
|
|---|
| 1055 | );
|
|---|
| 1056 |
|
|---|
| 1057 | /* Translate a Py_UNICODE buffer of the given length by applying a
|
|---|
| 1058 | character mapping table to it and return the resulting Unicode
|
|---|
| 1059 | object.
|
|---|
| 1060 |
|
|---|
| 1061 | The mapping table must map Unicode ordinal integers to Unicode
|
|---|
| 1062 | ordinal integers or None (causing deletion of the character).
|
|---|
| 1063 |
|
|---|
| 1064 | Mapping tables may be dictionaries or sequences. Unmapped character
|
|---|
| 1065 | ordinals (ones which cause a LookupError) are left untouched and
|
|---|
| 1066 | are copied as-is.
|
|---|
| 1067 |
|
|---|
| 1068 | */
|
|---|
| 1069 |
|
|---|
| 1070 | PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
|
|---|
| 1071 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 1072 | Py_ssize_t length, /* Number of Py_UNICODE chars to translate */
|
|---|
| 1073 | PyObject *table, /* Translate table */
|
|---|
| 1074 | const char *errors /* error handling */
|
|---|
| 1075 | );
|
|---|
| 1076 |
|
|---|
| 1077 | #ifdef MS_WIN32
|
|---|
| 1078 |
|
|---|
| 1079 | /* --- MBCS codecs for Windows -------------------------------------------- */
|
|---|
| 1080 |
|
|---|
| 1081 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
|
|---|
| 1082 | const char *string, /* MBCS encoded string */
|
|---|
| 1083 | Py_ssize_t length, /* size of string */
|
|---|
| 1084 | const char *errors /* error handling */
|
|---|
| 1085 | );
|
|---|
| 1086 |
|
|---|
| 1087 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
|
|---|
| 1088 | const char *string, /* MBCS encoded string */
|
|---|
| 1089 | Py_ssize_t length, /* size of string */
|
|---|
| 1090 | const char *errors, /* error handling */
|
|---|
| 1091 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 1092 | );
|
|---|
| 1093 |
|
|---|
| 1094 | PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
|
|---|
| 1095 | PyObject *unicode /* Unicode object */
|
|---|
| 1096 | );
|
|---|
| 1097 |
|
|---|
| 1098 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
|
|---|
| 1099 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 1100 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 1101 | const char *errors /* error handling */
|
|---|
| 1102 | );
|
|---|
| 1103 |
|
|---|
| 1104 | #endif /* MS_WIN32 */
|
|---|
| 1105 |
|
|---|
| 1106 | /* --- Decimal Encoder ---------------------------------------------------- */
|
|---|
| 1107 |
|
|---|
| 1108 | /* Takes a Unicode string holding a decimal value and writes it into
|
|---|
| 1109 | an output buffer using standard ASCII digit codes.
|
|---|
| 1110 |
|
|---|
| 1111 | The output buffer has to provide at least length+1 bytes of storage
|
|---|
| 1112 | area. The output string is 0-terminated.
|
|---|
| 1113 |
|
|---|
| 1114 | The encoder converts whitespace to ' ', decimal characters to their
|
|---|
| 1115 | corresponding ASCII digit and all other Latin-1 characters except
|
|---|
| 1116 | \0 as-is. Characters outside this range (Unicode ordinals 1-255)
|
|---|
| 1117 | are treated as errors. This includes embedded NUL characters.
|
|---|
| 1118 |
|
|---|
| 1119 | Error handling is defined by the errors argument:
|
|---|
| 1120 |
|
|---|
| 1121 | NULL or "strict": raise a ValueError
|
|---|
| 1122 | "ignore": ignore the wrong characters (these are not copied to the
|
|---|
| 1123 | output buffer)
|
|---|
| 1124 | "replace": replaces illegal characters with '?'
|
|---|
| 1125 |
|
|---|
| 1126 | Returns 0 on success, -1 on failure.
|
|---|
| 1127 |
|
|---|
| 1128 | */
|
|---|
| 1129 |
|
|---|
| 1130 | PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
|
|---|
| 1131 | Py_UNICODE *s, /* Unicode buffer */
|
|---|
| 1132 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 1133 | char *output, /* Output buffer; must have size >= length+1 */
|
|---|
| 1134 | const char *errors /* error handling */
|
|---|
| 1135 | );
|
|---|
| 1136 |
|
|---|
| 1137 | /* --- Methods & Slots ----------------------------------------------------
|
|---|
| 1138 |
|
|---|
| 1139 | These are capable of handling Unicode objects and strings on input
|
|---|
| 1140 | (we refer to them as strings in the descriptions) and return
|
|---|
| 1141 | Unicode objects or integers as appropriate. */
|
|---|
| 1142 |
|
|---|
| 1143 | /* Concat two strings giving a new Unicode string. */
|
|---|
| 1144 |
|
|---|
| 1145 | PyAPI_FUNC(PyObject*) PyUnicode_Concat(
|
|---|
| 1146 | PyObject *left, /* Left string */
|
|---|
| 1147 | PyObject *right /* Right string */
|
|---|
| 1148 | );
|
|---|
| 1149 |
|
|---|
| 1150 | /* Split a string giving a list of Unicode strings.
|
|---|
| 1151 |
|
|---|
| 1152 | If sep is NULL, splitting will be done at all whitespace
|
|---|
| 1153 | substrings. Otherwise, splits occur at the given separator.
|
|---|
| 1154 |
|
|---|
| 1155 | At most maxsplit splits will be done. If maxsplit is negative, no limit is set.
|
|---|
| 1156 |
|
|---|
| 1157 | Separators are not included in the resulting list.
|
|---|
| 1158 |
|
|---|
| 1159 | */
|
|---|
| 1160 |
|
|---|
| 1161 | PyAPI_FUNC(PyObject*) PyUnicode_Split(
|
|---|
| 1162 | PyObject *s, /* String to split */
|
|---|
| 1163 | PyObject *sep, /* String separator */
|
|---|
| 1164 | Py_ssize_t maxsplit /* Maxsplit count */
|
|---|
| 1165 | );
|
|---|
| 1166 |
|
|---|
| 1167 | /* Ditto, but split at line breaks.
|
|---|
| 1168 |
|
|---|
| 1169 | CRLF is considered to be one line break. Line breaks are not
|
|---|
| 1170 | included in the resulting list. */
|
|---|
| 1171 |
|
|---|
| 1172 | PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
|
|---|
| 1173 | PyObject *s, /* String to split */
|
|---|
| 1174 | int keepends /* If true, line end markers are included */
|
|---|
| 1175 | );
|
|---|
| 1176 |
|
|---|
| 1177 | /* Partition a string using a given separator. */
|
|---|
| 1178 |
|
|---|
| 1179 | PyAPI_FUNC(PyObject*) PyUnicode_Partition(
|
|---|
| 1180 | PyObject *s, /* String to partition */
|
|---|
| 1181 | PyObject *sep /* String separator */
|
|---|
| 1182 | );
|
|---|
| 1183 |
|
|---|
| 1184 | /* Partition a string using a given separator, searching from the end of the
|
|---|
| 1185 | string. */
|
|---|
| 1186 |
|
|---|
| 1187 | PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
|
|---|
| 1188 | PyObject *s, /* String to partition */
|
|---|
| 1189 | PyObject *sep /* String separator */
|
|---|
| 1190 | );
|
|---|
| 1191 |
|
|---|
| 1192 | /* Split a string giving a list of Unicode strings.
|
|---|
| 1193 |
|
|---|
| 1194 | If sep is NULL, splitting will be done at all whitespace
|
|---|
| 1195 | substrings. Otherwise, splits occur at the given separator.
|
|---|
| 1196 |
|
|---|
| 1197 | At most maxsplit splits will be done. Unlike PyUnicode_Split,
|
|---|
| 1198 | PyUnicode_RSplit splits from the end of the string. If maxsplit is negative,
|
|---|
| 1199 | no limit is set.
|
|---|
| 1200 |
|
|---|
| 1201 | Separators are not included in the resulting list.
|
|---|
| 1202 |
|
|---|
| 1203 | */
|
|---|
| 1204 |
|
|---|
| 1205 | PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
|
|---|
| 1206 | PyObject *s, /* String to split */
|
|---|
| 1207 | PyObject *sep, /* String separator */
|
|---|
| 1208 | Py_ssize_t maxsplit /* Maxsplit count */
|
|---|
| 1209 | );
|
|---|
| 1210 |
|
|---|
| 1211 | /* Translate a string by applying a character mapping table to it and
|
|---|
| 1212 | return the resulting Unicode object.
|
|---|
| 1213 |
|
|---|
| 1214 | The mapping table must map Unicode ordinal integers to Unicode
|
|---|
| 1215 | ordinal integers or None (causing deletion of the character).
|
|---|
| 1216 |
|
|---|
| 1217 | Mapping tables may be dictionaries or sequences. Unmapped character
|
|---|
| 1218 | ordinals (ones which cause a LookupError) are left untouched and
|
|---|
| 1219 | are copied as-is.
|
|---|
| 1220 |
|
|---|
| 1221 | */
|
|---|
| 1222 |
|
|---|
| 1223 | PyAPI_FUNC(PyObject *) PyUnicode_Translate(
|
|---|
| 1224 | PyObject *str, /* String */
|
|---|
| 1225 | PyObject *table, /* Translate table */
|
|---|
| 1226 | const char *errors /* error handling */
|
|---|
| 1227 | );
|
|---|
| 1228 |
|
|---|
| 1229 | /* Join a sequence of strings using the given separator and return
|
|---|
| 1230 | the resulting Unicode string. */
|
|---|
| 1231 |
|
|---|
| 1232 | PyAPI_FUNC(PyObject*) PyUnicode_Join(
|
|---|
| 1233 | PyObject *separator, /* Separator string */
|
|---|
| 1234 | PyObject *seq /* Sequence object */
|
|---|
| 1235 | );
|
|---|
| 1236 |
|
|---|
| 1237 | /* Return 1 if substr matches str[start:end] at the given tail end, 0
|
|---|
| 1238 | otherwise. */
|
|---|
| 1239 |
|
|---|
| 1240 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
|
|---|
| 1241 | PyObject *str, /* String */
|
|---|
| 1242 | PyObject *substr, /* Prefix or Suffix string */
|
|---|
| 1243 | Py_ssize_t start, /* Start index */
|
|---|
| 1244 | Py_ssize_t end, /* Stop index */
|
|---|
| 1245 | int direction /* Tail end: -1 prefix, +1 suffix */
|
|---|
| 1246 | );
|
|---|
| 1247 |
|
|---|
| 1248 | /* Return the first position of substr in str[start:end] using the
|
|---|
| 1249 | given search direction or -1 if not found. -2 is returned in case
|
|---|
| 1250 | an error occurred and an exception is set. */
|
|---|
| 1251 |
|
|---|
| 1252 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
|
|---|
| 1253 | PyObject *str, /* String */
|
|---|
| 1254 | PyObject *substr, /* Substring to find */
|
|---|
| 1255 | Py_ssize_t start, /* Start index */
|
|---|
| 1256 | Py_ssize_t end, /* Stop index */
|
|---|
| 1257 | int direction /* Find direction: +1 forward, -1 backward */
|
|---|
| 1258 | );
|
|---|
| 1259 |
|
|---|
| 1260 | /* Count the number of occurrences of substr in str[start:end]. */
|
|---|
| 1261 |
|
|---|
| 1262 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
|
|---|
| 1263 | PyObject *str, /* String whose slice str[start:end] is scanned */
|
|---|
| 1264 | PyObject *substr, /* Substring to count */
|
|---|
| 1265 | Py_ssize_t start, /* Start index of the slice */
|
|---|
| 1266 | Py_ssize_t end /* Stop index of the slice */
|
|---|
| 1267 | );
|
|---|
| 1268 |
|
|---|
| 1269 | /* Replace at most maxcount occurrences of substr in str with replstr
|
|---|
| 1270 | and return the resulting Unicode object. */
|
|---|
| 1271 |
|
|---|
| 1272 | PyAPI_FUNC(PyObject *) PyUnicode_Replace(
|
|---|
| 1273 | PyObject *str, /* String in which occurrences are replaced */
|
|---|
| 1274 | PyObject *substr, /* Substring to find */
|
|---|
| 1275 | PyObject *replstr, /* Replacement string substituted for substr */
|
|---|
| 1276 | Py_ssize_t maxcount /* Max. number of replacements to apply;
|
|---|
| 1277 | -1 = all */
|
|---|
| 1278 | );
|
|---|
| 1279 |
|
|---|
| 1280 | /* Compare two strings and return -1, 0, 1 for less than, equal,
|
|---|
| 1281 | greater than, respectively. */
|
|---|
| 1282 |
|
|---|
| 1283 | PyAPI_FUNC(int) PyUnicode_Compare(
|
|---|
| 1284 | PyObject *left, /* Left string */
|
|---|
| 1285 | PyObject *right /* Right string */
|
|---|
| 1286 | );
|
|---|
| 1287 |
|
|---|
| 1288 | /* Rich compare two strings and return one of the following:
|
|---|
| 1289 |
|
|---|
| 1290 | - NULL in case an exception was raised
|
|---|
| 1291 | - Py_True or Py_False for successful comparisons
|
|---|
| 1292 | - Py_NotImplemented in case the type combination is unknown
|
|---|
| 1293 |
|
|---|
| 1294 | Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
|
|---|
| 1295 | case the conversion of the arguments to Unicode fails with a
|
|---|
| 1296 | UnicodeDecodeError.
|
|---|
| 1297 |
|
|---|
| 1298 | Possible values for op:
|
|---|
| 1299 |
|
|---|
| 1300 | Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
|
|---|
| 1301 |
|
|---|
| 1302 | */
|
|---|
| 1303 |
|
|---|
| 1304 | PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
|
|---|
| 1305 | PyObject *left, /* Left string */
|
|---|
| 1306 | PyObject *right, /* Right string */
|
|---|
| 1307 | int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
|
|---|
| 1308 | );
|
|---|
| 1309 |
|
|---|
| 1310 | /* Apply an argument tuple or dictionary to a format string and return
|
|---|
| 1311 | the resulting Unicode string. */
|
|---|
| 1312 |
|
|---|
| 1313 | PyAPI_FUNC(PyObject *) PyUnicode_Format(
|
|---|
| 1314 | PyObject *format, /* Format string */
|
|---|
| 1315 | PyObject *args /* Argument tuple or dictionary */
|
|---|
| 1316 | );
|
|---|
| 1317 |
|
|---|
| 1318 | /* Check whether element is contained in container and return 1/0
|
|---|
| 1319 | accordingly.
|
|---|
| 1320 |
|
|---|
| 1321 | element has to coerce to a one-element Unicode string. -1 is
|
|---|
| 1322 | returned in case of an error. */
|
|---|
| 1323 |
|
|---|
| 1324 | PyAPI_FUNC(int) PyUnicode_Contains(
|
|---|
| 1325 | PyObject *container, /* Container string */
|
|---|
| 1326 | PyObject *element /* Element string */
|
|---|
| 1327 | );
|
|---|
| 1328 |
|
|---|
| 1329 | /* Externally visible for str.strip(unicode) */
|
|---|
| 1330 | PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
|
|---|
| 1331 | PyUnicodeObject *self, /* Unicode object (presumably the string being stripped -- confirm) */
|
|---|
| 1332 | int striptype, /* NOTE(review): presumably selects which end(s) to strip -- confirm against the definition */
|
|---|
| 1333 | PyObject *sepobj /* NOTE(review): presumably the characters to strip -- confirm against the definition */
|
|---|
| 1334 | );
|
|---|
| 1335 |
|
|---|
| 1336 | /* === Characters Type APIs =============================================== */
|
|---|
| 1337 |
|
|---|
| 1338 | /* Helper array used by Py_UNICODE_ISSPACE(). */
|
|---|
| 1339 |
|
|---|
| 1340 | PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
|
|---|
| 1341 |
|
|---|
| 1342 | /* These should not be used directly. Use the Py_UNICODE_IS* and
|
|---|
| 1343 | Py_UNICODE_TO* macros instead.
|
|---|
| 1344 |
|
|---|
| 1345 | These APIs are implemented in Objects/unicodectype.c.
|
|---|
| 1346 |
|
|---|
| 1347 | */
|
|---|
| 1348 |
|
|---|
| 1349 | PyAPI_FUNC(int) _PyUnicode_IsLowercase(
|
|---|
| 1350 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1351 | );
|
|---|
| 1352 |
|
|---|
| 1353 | PyAPI_FUNC(int) _PyUnicode_IsUppercase(
|
|---|
| 1354 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1355 | );
|
|---|
| 1356 |
|
|---|
| 1357 | PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
|
|---|
| 1358 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1359 | );
|
|---|
| 1360 |
|
|---|
| 1361 | PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
|
|---|
| 1362 | const Py_UNICODE ch /* Unicode character */
|
|---|
| 1363 | );
|
|---|
| 1364 |
|
|---|
| 1365 | PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
|
|---|
| 1366 | const Py_UNICODE ch /* Unicode character */
|
|---|
| 1367 | );
|
|---|
| 1368 |
|
|---|
| 1369 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
|
|---|
| 1370 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1371 | );
|
|---|
| 1372 |
|
|---|
| 1373 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
|
|---|
| 1374 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1375 | );
|
|---|
| 1376 |
|
|---|
| 1377 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
|
|---|
| 1378 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1379 | );
|
|---|
| 1380 |
|
|---|
| 1381 | PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
|
|---|
| 1382 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1383 | );
|
|---|
| 1384 |
|
|---|
| 1385 | PyAPI_FUNC(int) _PyUnicode_ToDigit(
|
|---|
| 1386 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1387 | );
|
|---|
| 1388 |
|
|---|
| 1389 | PyAPI_FUNC(double) _PyUnicode_ToNumeric(
|
|---|
| 1390 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1391 | );
|
|---|
| 1392 |
|
|---|
| 1393 | PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
|
|---|
| 1394 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1395 | );
|
|---|
| 1396 |
|
|---|
| 1397 | PyAPI_FUNC(int) _PyUnicode_IsDigit(
|
|---|
| 1398 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1399 | );
|
|---|
| 1400 |
|
|---|
| 1401 | PyAPI_FUNC(int) _PyUnicode_IsNumeric(
|
|---|
| 1402 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1403 | );
|
|---|
| 1404 |
|
|---|
| 1405 | PyAPI_FUNC(int) _PyUnicode_IsAlpha(
|
|---|
| 1406 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1407 | );
|
|---|
| 1408 |
|
|---|
| 1409 | #ifdef __cplusplus
|
|---|
| 1410 | }
|
|---|
| 1411 | #endif
|
|---|
| 1412 | #endif /* Py_USING_UNICODE */
|
|---|
| 1413 | #endif /* !Py_UNICODEOBJECT_H */
|
|---|