Changeset 391 for python/trunk/Modules/cjkcodecs/multibytecodec.c
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Modules/cjkcodecs/multibytecodec.c
r2 r391 46 46 47 47 static PyObject *multibytecodec_encode(MultibyteCodec *, 48 49 50 51 #define MBENC_RESET 48 MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t, 49 PyObject *, int); 50 51 #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */ 52 52 53 53 static PyObject * 54 54 make_tuple(PyObject *object, Py_ssize_t len) 55 55 { 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 56 PyObject *v, *w; 57 58 if (object == NULL) 59 return NULL; 60 61 v = PyTuple_New(2); 62 if (v == NULL) { 63 Py_DECREF(object); 64 return NULL; 65 } 66 PyTuple_SET_ITEM(v, 0, object); 67 68 w = PyInt_FromSsize_t(len); 69 if (w == NULL) { 70 Py_DECREF(v); 71 return NULL; 72 } 73 PyTuple_SET_ITEM(v, 1, w); 74 75 return v; 76 76 } 77 77 … … 79 79 internal_error_callback(const char *errors) 80 80 { 81 82 83 84 85 86 87 88 81 if (errors == NULL || strcmp(errors, "strict") == 0) 82 return ERROR_STRICT; 83 else if (strcmp(errors, "ignore") == 0) 84 return ERROR_IGNORE; 85 else if (strcmp(errors, "replace") == 0) 86 return ERROR_REPLACE; 87 else 88 return PyString_FromString(errors); 89 89 } 90 90 … … 92 92 call_error_callback(PyObject *errors, PyObject *exc) 93 93 { 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 94 PyObject *args, *cb, *r; 95 96 assert(PyString_Check(errors)); 97 cb = PyCodec_LookupError(PyString_AS_STRING(errors)); 98 if (cb == NULL) 99 return NULL; 100 101 args = PyTuple_New(1); 102 if (args == NULL) { 103 Py_DECREF(cb); 104 return NULL; 105 } 106 107 PyTuple_SET_ITEM(args, 0, exc); 108 Py_INCREF(exc); 109 110 r = PyObject_CallObject(cb, args); 111 Py_DECREF(args); 112 Py_DECREF(cb); 113 return r; 114 114 } 115 115 … … 117 117 codecctx_errors_get(MultibyteStatefulCodecContext *self) 118 118 { 119 120 121 122 123 124 125 126 127 128 129 130 131 132 119 const char *errors; 120 121 if (self->errors == ERROR_STRICT) 122 errors = "strict"; 123 else if (self->errors == ERROR_IGNORE) 124 errors = "ignore"; 125 else if (self->errors == ERROR_REPLACE) 126 errors = "replace"; 127 else { 128 Py_INCREF(self->errors); 129 return self->errors; 130 } 131 132 return PyString_FromString(errors); 133 133 } 134 134 135 135 static int 136 136 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value, 137 138 { 139 140 141 142 143 144 145 146 147 148 149 150 151 152 137 void *closure) 138 { 139 PyObject *cb; 140 141 if (!PyString_Check(value)) { 142 PyErr_SetString(PyExc_TypeError, "errors must be a string"); 143 return -1; 144 } 145 146 cb = internal_error_callback(PyString_AS_STRING(value)); 147 if (cb == NULL) 148 return -1; 149 150 ERROR_DECREF(self->errors); 151 self->errors = cb; 152 return 0; 153 153 } 154 154 155 155 /* This getset handlers list is used by all the stateful codec objects */ 156 156 static PyGetSetDef codecctx_getsets[] = { 157 {"errors",(getter)codecctx_errors_get,158 159 160 157 {"errors", (getter)codecctx_errors_get, 158 (setter)codecctx_errors_set, 159 PyDoc_STR("how to treat errors")}, 160 {NULL,} 161 161 }; 162 162 … … 164 164 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) 165 165 { 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 } 185 #define REQUIRE_ENCODEBUFFER(buf, s) { 186 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)\187 if (expand_encodebuffer(buf, s) == -1)\188 goto errorexit;\166 Py_ssize_t orgpos, orgsize, incsize; 167 168 orgpos = (Py_ssize_t)((char *)buf->outbuf - 169 PyString_AS_STRING(buf->outobj)); 170 orgsize = PyString_GET_SIZE(buf->outobj); 171 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); 172 173 if (orgsize > PY_SSIZE_T_MAX - incsize) 174 return -1; 175 176 if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1) 177 return -1; 178 179 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos; 180 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj) 181 + PyString_GET_SIZE(buf->outobj); 182 183 return 0; 184 } 185 #define REQUIRE_ENCODEBUFFER(buf, s) { \ 186 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ 187 if (expand_encodebuffer(buf, s) == -1) \ 188 goto errorexit; \ 189 189 } 190 190 … … 192 192 expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) 193 193 { 194 195 196 197 198 199 200 201 202 203 204 205 206 207 } 208 #define REQUIRE_DECODEBUFFER(buf, s) { 209 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)\210 if (expand_decodebuffer(buf, s) == -1)\211 goto errorexit;\194 Py_ssize_t orgpos, orgsize; 195 196 orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj)); 197 orgsize = PyUnicode_GET_SIZE(buf->outobj); 198 if (PyUnicode_Resize(&buf->outobj, orgsize + ( 199 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1) 200 return -1; 201 202 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos; 203 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj) 204 + PyUnicode_GET_SIZE(buf->outobj); 205 206 return 0; 207 } 208 #define REQUIRE_DECODEBUFFER(buf, s) { \ 209 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ 210 if (expand_decodebuffer(buf, s) == -1) \ 211 goto errorexit; \ 212 212 } 213 213 … … 219 219 static int 220 220 multibytecodec_encerror(MultibyteCodec *codec, 221 222 223 224 { 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 221 MultibyteCodec_State *state, 222 MultibyteEncodeBuffer *buf, 223 PyObject *errors, Py_ssize_t e) 224 { 225 PyObject *retobj = NULL, *retstr = NULL, *tobj; 226 Py_ssize_t retstrsize, newpos; 227 Py_ssize_t esize, start, end; 228 const char *reason; 229 230 if (e > 0) { 231 reason = "illegal multibyte sequence"; 232 esize = e; 233 } 234 else { 235 switch (e) { 236 case MBERR_TOOSMALL: 237 REQUIRE_ENCODEBUFFER(buf, -1); 238 return 0; /* retry it */ 239 case MBERR_TOOFEW: 240 reason = "incomplete multibyte sequence"; 241 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 242 break; 243 case MBERR_INTERNAL: 244 PyErr_SetString(PyExc_RuntimeError, 245 "internal codec error"); 246 return -1; 247 default: 248 PyErr_SetString(PyExc_RuntimeError, 249 "unknown runtime error"); 250 return -1; 251 } 252 } 253 254 if (errors == ERROR_REPLACE) { 255 const Py_UNICODE replchar = '?', *inbuf = &replchar; 256 Py_ssize_t r; 257 258 for (;;) { 259 Py_ssize_t outleft; 260 261 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); 262 r = codec->encode(state, codec->config, &inbuf, 1, 263 &buf->outbuf, outleft, 0); 264 if (r == MBERR_TOOSMALL) { 265 REQUIRE_ENCODEBUFFER(buf, -1); 266 continue; 267 } 268 else 269 break; 270 } 271 272 if (r != 0) { 273 REQUIRE_ENCODEBUFFER(buf, 1); 274 *buf->outbuf++ = '?'; 275 } 276 } 277 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { 278 buf->inbuf += esize; 279 return 0; 280 } 281 282 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); 283 end = start + esize; 284 285 /* use cached exception object if available */ 286 if (buf->excobj == NULL) { 287 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding, 288 buf->inbuf_top, 289 buf->inbuf_end - buf->inbuf_top, 290 start, end, reason); 291 if (buf->excobj == NULL) 292 goto errorexit; 293 } 294 else 295 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 || 296 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 || 297 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0) 298 goto errorexit; 299 300 if (errors == ERROR_STRICT) { 301 PyCodec_StrictErrors(buf->excobj); 302 goto errorexit; 303 } 304 305 retobj = call_error_callback(errors, buf->excobj); 306 if (retobj == NULL) 307 goto errorexit; 308 309 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || 310 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) || 311 !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || 312 PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { 313 PyErr_SetString(PyExc_TypeError, 314 "encoding error handler must return " 315 "(unicode, int) tuple"); 316 goto errorexit; 317 } 318 319 { 320 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj); 321 322 retstr = multibytecodec_encode(codec, state, &uraw, 323 PyUnicode_GET_SIZE(tobj), ERROR_STRICT, 324 MBENC_FLUSH); 325 if (retstr == NULL) 326 goto errorexit; 327 } 328 329 retstrsize = PyString_GET_SIZE(retstr); 330 REQUIRE_ENCODEBUFFER(buf, retstrsize); 331 332 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize); 333 buf->outbuf += retstrsize; 334 335 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); 336 if (newpos < 0 && !PyErr_Occurred()) 337 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); 338 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { 339 PyErr_Clear(); 340 PyErr_Format(PyExc_IndexError, 341 "position %zd from error handler out of bounds", 342 newpos); 343 goto errorexit; 344 } 345 buf->inbuf = buf->inbuf_top + newpos; 346 347 Py_DECREF(retobj); 348 Py_DECREF(retstr); 349 return 0; 350 350 351 351 errorexit: 352 353 354 352 Py_XDECREF(retobj); 353 Py_XDECREF(retstr); 354 return -1; 355 355 } 356 356 357 357 static int 358 358 multibytecodec_decerror(MultibyteCodec *codec, 359 360 361 362 { 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 359 MultibyteCodec_State *state, 360 MultibyteDecodeBuffer *buf, 361 PyObject *errors, Py_ssize_t e) 362 { 363 PyObject *retobj = NULL, *retuni = NULL; 364 Py_ssize_t retunisize, newpos; 365 const char *reason; 366 Py_ssize_t esize, start, end; 367 368 if (e > 0) { 369 reason = "illegal multibyte sequence"; 370 esize = e; 371 } 372 else { 373 switch (e) { 374 case MBERR_TOOSMALL: 375 REQUIRE_DECODEBUFFER(buf, -1); 376 return 0; /* retry it */ 377 case MBERR_TOOFEW: 378 reason = "incomplete multibyte sequence"; 379 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 380 break; 381 case MBERR_INTERNAL: 382 PyErr_SetString(PyExc_RuntimeError, 383 "internal codec error"); 384 return -1; 385 default: 386 PyErr_SetString(PyExc_RuntimeError, 387 "unknown runtime error"); 388 return -1; 389 } 390 } 391 392 if (errors == ERROR_REPLACE) { 393 REQUIRE_DECODEBUFFER(buf, 1); 394 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER; 395 } 396 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { 397 buf->inbuf += esize; 398 return 0; 399 } 400 401 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top); 402 end = start + esize; 403 404 /* use cached exception object if available */ 405 if (buf->excobj == NULL) { 406 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding, 407 (const char *)buf->inbuf_top, 408 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top), 409 start, end, reason); 410 if (buf->excobj == NULL) 411 goto errorexit; 412 } 413 else 414 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) || 415 PyUnicodeDecodeError_SetEnd(buf->excobj, end) || 416 PyUnicodeDecodeError_SetReason(buf->excobj, reason)) 417 goto errorexit; 418 419 if (errors == ERROR_STRICT) { 420 PyCodec_StrictErrors(buf->excobj); 421 goto errorexit; 422 } 423 424 retobj = call_error_callback(errors, buf->excobj); 425 if (retobj == NULL) 426 goto errorexit; 427 428 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || 429 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || 430 !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) || 431 PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) { 432 PyErr_SetString(PyExc_TypeError, 433 "decoding error handler must return " 434 "(unicode, int) tuple"); 435 goto errorexit; 436 } 437 438 retunisize = PyUnicode_GET_SIZE(retuni); 439 if (retunisize > 0) { 440 REQUIRE_DECODEBUFFER(buf, retunisize); 441 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni), 442 retunisize * Py_UNICODE_SIZE); 443 buf->outbuf += retunisize; 444 } 445 446 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); 447 if (newpos < 0 && !PyErr_Occurred()) 448 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top); 449 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) { 450 PyErr_Clear(); 451 PyErr_Format(PyExc_IndexError, 452 "position %zd from error handler out of bounds", 453 newpos); 454 goto errorexit; 455 } 456 buf->inbuf = buf->inbuf_top + newpos; 457 Py_DECREF(retobj); 458 return 0; 459 459 460 460 errorexit: 461 462 461 Py_XDECREF(retobj); 462 return -1; 463 463 } 464 464 465 465 static PyObject * 466 466 multibytecodec_encode(MultibyteCodec *codec, 467 468 469 470 { 471 472 473 474 if (datalen == 0)475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 *data = buf.inbuf; 502 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) 503 break; 504 else if (multibytecodec_encerror(codec, state, &buf, errors,r)) 505 goto errorexit; 506 else if (r == MBERR_TOOFEW) 507 break; 508 } 509 510 if (codec->encreset != NULL) 511 for (;;) { 512 Py_ssize_t outleft; 513 514 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 515 r = codec->encreset(state, codec->config, &buf.outbuf, 516 outleft); 517 if (r == 0) 518 break; 519 else if (multibytecodec_encerror(codec, state, 520 &buf, errors, r)) 521 goto errorexit; 522 } 523 524 finalsize = (Py_ssize_t)((char *)buf.outbuf - 525 PyString_AS_STRING(buf.outobj)); 526 527 if (finalsize != PyString_GET_SIZE(buf.outobj))528 if (_PyString_Resize(&buf.outobj, finalsize) == -1) 529 goto errorexit; 530 531 532 467 MultibyteCodec_State *state, 468 const Py_UNICODE **data, Py_ssize_t datalen, 469 PyObject *errors, int flags) 470 { 471 MultibyteEncodeBuffer buf; 472 Py_ssize_t finalsize, r = 0; 473 474 if (datalen == 0 && !(flags & MBENC_RESET)) 475 return PyString_FromString(""); 476 477 buf.excobj = NULL; 478 buf.inbuf = buf.inbuf_top = *data; 479 buf.inbuf_end = buf.inbuf_top + datalen; 480 481 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) { 482 PyErr_NoMemory(); 483 goto errorexit; 484 } 485 486 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16); 487 if (buf.outobj == NULL) 488 goto errorexit; 489 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj); 490 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj); 491 492 while (buf.inbuf < buf.inbuf_end) { 493 Py_ssize_t inleft, outleft; 494 495 /* we don't reuse inleft and outleft here. 496 * error callbacks can relocate the cursor anywhere on buffer*/ 497 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); 498 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 499 r = codec->encode(state, codec->config, &buf.inbuf, inleft, 500 &buf.outbuf, outleft, flags); 501 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) 502 break; 503 else if (multibytecodec_encerror(codec, state, &buf, errors,r)) 504 goto errorexit; 505 else if (r == MBERR_TOOFEW) 506 break; 507 } 508 509 if (codec->encreset != NULL && (flags & MBENC_RESET)) 510 for (;;) { 511 Py_ssize_t outleft; 512 513 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 514 r = codec->encreset(state, codec->config, &buf.outbuf, 515 outleft); 516 if (r == 0) 517 break; 518 else if (multibytecodec_encerror(codec, state, 519 &buf, errors, r)) 520 goto errorexit; 521 } 522 523 finalsize = (Py_ssize_t)((char *)buf.outbuf - 524 PyString_AS_STRING(buf.outobj)); 525 526 if (finalsize != PyString_GET_SIZE(buf.outobj)) 527 if (_PyString_Resize(&buf.outobj, finalsize) == -1) 528 goto errorexit; 529 530 *data = buf.inbuf; 531 Py_XDECREF(buf.excobj); 532 return buf.outobj; 533 533 534 534 errorexit: 535 536 537 535 Py_XDECREF(buf.excobj); 536 Py_XDECREF(buf.outobj); 537 return NULL; 538 538 } 539 539 540 540 static PyObject * 541 541 MultibyteCodec_Encode(MultibyteCodecObject *self, 542 543 { 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 542 PyObject *args, PyObject *kwargs) 543 { 544 MultibyteCodec_State state; 545 Py_UNICODE *data; 546 PyObject *errorcb, *r, *arg, *ucvt; 547 const char *errors = NULL; 548 Py_ssize_t datalen; 549 550 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode", 551 codeckwarglist, &arg, &errors)) 552 return NULL; 553 554 if (PyUnicode_Check(arg)) 555 ucvt = NULL; 556 else { 557 arg = ucvt = PyObject_Unicode(arg); 558 if (arg == NULL) 559 return NULL; 560 else if (!PyUnicode_Check(arg)) { 561 PyErr_SetString(PyExc_TypeError, 562 "couldn't convert the object to unicode."); 563 Py_DECREF(ucvt); 564 return NULL; 565 } 566 } 567 568 data = PyUnicode_AS_UNICODE(arg); 569 datalen = PyUnicode_GET_SIZE(arg); 570 571 errorcb = internal_error_callback(errors); 572 if (errorcb == NULL) { 573 Py_XDECREF(ucvt); 574 return NULL; 575 } 576 577 if (self->codec->encinit != NULL && 578 self->codec->encinit(&state, self->codec->config) != 0) 579 goto errorexit; 580 r = multibytecodec_encode(self->codec, &state, 581 (const Py_UNICODE **)&data, datalen, errorcb, 582 MBENC_FLUSH | MBENC_RESET); 583 if (r == NULL) 584 goto errorexit; 585 586 ERROR_DECREF(errorcb); 587 Py_XDECREF(ucvt); 588 return make_tuple(r, datalen); 589 589 590 590 errorexit: 591 592 593 591 ERROR_DECREF(errorcb); 592 Py_XDECREF(ucvt); 593 return NULL; 594 594 } 595 595 596 596 static PyObject * 597 597 MultibyteCodec_Decode(MultibyteCodecObject *self, 598 599 { 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 598 PyObject *args, PyObject *kwargs) 599 { 600 MultibyteCodec_State state; 601 MultibyteDecodeBuffer buf; 602 PyObject *errorcb; 603 Py_buffer pdata; 604 const char *data, *errors = NULL; 605 Py_ssize_t datalen, finalsize; 606 607 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode", 608 codeckwarglist, &pdata, &errors)) 609 return NULL; 610 data = pdata.buf; 611 datalen = pdata.len; 612 613 errorcb = internal_error_callback(errors); 614 if (errorcb == NULL) { 615 PyBuffer_Release(&pdata); 616 return NULL; 617 } 618 619 if (datalen == 0) { 620 PyBuffer_Release(&pdata); 621 ERROR_DECREF(errorcb); 622 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0); 623 } 624 625 buf.excobj = NULL; 626 buf.inbuf = buf.inbuf_top = (unsigned char *)data; 627 buf.inbuf_end = buf.inbuf_top + datalen; 628 buf.outobj = PyUnicode_FromUnicode(NULL, datalen); 629 if (buf.outobj == NULL) 630 goto errorexit; 631 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj); 632 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj); 633 634 if (self->codec->decinit != NULL && 635 self->codec->decinit(&state, self->codec->config) != 0) 636 goto errorexit; 637 638 while (buf.inbuf < buf.inbuf_end) { 639 Py_ssize_t inleft, outleft, r; 640 641 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf); 642 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); 643 644 r = self->codec->decode(&state, self->codec->config, 645 &buf.inbuf, inleft, &buf.outbuf, outleft); 646 if (r == 0) 647 break; 648 else if (multibytecodec_decerror(self->codec, &state, 649 &buf, errorcb, r)) 650 goto errorexit; 651 } 652 653 finalsize = (Py_ssize_t)(buf.outbuf - 654 PyUnicode_AS_UNICODE(buf.outobj)); 655 656 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 657 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 658 goto errorexit; 659 660 PyBuffer_Release(&pdata); 661 Py_XDECREF(buf.excobj); 662 ERROR_DECREF(errorcb); 663 return make_tuple(buf.outobj, datalen); 664 664 665 665 errorexit: 666 667 668 669 670 671 666 PyBuffer_Release(&pdata); 667 ERROR_DECREF(errorcb); 668 Py_XDECREF(buf.excobj); 669 Py_XDECREF(buf.outobj); 670 671 return NULL; 672 672 } 673 673 674 674 static struct PyMethodDef multibytecodec_methods[] = { 675 {"encode",(PyCFunction)MultibyteCodec_Encode,676 677 678 {"decode",(PyCFunction)MultibyteCodec_Decode,679 680 681 {NULL,NULL},675 {"encode", (PyCFunction)MultibyteCodec_Encode, 676 METH_VARARGS | METH_KEYWORDS, 677 MultibyteCodec_Encode__doc__}, 678 {"decode", (PyCFunction)MultibyteCodec_Decode, 679 METH_VARARGS | METH_KEYWORDS, 680 MultibyteCodec_Decode__doc__}, 681 {NULL, NULL}, 682 682 }; 683 683 … … 685 685 multibytecodec_dealloc(MultibyteCodecObject *self) 686 686 { 687 687 PyObject_Del(self); 688 688 } 689 689 690 690 static PyTypeObject MultibyteCodec_Type = { 691 692 "MultibyteCodec",/* tp_name */693 sizeof(MultibyteCodecObject),/* tp_basicsize */694 0,/* tp_itemsize */695 696 697 0,/* tp_print */698 0,/* tp_getattr */699 0,/* tp_setattr */700 0,/* tp_compare */701 0,/* tp_repr */702 0,/* tp_as_number */703 0,/* tp_as_sequence */704 0,/* tp_as_mapping */705 0,/* tp_hash */706 0,/* tp_call */707 0,/* tp_str */708 PyObject_GenericGetAttr,/* tp_getattro */709 0,/* tp_setattro */710 0,/* tp_as_buffer */711 Py_TPFLAGS_DEFAULT,/* tp_flags */712 0,/* tp_doc */713 0,/* tp_traverse */714 0,/* tp_clear */715 0,/* tp_richcompare */716 0,/* tp_weaklistoffset */717 0,/* tp_iter */718 0,/* tp_iterext */719 multibytecodec_methods,/* tp_methods */691 PyVarObject_HEAD_INIT(NULL, 0) 692 "MultibyteCodec", /* tp_name */ 693 sizeof(MultibyteCodecObject), /* tp_basicsize */ 694 0, /* tp_itemsize */ 695 /* methods */ 696 (destructor)multibytecodec_dealloc, /* tp_dealloc */ 697 0, /* tp_print */ 698 0, /* tp_getattr */ 699 0, /* tp_setattr */ 700 0, /* tp_compare */ 701 0, /* tp_repr */ 702 0, /* tp_as_number */ 703 0, /* tp_as_sequence */ 704 0, /* tp_as_mapping */ 705 0, /* tp_hash */ 706 0, /* tp_call */ 707 0, /* tp_str */ 708 PyObject_GenericGetAttr, /* tp_getattro */ 709 0, /* tp_setattro */ 710 0, /* tp_as_buffer */ 711 Py_TPFLAGS_DEFAULT, /* tp_flags */ 712 0, /* tp_doc */ 713 0, /* tp_traverse */ 714 0, /* tp_clear */ 715 0, /* tp_richcompare */ 716 0, /* tp_weaklistoffset */ 717 0, /* tp_iter */ 718 0, /* tp_iterext */ 719 multibytecodec_methods, /* tp_methods */ 720 720 }; 721 721 … … 725 725 */ 726 726 727 #define STATEFUL_DCTX(o) 728 #define STATEFUL_ECTX(o) 727 #define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o)) 728 #define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o)) 729 729 730 730 static PyObject * 731 731 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx, 732 733 { 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 (const Py_UNICODE **)&inbuf,780 datalen, ctx->errors, final ? MBENC_FLUSH: 0);781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 732 PyObject *unistr, int final) 733 { 734 PyObject *ucvt, *r = NULL; 735 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL; 736 Py_ssize_t datalen, origpending; 737 738 if (PyUnicode_Check(unistr)) 739 ucvt = NULL; 740 else { 741 unistr = ucvt = PyObject_Unicode(unistr); 742 if (unistr == NULL) 743 return NULL; 744 else if (!PyUnicode_Check(unistr)) { 745 PyErr_SetString(PyExc_TypeError, 746 "couldn't convert the object to unicode."); 747 Py_DECREF(ucvt); 748 return NULL; 749 } 750 } 751 752 datalen = PyUnicode_GET_SIZE(unistr); 753 origpending = ctx->pendingsize; 754 755 if (origpending > 0) { 756 if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) { 757 PyErr_NoMemory(); 758 /* inbuf_tmp == NULL */ 759 goto errorexit; 760 } 761 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize); 762 if (inbuf_tmp == NULL) 763 goto errorexit; 764 memcpy(inbuf_tmp, ctx->pending, 765 Py_UNICODE_SIZE * ctx->pendingsize); 766 memcpy(inbuf_tmp + ctx->pendingsize, 767 PyUnicode_AS_UNICODE(unistr), 768 Py_UNICODE_SIZE * datalen); 769 datalen += ctx->pendingsize; 770 ctx->pendingsize = 0; 771 inbuf = inbuf_tmp; 772 } 773 else 774 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr); 775 776 inbuf_end = inbuf + datalen; 777 778 r = multibytecodec_encode(ctx->codec, &ctx->state, 779 (const Py_UNICODE **)&inbuf, datalen, 780 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0); 781 if (r == NULL) { 782 /* recover the original pending buffer */ 783 if (origpending > 0) 784 memcpy(ctx->pending, inbuf_tmp, 785 Py_UNICODE_SIZE * origpending); 786 ctx->pendingsize = origpending; 787 goto errorexit; 788 } 789 790 if (inbuf < inbuf_end) { 791 ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf); 792 if (ctx->pendingsize > MAXENCPENDING) { 793 /* normal codecs can't reach here */ 794 ctx->pendingsize = 0; 795 PyErr_SetString(PyExc_UnicodeError, 796 "pending buffer overflow"); 797 goto errorexit; 798 } 799 memcpy(ctx->pending, inbuf, 800 ctx->pendingsize * Py_UNICODE_SIZE); 801 } 802 803 if (inbuf_tmp != NULL) 804 PyMem_Del(inbuf_tmp); 805 Py_XDECREF(ucvt); 806 return r; 807 807 808 808 errorexit: 809 810 811 812 813 809 if (inbuf_tmp != NULL) 810 PyMem_Del(inbuf_tmp); 811 Py_XDECREF(r); 812 Py_XDECREF(ucvt); 813 return NULL; 814 814 } 815 815 816 816 static int 817 817 decoder_append_pending(MultibyteStatefulDecoderContext *ctx, 818 819 { 820 821 822 823 824 825 826 827 828 829 830 818 MultibyteDecodeBuffer *buf) 819 { 820 Py_ssize_t npendings; 821 822 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 823 if (npendings + ctx->pendingsize > MAXDECPENDING || 824 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) { 825 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow"); 826 return -1; 827 } 828 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings); 829 ctx->pendingsize += npendings; 830 return 0; 831 831 } 832 832 833 833 static int 834 834 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data, 835 836 { 837 838 839 840 841 842 843 844 845 846 847 848 835 Py_ssize_t size) 836 { 837 buf->inbuf = buf->inbuf_top = (const unsigned char *)data; 838 buf->inbuf_end = buf->inbuf_top + size; 839 if (buf->outobj == NULL) { /* only if outobj is not allocated yet */ 840 buf->outobj = PyUnicode_FromUnicode(NULL, size); 841 if (buf->outobj == NULL) 842 return -1; 843 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj); 844 buf->outbuf_end = buf->outbuf + 845 PyUnicode_GET_SIZE(buf->outobj); 846 } 847 848 return 0; 849 849 } 850 850 851 851 static int 852 852 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx, 853 854 { 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 853 MultibyteDecodeBuffer *buf) 854 { 855 while (buf->inbuf < buf->inbuf_end) { 856 Py_ssize_t inleft, outleft; 857 Py_ssize_t r; 858 859 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); 860 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); 861 862 r = ctx->codec->decode(&ctx->state, ctx->codec->config, 863 &buf->inbuf, inleft, &buf->outbuf, outleft); 864 if (r == 0 || r == MBERR_TOOFEW) 865 break; 866 else if (multibytecodec_decerror(ctx->codec, &ctx->state, 867 buf, ctx->errors, r)) 868 return -1; 869 } 870 return 0; 871 871 } 872 872 … … 878 878 static PyObject * 879 879 mbiencoder_encode(MultibyteIncrementalEncoderObject *self, 880 881 { 882 883 884 885 886 887 888 889 880 PyObject *args, PyObject *kwargs) 881 { 882 PyObject *data; 883 int final = 0; 884 885 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode", 886 incrementalkwarglist, &data, &final)) 887 return NULL; 888 889 return encoder_encode_stateful(STATEFUL_ECTX(self), data, final); 890 890 } 891 891 … … 893 893 mbiencoder_reset(MultibyteIncrementalEncoderObject *self) 894 894 { 895 896 897 898 899 900 895 if (self->codec->decreset != NULL && 896 self->codec->decreset(&self->state, self->codec->config) != 0) 897 return NULL; 898 self->pendingsize = 0; 899 900 Py_RETURN_NONE; 901 901 } 902 902 903 903 static struct PyMethodDef mbiencoder_methods[] = { 904 {"encode",(PyCFunction)mbiencoder_encode,905 906 {"reset",(PyCFunction)mbiencoder_reset,907 908 {NULL,NULL},904 {"encode", (PyCFunction)mbiencoder_encode, 905 METH_VARARGS | METH_KEYWORDS, NULL}, 906 {"reset", (PyCFunction)mbiencoder_reset, 907 METH_NOARGS, NULL}, 908 {NULL, NULL}, 909 909 }; 910 910 … … 912 912 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 913 913 { 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 914 MultibyteIncrementalEncoderObject *self; 915 PyObject *codec = NULL; 916 char *errors = NULL; 917 918 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder", 919 incnewkwarglist, &errors)) 920 return NULL; 921 922 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0); 923 if (self == NULL) 924 return NULL; 925 926 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 927 if (codec == NULL) 928 goto errorexit; 929 if (!MultibyteCodec_Check(codec)) { 930 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 931 goto errorexit; 932 } 933 934 self->codec = ((MultibyteCodecObject *)codec)->codec; 935 self->pendingsize = 0; 936 self->errors = internal_error_callback(errors); 937 if (self->errors == NULL) 938 goto errorexit; 939 if (self->codec->encinit != NULL && 940 self->codec->encinit(&self->state, self->codec->config) != 0) 941 goto errorexit; 942 943 Py_DECREF(codec); 944 return (PyObject *)self; 945 945 946 946 errorexit: 947 948 949 947 Py_XDECREF(self); 948 Py_XDECREF(codec); 949 return NULL; 950 950 } 951 951 … … 953 953 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds) 954 954 { 955 955 return 0; 956 956 } 957 957 958 958 static int 959 959 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self, 960 961 { 962 963 964 960 visitproc visit, void *arg) 961 { 962 if (ERROR_ISCUSTOM(self->errors)) 963 Py_VISIT(self->errors); 964 return 0; 965 965 } 966 966 … … 968 968 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self) 969 969 { 970 971 972 970 PyObject_GC_UnTrack(self); 971 ERROR_DECREF(self->errors); 972 Py_TYPE(self)->tp_free(self); 973 973 } 974 974 975 975 static PyTypeObject MultibyteIncrementalEncoder_Type = { 976 977 "MultibyteIncrementalEncoder",/* tp_name */978 979 0,/* tp_itemsize */980 981 982 0,/* tp_print */983 0,/* tp_getattr */984 0,/* tp_setattr */985 0,/* tp_compare */986 0,/* tp_repr */987 0,/* tp_as_number */988 0,/* tp_as_sequence */989 0,/* tp_as_mapping */990 0,/* tp_hash */991 0,/* tp_call */992 0,/* tp_str */993 PyObject_GenericGetAttr,/* tp_getattro */994 0,/* tp_setattro */995 0,/* tp_as_buffer */996 997 | Py_TPFLAGS_BASETYPE,/* tp_flags */998 0,/* tp_doc */999 (traverseproc)mbiencoder_traverse,/* tp_traverse */1000 0,/* tp_clear */1001 0,/* tp_richcompare */1002 0,/* tp_weaklistoffset */1003 0,/* tp_iter */1004 0,/* tp_iterext */1005 mbiencoder_methods,/* tp_methods */1006 0,/* tp_members */1007 codecctx_getsets,/* tp_getset */1008 0,/* tp_base */1009 0,/* tp_dict */1010 0,/* tp_descr_get */1011 0,/* tp_descr_set */1012 0,/* tp_dictoffset */1013 mbiencoder_init,/* tp_init */1014 0,/* tp_alloc */1015 mbiencoder_new,/* tp_new */976 PyVarObject_HEAD_INIT(NULL, 0) 977 "MultibyteIncrementalEncoder", /* tp_name */ 978 sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */ 979 0, /* tp_itemsize */ 980 /* methods */ 981 (destructor)mbiencoder_dealloc, /* tp_dealloc */ 982 0, /* tp_print */ 983 0, /* tp_getattr */ 984 0, /* tp_setattr */ 985 0, /* tp_compare */ 986 0, /* tp_repr */ 987 0, /* tp_as_number */ 988 0, /* tp_as_sequence */ 989 0, /* tp_as_mapping */ 990 0, /* tp_hash */ 991 0, /* tp_call */ 992 0, /* tp_str */ 993 PyObject_GenericGetAttr, /* tp_getattro */ 994 0, /* tp_setattro */ 995 0, /* tp_as_buffer */ 996 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 997 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 998 0, /* tp_doc */ 999 (traverseproc)mbiencoder_traverse, /* tp_traverse */ 1000 0, /* tp_clear */ 1001 0, /* tp_richcompare */ 1002 0, /* tp_weaklistoffset */ 1003 0, /* tp_iter */ 1004 0, /* tp_iterext */ 1005 mbiencoder_methods, /* tp_methods */ 1006 0, /* tp_members */ 1007 codecctx_getsets, /* tp_getset */ 1008 0, /* tp_base */ 1009 0, /* tp_dict */ 1010 0, /* tp_descr_get */ 1011 0, /* tp_descr_set */ 1012 0, /* tp_dictoffset */ 1013 mbiencoder_init, /* tp_init */ 1014 0, /* tp_alloc */ 1015 mbiencoder_new, /* tp_new */ 1016 1016 }; 1017 1017 … … 1023 1023 static PyObject * 1024 1024 mbidecoder_decode(MultibyteIncrementalDecoderObject *self, 1025 1026 { 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1025 PyObject *args, PyObject *kwargs) 1026 { 1027 MultibyteDecodeBuffer buf; 1028 char *data, *wdata = NULL; 1029 Py_buffer pdata; 1030 Py_ssize_t wsize, finalsize = 0, size, origpending; 1031 int final = 0; 1032 1033 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode", 1034 incrementalkwarglist, &pdata, &final)) 1035 return NULL; 1036 data = pdata.buf; 1037 size = pdata.len; 1038 1039 buf.outobj = buf.excobj = NULL; 1040 origpending = self->pendingsize; 1041 1042 if (self->pendingsize == 0) { 1043 wsize = size; 1044 wdata = data; 1045 } 1046 else { 1047 if (size > PY_SSIZE_T_MAX - self->pendingsize) { 1048 PyErr_NoMemory(); 1049 goto errorexit; 1050 } 1051 wsize = size + self->pendingsize; 1052 wdata = PyMem_Malloc(wsize); 1053 if (wdata == NULL) 1054 goto errorexit; 1055 memcpy(wdata, self->pending, self->pendingsize); 1056 memcpy(wdata + self->pendingsize, data, size); 1057 self->pendingsize = 0; 1058 } 1059 1060 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0) 1061 goto errorexit; 1062 1063 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf)) 1064 goto errorexit; 1065 1066 if (final && buf.inbuf < buf.inbuf_end) { 1067 if (multibytecodec_decerror(self->codec, &self->state, 1068 &buf, self->errors, MBERR_TOOFEW)) { 1069 /* recover the original pending buffer */ 1070 memcpy(self->pending, wdata, origpending); 1071 self->pendingsize = origpending; 1072 goto errorexit; 1073 } 1074 } 1075 1076 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */ 1077 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0) 1078 goto errorexit; 1079 } 1080 1081 finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj)); 1082 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 1083 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 1084 goto errorexit; 1085 1086 PyBuffer_Release(&pdata); 1087 if (wdata != data) 1088 PyMem_Del(wdata); 1089 Py_XDECREF(buf.excobj); 1090 return buf.outobj; 1091 1091 1092 1092 errorexit: 1093 1094 1095 1096 1097 1098 1093 PyBuffer_Release(&pdata); 1094 if (wdata != NULL && wdata != data) 1095 PyMem_Del(wdata); 1096 Py_XDECREF(buf.excobj); 1097 Py_XDECREF(buf.outobj); 1098 return NULL; 1099 1099 } 1100 1100 … … 1102 1102 mbidecoder_reset(MultibyteIncrementalDecoderObject *self) 1103 1103 { 1104 1105 1106 1107 1108 1109 1104 if (self->codec->decreset != NULL && 1105 self->codec->decreset(&self->state, self->codec->config) != 0) 1106 return NULL; 1107 self->pendingsize = 0; 1108 1109 Py_RETURN_NONE; 1110 1110 } 1111 1111 1112 1112 static struct PyMethodDef mbidecoder_methods[] = { 1113 {"decode",(PyCFunction)mbidecoder_decode,1114 1115 {"reset",(PyCFunction)mbidecoder_reset,1116 1117 {NULL,NULL},1113 {"decode", (PyCFunction)mbidecoder_decode, 1114 METH_VARARGS | METH_KEYWORDS, NULL}, 1115 {"reset", (PyCFunction)mbidecoder_reset, 1116 METH_NOARGS, NULL}, 1117 {NULL, NULL}, 1118 1118 }; 1119 1119 … … 1121 1121 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1122 1122 { 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1123 MultibyteIncrementalDecoderObject *self; 1124 PyObject *codec = NULL; 1125 char *errors = NULL; 1126 1127 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder", 1128 incnewkwarglist, &errors)) 1129 return NULL; 1130 1131 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0); 1132 if (self == NULL) 1133 return NULL; 1134 1135 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1136 if (codec == NULL) 1137 goto errorexit; 1138 if (!MultibyteCodec_Check(codec)) { 1139 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1140 goto errorexit; 1141 } 1142 1143 self->codec = ((MultibyteCodecObject *)codec)->codec; 1144 self->pendingsize = 0; 1145 self->errors = internal_error_callback(errors); 1146 if (self->errors == NULL) 1147 goto errorexit; 1148 if (self->codec->decinit != NULL && 1149 self->codec->decinit(&self->state, self->codec->config) != 0) 1150 goto errorexit; 1151 1152 Py_DECREF(codec); 1153 return (PyObject *)self; 1154 1154 1155 1155 errorexit: 1156 1157 1158 1156 Py_XDECREF(self); 1157 Py_XDECREF(codec); 1158 return NULL; 1159 1159 } 1160 1160 … … 1162 1162 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds) 1163 1163 { 1164 1164 return 0; 1165 1165 } 1166 1166 1167 1167 static int 1168 1168 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self, 1169 1170 { 1171 1172 1173 1169 visitproc visit, void *arg) 1170 { 1171 if (ERROR_ISCUSTOM(self->errors)) 1172 Py_VISIT(self->errors); 1173 return 0; 1174 1174 } 1175 1175 … … 1177 1177 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self) 1178 1178 { 1179 1180 1181 1179 PyObject_GC_UnTrack(self); 1180 ERROR_DECREF(self->errors); 1181 Py_TYPE(self)->tp_free(self); 1182 1182 } 1183 1183 1184 1184 static PyTypeObject MultibyteIncrementalDecoder_Type = { 1185 1186 "MultibyteIncrementalDecoder",/* tp_name */1187 1188 0,/* tp_itemsize */1189 1190 1191 0,/* tp_print */1192 0,/* tp_getattr */1193 0,/* tp_setattr */1194 0,/* tp_compare */1195 0,/* tp_repr */1196 0,/* tp_as_number */1197 0,/* tp_as_sequence */1198 0,/* tp_as_mapping */1199 0,/* tp_hash */1200 0,/* tp_call */1201 0,/* tp_str */1202 PyObject_GenericGetAttr,/* tp_getattro */1203 0,/* tp_setattro */1204 0,/* tp_as_buffer */1205 1206 | Py_TPFLAGS_BASETYPE,/* tp_flags */1207 0,/* tp_doc */1208 (traverseproc)mbidecoder_traverse,/* tp_traverse */1209 0,/* tp_clear */1210 0,/* tp_richcompare */1211 0,/* tp_weaklistoffset */1212 0,/* tp_iter */1213 0,/* tp_iterext */1214 mbidecoder_methods,/* tp_methods */1215 0,/* tp_members */1216 codecctx_getsets,/* tp_getset */1217 0,/* tp_base */1218 0,/* tp_dict */1219 0,/* tp_descr_get */1220 0,/* tp_descr_set */1221 0,/* tp_dictoffset */1222 mbidecoder_init,/* tp_init */1223 0,/* tp_alloc */1224 mbidecoder_new,/* tp_new */1185 PyVarObject_HEAD_INIT(NULL, 0) 1186 "MultibyteIncrementalDecoder", /* tp_name */ 1187 sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */ 1188 0, /* tp_itemsize */ 1189 /* methods */ 1190 (destructor)mbidecoder_dealloc, /* tp_dealloc */ 1191 0, /* tp_print */ 1192 0, /* tp_getattr */ 1193 0, /* tp_setattr */ 1194 0, /* tp_compare */ 1195 0, /* tp_repr */ 1196 0, /* tp_as_number */ 1197 0, /* tp_as_sequence */ 1198 0, /* tp_as_mapping */ 1199 0, /* tp_hash */ 1200 0, /* tp_call */ 1201 0, /* tp_str */ 1202 PyObject_GenericGetAttr, /* tp_getattro */ 1203 0, /* tp_setattro */ 1204 0, /* tp_as_buffer */ 1205 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1206 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1207 0, /* tp_doc */ 1208 (traverseproc)mbidecoder_traverse, /* tp_traverse */ 1209 0, /* tp_clear */ 1210 0, /* tp_richcompare */ 1211 0, /* tp_weaklistoffset */ 1212 0, /* tp_iter */ 1213 0, /* tp_iterext */ 1214 mbidecoder_methods, /* tp_methods */ 1215 0, /* tp_members */ 1216 codecctx_getsets, /* tp_getset */ 1217 0, /* tp_base */ 1218 0, /* tp_dict */ 1219 0, /* tp_descr_get */ 1220 0, /* tp_descr_set */ 1221 0, /* tp_dictoffset */ 1222 mbidecoder_init, /* tp_init */ 1223 0, /* tp_alloc */ 1224 mbidecoder_new, /* tp_new */ 1225 1225 }; 1226 1226 … … 1232 1232 static PyObject * 1233 1233 mbstreamreader_iread(MultibyteStreamReaderObject *self, 1234 1235 { 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1234 const char *method, Py_ssize_t sizehint) 1235 { 1236 MultibyteDecodeBuffer buf; 1237 PyObject *cres; 1238 Py_ssize_t rsize, finalsize = 0; 1239 1240 if (sizehint == 0) 1241 return PyUnicode_FromUnicode(NULL, 0); 1242 1243 buf.outobj = buf.excobj = NULL; 1244 cres = NULL; 1245 1246 for (;;) { 1247 int endoffile; 1248 1249 if (sizehint < 0) 1250 cres = PyObject_CallMethod(self->stream, 1251 (char *)method, NULL); 1252 else 1253 cres = PyObject_CallMethod(self->stream, 1254 (char *)method, "i", sizehint); 1255 if (cres == NULL) 1256 goto errorexit; 1257 1258 if (!PyString_Check(cres)) { 1259 PyErr_SetString(PyExc_TypeError, 1260 "stream function returned a " 1261 "non-string object"); 1262 goto errorexit; 1263 } 1264 1265 endoffile = (PyString_GET_SIZE(cres) == 0); 1266 1267 if (self->pendingsize > 0) { 1268 PyObject *ctr; 1269 char *ctrdata; 1270 1271 if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) { 1272 PyErr_NoMemory(); 1273 goto errorexit; 1274 } 1275 rsize = PyString_GET_SIZE(cres) + self->pendingsize; 1276 ctr = PyString_FromStringAndSize(NULL, rsize); 1277 if (ctr == NULL) 1278 goto errorexit; 1279 ctrdata = PyString_AS_STRING(ctr); 1280 memcpy(ctrdata, self->pending, self->pendingsize); 1281 memcpy(ctrdata + self->pendingsize, 1282 PyString_AS_STRING(cres), 1283 PyString_GET_SIZE(cres)); 1284 Py_DECREF(cres); 1285 cres = ctr; 1286 self->pendingsize = 0; 1287 } 1288 1289 rsize = PyString_GET_SIZE(cres); 1290 if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres), 1291 rsize) != 0) 1292 goto errorexit; 1293 1294 if (rsize > 0 && decoder_feed_buffer( 1295 (MultibyteStatefulDecoderContext *)self, &buf)) 1296 goto errorexit; 1297 1298 if (endoffile || sizehint < 0) { 1299 if (buf.inbuf < buf.inbuf_end && 1300 multibytecodec_decerror(self->codec, &self->state, 1301 &buf, self->errors, MBERR_TOOFEW)) 1302 goto errorexit; 1303 } 1304 1305 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */ 1306 if (decoder_append_pending(STATEFUL_DCTX(self), 1307 &buf) != 0) 1308 goto errorexit; 1309 } 1310 1311 finalsize = (Py_ssize_t)(buf.outbuf - 1312 PyUnicode_AS_UNICODE(buf.outobj)); 1313 Py_DECREF(cres); 1314 cres = NULL; 1315 1316 if (sizehint < 0 || finalsize != 0 || rsize == 0) 1317 break; 1318 1319 sizehint = 1; /* read 1 more byte and retry */ 1320 } 1321 1322 if (finalsize != PyUnicode_GET_SIZE(buf.outobj)) 1323 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1) 1324 goto errorexit; 1325 1326 Py_XDECREF(cres); 1327 Py_XDECREF(buf.excobj); 1328 return buf.outobj; 1329 1329 1330 1330 errorexit: 1331 1332 1333 1334 1331 Py_XDECREF(cres); 1332 Py_XDECREF(buf.excobj); 1333 Py_XDECREF(buf.outobj); 1334 return NULL; 1335 1335 } 1336 1336 … … 1338 1338 mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args) 1339 1339 { 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1340 PyObject *sizeobj = NULL; 1341 Py_ssize_t size; 1342 1343 if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj)) 1344 return NULL; 1345 1346 if (sizeobj == Py_None || sizeobj == NULL) 1347 size = -1; 1348 else if (PyInt_Check(sizeobj)) 1349 size = PyInt_AsSsize_t(sizeobj); 1350 else { 1351 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1352 return NULL; 1353 } 1354 1355 return mbstreamreader_iread(self, "read", size); 1356 1356 } 1357 1357 … … 1359 1359 mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args) 1360 1360 { 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1361 PyObject *sizeobj = NULL; 1362 Py_ssize_t size; 1363 1364 if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj)) 1365 return NULL; 1366 1367 if (sizeobj == Py_None || sizeobj == NULL) 1368 size = -1; 1369 else if (PyInt_Check(sizeobj)) 1370 size = PyInt_AsSsize_t(sizeobj); 1371 else { 1372 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1373 return NULL; 1374 } 1375 1376 return mbstreamreader_iread(self, "readline", size); 1377 1377 } 1378 1378 … … 1380 1380 mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args) 1381 1381 { 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1382 PyObject *sizehintobj = NULL, *r, *sr; 1383 Py_ssize_t sizehint; 1384 1385 if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj)) 1386 return NULL; 1387 1388 if (sizehintobj == Py_None || sizehintobj == NULL) 1389 sizehint = -1; 1390 else if (PyInt_Check(sizehintobj)) 1391 sizehint = PyInt_AsSsize_t(sizehintobj); 1392 else { 1393 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer"); 1394 return NULL; 1395 } 1396 1397 r = mbstreamreader_iread(self, "read", sizehint); 1398 if (r == NULL) 1399 return NULL; 1400 1401 sr = PyUnicode_Splitlines(r, 1); 1402 Py_DECREF(r); 1403 return sr; 1404 1404 } 1405 1405 … … 1407 1407 mbstreamreader_reset(MultibyteStreamReaderObject *self) 1408 1408 { 1409 1410 1411 1412 1413 1414 1409 if (self->codec->decreset != NULL && 1410 self->codec->decreset(&self->state, self->codec->config) != 0) 1411 return NULL; 1412 self->pendingsize = 0; 1413 1414 Py_RETURN_NONE; 1415 1415 } 1416 1416 1417 1417 static struct PyMethodDef mbstreamreader_methods[] = { 1418 {"read",(PyCFunction)mbstreamreader_read,1419 1420 {"readline",(PyCFunction)mbstreamreader_readline,1421 1422 {"readlines",(PyCFunction)mbstreamreader_readlines,1423 1424 {"reset",(PyCFunction)mbstreamreader_reset,1425 1426 {NULL,NULL},1418 {"read", (PyCFunction)mbstreamreader_read, 1419 METH_VARARGS, NULL}, 1420 {"readline", (PyCFunction)mbstreamreader_readline, 1421 METH_VARARGS, NULL}, 1422 {"readlines", (PyCFunction)mbstreamreader_readlines, 1423 METH_VARARGS, NULL}, 1424 {"reset", (PyCFunction)mbstreamreader_reset, 1425 METH_NOARGS, NULL}, 1426 {NULL, NULL}, 1427 1427 }; 1428 1428 1429 1429 static PyMemberDef mbstreamreader_members[] = { 1430 {"stream",T_OBJECT,1431 1432 1433 1430 {"stream", T_OBJECT, 1431 offsetof(MultibyteStreamReaderObject, stream), 1432 READONLY, NULL}, 1433 {NULL,} 1434 1434 }; 1435 1435 … … 1437 1437 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1438 1438 { 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1439 MultibyteStreamReaderObject *self; 1440 PyObject *stream, *codec = NULL; 1441 char *errors = NULL; 1442 1443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader", 1444 streamkwarglist, &stream, &errors)) 1445 return NULL; 1446 1447 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0); 1448 if (self == NULL) 1449 return NULL; 1450 1451 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1452 if (codec == NULL) 1453 goto errorexit; 1454 if (!MultibyteCodec_Check(codec)) { 1455 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1456 goto errorexit; 1457 } 1458 1459 self->codec = ((MultibyteCodecObject *)codec)->codec; 1460 self->stream = stream; 1461 Py_INCREF(stream); 1462 self->pendingsize = 0; 1463 self->errors = internal_error_callback(errors); 1464 if (self->errors == NULL) 1465 goto errorexit; 1466 if (self->codec->decinit != NULL && 1467 self->codec->decinit(&self->state, self->codec->config) != 0) 1468 goto errorexit; 1469 1470 Py_DECREF(codec); 1471 return (PyObject *)self; 1472 1472 1473 1473 errorexit: 1474 1475 1476 1474 Py_XDECREF(self); 1475 Py_XDECREF(codec); 1476 return NULL; 1477 1477 } 1478 1478 … … 1480 1480 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds) 1481 1481 { 1482 1482 return 0; 1483 1483 } 1484 1484 1485 1485 static int 1486 1486 mbstreamreader_traverse(MultibyteStreamReaderObject *self, 1487 1488 { 1489 1490 1491 1492 1487 visitproc visit, void *arg) 1488 { 1489 if (ERROR_ISCUSTOM(self->errors)) 1490 Py_VISIT(self->errors); 1491 Py_VISIT(self->stream); 1492 return 0; 1493 1493 } 1494 1494 … … 1496 1496 mbstreamreader_dealloc(MultibyteStreamReaderObject *self) 1497 1497 { 1498 1499 1500 1501 1498 PyObject_GC_UnTrack(self); 1499 ERROR_DECREF(self->errors); 1500 Py_XDECREF(self->stream); 1501 Py_TYPE(self)->tp_free(self); 1502 1502 } 1503 1503 1504 1504 static PyTypeObject MultibyteStreamReader_Type = { 1505 1506 "MultibyteStreamReader",/* tp_name */1507 1508 0,/* tp_itemsize */1509 1510 1511 0,/* tp_print */1512 0,/* tp_getattr */1513 0,/* tp_setattr */1514 0,/* tp_compare */1515 0,/* tp_repr */1516 0,/* tp_as_number */1517 0,/* tp_as_sequence */1518 0,/* tp_as_mapping */1519 0,/* tp_hash */1520 0,/* tp_call */1521 0,/* tp_str */1522 PyObject_GenericGetAttr,/* tp_getattro */1523 0,/* tp_setattro */1524 0,/* tp_as_buffer */1525 1526 | Py_TPFLAGS_BASETYPE,/* tp_flags */1527 0,/* tp_doc */1528 (traverseproc)mbstreamreader_traverse,/* tp_traverse */1529 0,/* tp_clear */1530 0,/* tp_richcompare */1531 0,/* tp_weaklistoffset */1532 0,/* tp_iter */1533 0,/* tp_iterext */1534 mbstreamreader_methods,/* tp_methods */1535 mbstreamreader_members,/* tp_members */1536 codecctx_getsets,/* tp_getset */1537 0,/* tp_base */1538 0,/* tp_dict */1539 0,/* tp_descr_get */1540 0,/* tp_descr_set */1541 0,/* tp_dictoffset */1542 mbstreamreader_init,/* tp_init */1543 0,/* tp_alloc */1544 mbstreamreader_new,/* tp_new */1505 PyVarObject_HEAD_INIT(NULL, 0) 1506 "MultibyteStreamReader", /* tp_name */ 1507 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */ 1508 0, /* tp_itemsize */ 1509 /* methods */ 1510 (destructor)mbstreamreader_dealloc, /* tp_dealloc */ 1511 0, /* tp_print */ 1512 0, /* tp_getattr */ 1513 0, /* tp_setattr */ 1514 0, /* tp_compare */ 1515 0, /* tp_repr */ 1516 0, /* tp_as_number */ 1517 0, /* tp_as_sequence */ 1518 0, /* tp_as_mapping */ 1519 0, /* tp_hash */ 1520 0, /* tp_call */ 1521 0, /* tp_str */ 1522 PyObject_GenericGetAttr, /* tp_getattro */ 1523 0, /* tp_setattro */ 1524 0, /* tp_as_buffer */ 1525 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1526 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1527 0, /* tp_doc */ 1528 (traverseproc)mbstreamreader_traverse, /* tp_traverse */ 1529 0, /* tp_clear */ 1530 0, /* tp_richcompare */ 1531 0, /* tp_weaklistoffset */ 1532 0, /* tp_iter */ 1533 0, /* tp_iterext */ 1534 mbstreamreader_methods, /* tp_methods */ 1535 mbstreamreader_members, /* tp_members */ 1536 codecctx_getsets, /* tp_getset */ 1537 0, /* tp_base */ 1538 0, /* tp_dict */ 1539 0, /* tp_descr_get */ 1540 0, /* tp_descr_set */ 1541 0, /* tp_dictoffset */ 1542 mbstreamreader_init, /* tp_init */ 1543 0, /* tp_alloc */ 1544 mbstreamreader_new, /* tp_new */ 1545 1545 }; 1546 1546 … … 1552 1552 static int 1553 1553 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self, 1554 1555 { 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1554 PyObject *unistr) 1555 { 1556 PyObject *str, *wr; 1557 1558 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0); 1559 if (str == NULL) 1560 return -1; 1561 1562 wr = PyObject_CallMethod(self->stream, "write", "O", str); 1563 Py_DECREF(str); 1564 if (wr == NULL) 1565 return -1; 1566 1567 Py_DECREF(wr); 1568 return 0; 1569 1569 } 1570 1570 … … 1572 1572 mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj) 1573 1573 { 1574 1575 1576 1577 1574 if (mbstreamwriter_iwrite(self, strobj)) 1575 return NULL; 1576 else 1577 Py_RETURN_NONE; 1578 1578 } 1579 1579 … … 1581 1581 mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines) 1582 1582 { 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1583 PyObject *strobj; 1584 int i, r; 1585 1586 if (!PySequence_Check(lines)) { 1587 PyErr_SetString(PyExc_TypeError, 1588 "arg must be a sequence object"); 1589 return NULL; 1590 } 1591 1592 for (i = 0; i < PySequence_Length(lines); i++) { 1593 /* length can be changed even within this loop */ 1594 strobj = PySequence_GetItem(lines, i); 1595 if (strobj == NULL) 1596 return NULL; 1597 1598 r = mbstreamwriter_iwrite(self, strobj); 1599 Py_DECREF(strobj); 1600 if (r == -1) 1601 return NULL; 1602 } 1603 1604 Py_RETURN_NONE; 1605 1605 } 1606 1606 … … 1608 1608 mbstreamwriter_reset(MultibyteStreamWriterObject *self) 1609 1609 { 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1610 const Py_UNICODE *pending; 1611 PyObject *pwrt; 1612 1613 pending = self->pending; 1614 pwrt = multibytecodec_encode(self->codec, &self->state, 1615 &pending, self->pendingsize, self->errors, 1616 MBENC_FLUSH | MBENC_RESET); 1617 /* some pending buffer can be truncated when UnicodeEncodeError is 1618 * raised on 'strict' mode. but, 'reset' method is designed to 1619 * reset the pending buffer or states so failed string sequence 1620 * ought to be missed */ 1621 self->pendingsize = 0; 1622 if (pwrt == NULL) 1623 return NULL; 1624 1625 if (PyString_Size(pwrt) > 0) { 1626 PyObject *wr; 1627 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt); 1628 if (wr == NULL) { 1629 Py_DECREF(pwrt); 1630 return NULL; 1631 } 1632 } 1633 Py_DECREF(pwrt); 1634 1635 Py_RETURN_NONE; 1636 1636 } 1637 1637 … … 1639 1639 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1640 1640 { 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1641 MultibyteStreamWriterObject *self; 1642 PyObject *stream, *codec = NULL; 1643 char *errors = NULL; 1644 1645 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter", 1646 streamkwarglist, &stream, &errors)) 1647 return NULL; 1648 1649 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0); 1650 if (self == NULL) 1651 return NULL; 1652 1653 codec = PyObject_GetAttrString((PyObject *)type, "codec"); 1654 if (codec == NULL) 1655 goto errorexit; 1656 if (!MultibyteCodec_Check(codec)) { 1657 PyErr_SetString(PyExc_TypeError, "codec is unexpected type"); 1658 goto errorexit; 1659 } 1660 1661 self->codec = ((MultibyteCodecObject *)codec)->codec; 1662 self->stream = stream; 1663 Py_INCREF(stream); 1664 self->pendingsize = 0; 1665 self->errors = internal_error_callback(errors); 1666 if (self->errors == NULL) 1667 goto errorexit; 1668 if (self->codec->encinit != NULL && 1669 self->codec->encinit(&self->state, self->codec->config) != 0) 1670 goto errorexit; 1671 1672 Py_DECREF(codec); 1673 return (PyObject *)self; 1674 1674 1675 1675 errorexit: 1676 1677 1678 1676 Py_XDECREF(self); 1677 Py_XDECREF(codec); 1678 return NULL; 1679 1679 } 1680 1680 … … 1682 1682 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds) 1683 1683 { 1684 1684 return 0; 1685 1685 } 1686 1686 1687 1687 static int 1688 1688 mbstreamwriter_traverse(MultibyteStreamWriterObject *self, 1689 1690 { 1691 1692 1693 1694 1689 visitproc visit, void *arg) 1690 { 1691 if (ERROR_ISCUSTOM(self->errors)) 1692 Py_VISIT(self->errors); 1693 Py_VISIT(self->stream); 1694 return 0; 1695 1695 } 1696 1696 … … 1698 1698 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self) 1699 1699 { 1700 1701 1702 1703 1700 PyObject_GC_UnTrack(self); 1701 ERROR_DECREF(self->errors); 1702 Py_XDECREF(self->stream); 1703 Py_TYPE(self)->tp_free(self); 1704 1704 } 1705 1705 1706 1706 static struct PyMethodDef mbstreamwriter_methods[] = { 1707 {"write",(PyCFunction)mbstreamwriter_write,1708 1709 {"writelines",(PyCFunction)mbstreamwriter_writelines,1710 1711 {"reset",(PyCFunction)mbstreamwriter_reset,1712 1713 {NULL,NULL},1707 {"write", (PyCFunction)mbstreamwriter_write, 1708 METH_O, NULL}, 1709 {"writelines", (PyCFunction)mbstreamwriter_writelines, 1710 METH_O, NULL}, 1711 {"reset", (PyCFunction)mbstreamwriter_reset, 1712 METH_NOARGS, NULL}, 1713 {NULL, NULL}, 1714 1714 }; 1715 1715 1716 1716 static PyMemberDef mbstreamwriter_members[] = { 1717 {"stream",T_OBJECT,1718 1719 1720 1717 {"stream", T_OBJECT, 1718 offsetof(MultibyteStreamWriterObject, stream), 1719 READONLY, NULL}, 1720 {NULL,} 1721 1721 }; 1722 1722 1723 1723 static PyTypeObject MultibyteStreamWriter_Type = { 1724 1725 "MultibyteStreamWriter",/* tp_name */1726 1727 0,/* tp_itemsize */1728 1729 1730 0,/* tp_print */1731 0,/* tp_getattr */1732 0,/* tp_setattr */1733 0,/* tp_compare */1734 0,/* tp_repr */1735 0,/* tp_as_number */1736 0,/* tp_as_sequence */1737 0,/* tp_as_mapping */1738 0,/* tp_hash */1739 0,/* tp_call */1740 0,/* tp_str */1741 PyObject_GenericGetAttr,/* tp_getattro */1742 0,/* tp_setattro */1743 0,/* tp_as_buffer */1744 1745 | Py_TPFLAGS_BASETYPE,/* tp_flags */1746 0,/* tp_doc */1747 (traverseproc)mbstreamwriter_traverse,/* tp_traverse */1748 0,/* tp_clear */1749 0,/* tp_richcompare */1750 0,/* tp_weaklistoffset */1751 0,/* tp_iter */1752 0,/* tp_iterext */1753 mbstreamwriter_methods,/* tp_methods */1754 mbstreamwriter_members,/* tp_members */1755 codecctx_getsets,/* tp_getset */1756 0,/* tp_base */1757 0,/* tp_dict */1758 0,/* tp_descr_get */1759 0,/* tp_descr_set */1760 0,/* tp_dictoffset */1761 mbstreamwriter_init,/* tp_init */1762 0,/* tp_alloc */1763 mbstreamwriter_new,/* tp_new */1724 PyVarObject_HEAD_INIT(NULL, 0) 1725 "MultibyteStreamWriter", /* tp_name */ 1726 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */ 1727 0, /* tp_itemsize */ 1728 /* methods */ 1729 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */ 1730 0, /* tp_print */ 1731 0, /* tp_getattr */ 1732 0, /* tp_setattr */ 1733 0, /* tp_compare */ 1734 0, /* tp_repr */ 1735 0, /* tp_as_number */ 1736 0, /* tp_as_sequence */ 1737 0, /* tp_as_mapping */ 1738 0, /* tp_hash */ 1739 0, /* tp_call */ 1740 0, /* tp_str */ 1741 PyObject_GenericGetAttr, /* tp_getattro */ 1742 0, /* tp_setattro */ 1743 0, /* tp_as_buffer */ 1744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC 1745 | Py_TPFLAGS_BASETYPE, /* tp_flags */ 1746 0, /* tp_doc */ 1747 (traverseproc)mbstreamwriter_traverse, /* tp_traverse */ 1748 0, /* tp_clear */ 1749 0, /* tp_richcompare */ 1750 0, /* tp_weaklistoffset */ 1751 0, /* tp_iter */ 1752 0, /* tp_iterext */ 1753 mbstreamwriter_methods, /* tp_methods */ 1754 mbstreamwriter_members, /* tp_members */ 1755 codecctx_getsets, /* tp_getset */ 1756 0, /* tp_base */ 1757 0, /* tp_dict */ 1758 0, /* tp_descr_get */ 1759 0, /* tp_descr_set */ 1760 0, /* tp_dictoffset */ 1761 mbstreamwriter_init, /* tp_init */ 1762 0, /* tp_alloc */ 1763 mbstreamwriter_new, /* tp_new */ 1764 1764 }; 1765 1765 … … 1772 1772 __create_codec(PyObject *ignore, PyObject *arg) 1773 1773 { 1774 1775 1776 1777 if (!PyCObject_Check(arg)) {1778 1779 1780 1781 1782 codec = PyCObject_AsVoidPtr(arg);1783 1784 1785 1786 1787 1788 1789 1790 1791 1774 MultibyteCodecObject *self; 1775 MultibyteCodec *codec; 1776 1777 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) { 1778 PyErr_SetString(PyExc_ValueError, "argument type invalid"); 1779 return NULL; 1780 } 1781 1782 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME); 1783 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0) 1784 return NULL; 1785 1786 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type); 1787 if (self == NULL) 1788 return NULL; 1789 self->codec = codec; 1790 1791 return (PyObject *)self; 1792 1792 } 1793 1793 1794 1794 static struct PyMethodDef __methods[] = { 1795 1796 1795 {"__create_codec", (PyCFunction)__create_codec, METH_O}, 1796 {NULL, NULL}, 1797 1797 }; 1798 1798 … … 1800 1800 init_multibytecodec(void) 1801 1801 { 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 } 1802 int i; 1803 PyObject *m; 1804 PyTypeObject *typelist[] = { 1805 &MultibyteIncrementalEncoder_Type, 1806 &MultibyteIncrementalDecoder_Type, 1807 &MultibyteStreamReader_Type, 1808 &MultibyteStreamWriter_Type, 1809 NULL 1810 }; 1811 1812 if (PyType_Ready(&MultibyteCodec_Type) < 0) 1813 return; 1814 1815 m = Py_InitModule("_multibytecodec", __methods); 1816 if (m == NULL) 1817 return; 1818 1819 for (i = 0; typelist[i] != NULL; i++) { 1820 if (PyType_Ready(typelist[i]) < 0) 1821 return; 1822 Py_INCREF(typelist[i]); 1823 PyModule_AddObject(m, typelist[i]->tp_name, 1824 (PyObject *)typelist[i]); 1825 } 1826 1827 if (PyErr_Occurred()) 1828 Py_FatalError("can't initialize the _multibytecodec module"); 1829 }
Note:
See TracChangeset
for help on using the changeset viewer.