Changeset 391 for python/trunk/Modules/_csv.c
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Modules/_csv.c
r2 r391 32 32 33 33 #ifndef PyMODINIT_FUNC 34 # 35 # 36 # 37 # 38 # 34 # if defined(__cplusplus) 35 # define PyMODINIT_FUNC extern "C" void 36 # else /* __cplusplus */ 37 # define PyMODINIT_FUNC void 38 # endif /* __cplusplus */ 39 39 #endif 40 40 41 41 #ifndef Py_CLEAR 42 #define Py_CLEAR(op) 43 do {\44 if (op) {\45 PyObject *tmp = (PyObject *)(op);\46 (op) = NULL;\47 Py_DECREF(tmp);\48 }\49 42 #define Py_CLEAR(op) \ 43 do { \ 44 if (op) { \ 45 PyObject *tmp = (PyObject *)(op); \ 46 (op) = NULL; \ 47 Py_DECREF(tmp); \ 48 } \ 49 } while (0) 50 50 #endif 51 51 #ifndef Py_VISIT 52 #define Py_VISIT(op) 53 do {\54 if (op) {\55 int vret = visit((PyObject *)(op), arg);\56 if (vret)\57 return vret;\58 }\59 52 #define Py_VISIT(op) \ 53 do { \ 54 if (op) { \ 55 int vret = visit((PyObject *)(op), arg); \ 56 if (vret) \ 57 return vret; \ 58 } \ 59 } while (0) 60 60 #endif 61 61 … … 63 63 64 64 #define IS_BASESTRING(o) \ 65 66 67 static PyObject *error_obj; 65 PyObject_TypeCheck(o, &PyBaseString_Type) 66 67 static PyObject *error_obj; /* CSV exception */ 68 68 static PyObject *dialects; /* Dialect registry */ 69 static long field_limit = 128 * 1024; 69 static long field_limit = 128 * 1024; /* max parsed field size */ 70 70 71 71 typedef enum { 72 START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, 73 74 72 START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, 73 IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, 74 EAT_CRNL 75 75 } ParserState; 76 76 77 77 typedef enum { 78 78 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE 79 79 } QuoteStyle; 80 80 81 81 typedef struct { 82 83 82 QuoteStyle style; 83 char *name; 84 84 } StyleDesc; 85 85 86 86 static StyleDesc quote_styles[] = { 87 88 89 90 91 87 { QUOTE_MINIMAL, "QUOTE_MINIMAL" }, 88 { QUOTE_ALL, "QUOTE_ALL" }, 89 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, 90 { QUOTE_NONE, "QUOTE_NONE" }, 91 { 0 } 92 92 }; 93 93 94 94 typedef struct { 95 96 97 int doublequote;/* is " represented by ""? */98 char delimiter;/* field separator */99 char quotechar;/* quote character */100 char escapechar;/* escape character */101 int skipinitialspace;/* ignore spaces following delimiter? */102 103 int quoting;/* style of quoting to write */104 105 int strict;/* raise exception on bad CSV */95 PyObject_HEAD 96 97 int doublequote; /* is " represented by ""? */ 98 char delimiter; /* field separator */ 99 char quotechar; /* quote character */ 100 char escapechar; /* escape character */ 101 int skipinitialspace; /* ignore spaces following delimiter? */ 102 PyObject *lineterminator; /* string to write between records */ 103 int quoting; /* style of quoting to write */ 104 105 int strict; /* raise exception on bad CSV */ 106 106 } DialectObj; 107 107 … … 109 109 110 110 typedef struct { 111 112 113 114 115 116 117 PyObject *fields;/* field list for current record */118 ParserState state;/* current CSV parse state */119 char *field;/* build current field in here */120 int field_size;/* size of allocated buffer */121 int field_len;/* length of current field */122 int numeric_field;/* treat field as numeric */123 unsigned long line_num;/* Source-file line number */111 PyObject_HEAD 112 113 PyObject *input_iter; /* iterate over this for input lines */ 114 115 DialectObj *dialect; /* parsing dialect */ 116 117 PyObject *fields; /* field list for current record */ 118 ParserState state; /* current CSV parse state */ 119 char *field; /* build current field in here */ 120 int field_size; /* size of allocated buffer */ 121 int field_len; /* length of current field */ 122 int numeric_field; /* treat field as numeric */ 123 unsigned long line_num; /* Source-file line number */ 124 124 } ReaderObj; 125 125 … … 129 129 130 130 typedef struct { 131 132 133 134 135 136 137 char *rec;/* buffer for parser.join */138 int rec_size;/* size of allocated record */139 int rec_len;/* length of record */140 int num_fields;/* number of fields in record */141 } WriterObj; 131 PyObject_HEAD 132 133 PyObject *writeline; /* write output lines to this file */ 134 135 DialectObj *dialect; /* parsing dialect */ 136 137 char *rec; /* buffer for parser.join */ 138 int rec_size; /* size of allocated record */ 139 int rec_len; /* length of record */ 140 int num_fields; /* number of fields in record */ 141 } WriterObj; 142 142 143 143 staticforward PyTypeObject Writer_Type; … … 150 150 get_dialect_from_registry(PyObject * name_obj) 151 151 { 152 153 154 155 156 157 158 159 160 161 152 PyObject *dialect_obj; 153 154 dialect_obj = PyDict_GetItem(dialects, name_obj); 155 if (dialect_obj == NULL) { 156 if (!PyErr_Occurred()) 157 PyErr_Format(error_obj, "unknown dialect"); 158 } 159 else 160 Py_INCREF(dialect_obj); 161 return dialect_obj; 162 162 } 163 163 … … 165 165 get_string(PyObject *str) 166 166 { 167 168 167 Py_XINCREF(str); 168 return str; 169 169 } 170 170 … … 172 172 get_nullchar_as_None(char c) 173 173 { 174 175 176 177 178 179 174 if (c == '\0') { 175 Py_INCREF(Py_None); 176 return Py_None; 177 } 178 else 179 return PyString_FromStringAndSize((char*)&c, 1); 180 180 } 181 181 … … 183 183 Dialect_get_lineterminator(DialectObj *self) 184 184 { 185 185 return get_string(self->lineterminator); 186 186 } 187 187 … … 189 189 Dialect_get_escapechar(DialectObj *self) 190 190 { 191 191 return get_nullchar_as_None(self->escapechar); 192 192 } 193 193 … … 195 195 Dialect_get_quotechar(DialectObj *self) 196 196 { 197 197 return get_nullchar_as_None(self->quotechar); 198 198 } 199 199 … … 201 201 Dialect_get_quoting(DialectObj *self) 202 202 { 203 203 return PyInt_FromLong(self->quoting); 204 204 } 205 205 … … 207 207 _set_bool(const char *name, int *target, PyObject *src, int dflt) 208 208 { 209 if (src == NULL) 210 *target = dflt; 211 else 212 *target = PyObject_IsTrue(src); 213 return 0; 209 if (src == NULL) 210 *target = dflt; 211 else { 212 int b = PyObject_IsTrue(src); 213 if (b < 0) 214 return -1; 215 *target = b; 216 } 217 return 0; 214 218 } 215 219 … … 217 221 _set_int(const char *name, int *target, PyObject *src, int dflt) 218 222 { 219 220 221 222 223 PyErr_Format(PyExc_TypeError, 224 225 226 227 228 229 223 if (src == NULL) 224 *target = dflt; 225 else { 226 if (!PyInt_Check(src)) { 227 PyErr_Format(PyExc_TypeError, 228 "\"%s\" must be an integer", name); 229 return -1; 230 } 231 *target = PyInt_AsLong(src); 232 } 233 return 0; 230 234 } 231 235 … … 233 237 _set_char(const char *name, char *target, PyObject *src, char dflt) 234 238 { 235 236 237 238 239 240 241 PyErr_Format(PyExc_TypeError, 242 "\"%s\" must be an 1-character string", 243 244 245 246 247 248 249 250 251 252 253 239 if (src == NULL) 240 *target = dflt; 241 else { 242 if (src == Py_None || PyString_Size(src) == 0) 243 *target = '\0'; 244 else if (!PyString_Check(src) || PyString_Size(src) != 1) { 245 PyErr_Format(PyExc_TypeError, 246 "\"%s\" must be an 1-character string", 247 name); 248 return -1; 249 } 250 else { 251 char *s = PyString_AsString(src); 252 if (s == NULL) 253 return -1; 254 *target = s[0]; 255 } 256 } 257 return 0; 254 258 } 255 259 … … 257 261 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) 258 262 { 259 260 261 262 263 264 265 PyErr_Format(PyExc_TypeError, 266 267 268 269 270 271 272 273 274 275 263 if (src == NULL) 264 *target = PyString_FromString(dflt); 265 else { 266 if (src == Py_None) 267 *target = NULL; 268 else if (!IS_BASESTRING(src)) { 269 PyErr_Format(PyExc_TypeError, 270 "\"%s\" must be an string", name); 271 return -1; 272 } 273 else { 274 Py_XDECREF(*target); 275 Py_INCREF(src); 276 *target = src; 277 } 278 } 279 return 0; 276 280 } 277 281 … … 279 283 dialect_check_quoting(int quoting) 280 284 { 281 282 283 284 285 286 287 288 285 StyleDesc *qs = quote_styles; 286 287 for (qs = quote_styles; qs->name; qs++) { 288 if (qs->style == quoting) 289 return 0; 290 } 291 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); 292 return -1; 289 293 } 290 294 … … 292 296 293 297 static struct PyMemberDef Dialect_memberlist[] = { 294 295 296 297 298 298 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY }, 299 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY }, 300 { "doublequote", T_INT, D_OFF(doublequote), READONLY }, 301 { "strict", T_INT, D_OFF(strict), READONLY }, 302 { NULL } 299 303 }; 300 304 301 305 static PyGetSetDef Dialect_getsetlist[] = { 302 { "escapechar",(getter)Dialect_get_escapechar},303 { "lineterminator",(getter)Dialect_get_lineterminator},304 { "quotechar",(getter)Dialect_get_quotechar},305 { "quoting",(getter)Dialect_get_quoting},306 306 { "escapechar", (getter)Dialect_get_escapechar}, 307 { "lineterminator", (getter)Dialect_get_lineterminator}, 308 { "quotechar", (getter)Dialect_get_quotechar}, 309 { "quoting", (getter)Dialect_get_quoting}, 310 {NULL}, 307 311 }; 308 312 … … 310 314 Dialect_dealloc(DialectObj *self) 311 315 { 312 313 316 Py_XDECREF(self->lineterminator); 317 Py_TYPE(self)->tp_free((PyObject *)self); 314 318 } 315 319 316 320 static char *dialect_kws[] = { 317 318 319 320 321 322 323 324 325 326 321 "dialect", 322 "delimiter", 323 "doublequote", 324 "escapechar", 325 "lineterminator", 326 "quotechar", 327 "quoting", 328 "skipinitialspace", 329 "strict", 330 NULL 327 331 }; 328 332 … … 330 334 dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) 331 335 { 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 336 DialectObj *self; 337 PyObject *ret = NULL; 338 PyObject *dialect = NULL; 339 PyObject *delimiter = NULL; 340 PyObject *doublequote = NULL; 341 PyObject *escapechar = NULL; 342 PyObject *lineterminator = NULL; 343 PyObject *quotechar = NULL; 344 PyObject *quoting = NULL; 345 PyObject *skipinitialspace = NULL; 346 PyObject *strict = NULL; 347 348 if (!PyArg_ParseTupleAndKeywords(args, kwargs, 349 "|OOOOOOOOO", dialect_kws, 350 &dialect, 351 &delimiter, 352 &doublequote, 353 &escapechar, 354 &lineterminator, 355 "echar, 356 "ing, 357 &skipinitialspace, 358 &strict)) 359 return NULL; 360 361 if (dialect != NULL) { 362 if (IS_BASESTRING(dialect)) { 363 dialect = get_dialect_from_registry(dialect); 364 if (dialect == NULL) 365 return NULL; 366 } 367 else 368 Py_INCREF(dialect); 369 /* Can we reuse this instance? */ 370 if (PyObject_TypeCheck(dialect, &Dialect_Type) && 371 delimiter == 0 && 372 doublequote == 0 && 373 escapechar == 0 && 374 lineterminator == 0 && 375 quotechar == 0 && 376 quoting == 0 && 377 skipinitialspace == 0 && 378 strict == 0) 379 return dialect; 380 } 381 382 self = (DialectObj *)type->tp_alloc(type, 0); 383 if (self == NULL) { 384 Py_XDECREF(dialect); 385 return NULL; 386 } 387 self->lineterminator = NULL; 388 389 Py_XINCREF(delimiter); 390 Py_XINCREF(doublequote); 391 Py_XINCREF(escapechar); 392 Py_XINCREF(lineterminator); 393 Py_XINCREF(quotechar); 394 Py_XINCREF(quoting); 395 Py_XINCREF(skipinitialspace); 396 Py_XINCREF(strict); 397 if (dialect != NULL) { 394 398 #define DIALECT_GETATTR(v, n) \ 395 396 397 398 399 400 401 402 403 404 405 406 407 408 399 if (v == NULL) \ 400 v = PyObject_GetAttrString(dialect, n) 401 DIALECT_GETATTR(delimiter, "delimiter"); 402 DIALECT_GETATTR(doublequote, "doublequote"); 403 DIALECT_GETATTR(escapechar, "escapechar"); 404 DIALECT_GETATTR(lineterminator, "lineterminator"); 405 DIALECT_GETATTR(quotechar, "quotechar"); 406 DIALECT_GETATTR(quoting, "quoting"); 407 DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); 408 DIALECT_GETATTR(strict, "strict"); 409 PyErr_Clear(); 410 } 411 412 /* check types and convert to C values */ 409 413 #define DIASET(meth, name, target, src, dflt) \ 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 PyErr_SetString(PyExc_TypeError,432 433 434 435 436 437 438 439 440 441 414 if (meth(name, target, src, dflt)) \ 415 goto err 416 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); 417 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); 418 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); 419 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); 420 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); 421 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); 422 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); 423 DIASET(_set_bool, "strict", &self->strict, strict, 0); 424 425 /* validate options */ 426 if (dialect_check_quoting(self->quoting)) 427 goto err; 428 if (self->delimiter == 0) { 429 PyErr_SetString(PyExc_TypeError, "delimiter must be set"); 430 goto err; 431 } 432 if (quotechar == Py_None && quoting == NULL) 433 self->quoting = QUOTE_NONE; 434 if (self->quoting != QUOTE_NONE && self->quotechar == 0) { 435 PyErr_SetString(PyExc_TypeError, 436 "quotechar must be set if quoting enabled"); 437 goto err; 438 } 439 if (self->lineterminator == 0) { 440 PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); 441 goto err; 442 } 443 444 ret = (PyObject *)self; 445 Py_INCREF(self); 442 446 err: 443 444 445 446 447 448 449 450 451 452 453 454 } 455 456 457 PyDoc_STRVAR(Dialect_Type_doc, 447 Py_XDECREF(self); 448 Py_XDECREF(dialect); 449 Py_XDECREF(delimiter); 450 Py_XDECREF(doublequote); 451 Py_XDECREF(escapechar); 452 Py_XDECREF(lineterminator); 453 Py_XDECREF(quotechar); 454 Py_XDECREF(quoting); 455 Py_XDECREF(skipinitialspace); 456 Py_XDECREF(strict); 457 return ret; 458 } 459 460 461 PyDoc_STRVAR(Dialect_Type_doc, 458 462 "CSV dialect\n" 459 463 "\n" … … 461 465 462 466 static PyTypeObject Dialect_Type = { 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 (reprfunc)0,/* tp_str */480 481 482 483 484 485 486 487 488 489 490 491 0,/* tp_methods */492 493 494 0,/* tp_base */495 0,/* tp_dict */496 0,/* tp_descr_get */497 0,/* tp_descr_set */498 0,/* tp_dictoffset */499 0,/* tp_init */500 0,/* tp_alloc */501 dialect_new,/* tp_new */502 0,/* tp_free */467 PyVarObject_HEAD_INIT(NULL, 0) 468 "_csv.Dialect", /* tp_name */ 469 sizeof(DialectObj), /* tp_basicsize */ 470 0, /* tp_itemsize */ 471 /* methods */ 472 (destructor)Dialect_dealloc, /* tp_dealloc */ 473 (printfunc)0, /* tp_print */ 474 (getattrfunc)0, /* tp_getattr */ 475 (setattrfunc)0, /* tp_setattr */ 476 (cmpfunc)0, /* tp_compare */ 477 (reprfunc)0, /* tp_repr */ 478 0, /* tp_as_number */ 479 0, /* tp_as_sequence */ 480 0, /* tp_as_mapping */ 481 (hashfunc)0, /* tp_hash */ 482 (ternaryfunc)0, /* tp_call */ 483 (reprfunc)0, /* tp_str */ 484 0, /* tp_getattro */ 485 0, /* tp_setattro */ 486 0, /* tp_as_buffer */ 487 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ 488 Dialect_Type_doc, /* tp_doc */ 489 0, /* tp_traverse */ 490 0, /* tp_clear */ 491 0, /* tp_richcompare */ 492 0, /* tp_weaklistoffset */ 493 0, /* tp_iter */ 494 0, /* tp_iternext */ 495 0, /* tp_methods */ 496 Dialect_memberlist, /* tp_members */ 497 Dialect_getsetlist, /* tp_getset */ 498 0, /* tp_base */ 499 0, /* tp_dict */ 500 0, /* tp_descr_get */ 501 0, /* tp_descr_set */ 502 0, /* tp_dictoffset */ 503 0, /* tp_init */ 504 0, /* tp_alloc */ 505 dialect_new, /* tp_new */ 506 0, /* tp_free */ 503 507 }; 504 508 … … 510 514 _call_dialect(PyObject *dialect_inst, PyObject *kwargs) 511 515 { 512 513 514 515 516 517 518 519 520 516 PyObject *ctor_args; 517 PyObject *dialect; 518 519 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst); 520 if (ctor_args == NULL) 521 return NULL; 522 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs); 523 Py_DECREF(ctor_args); 524 return dialect; 521 525 } 522 526 … … 527 531 parse_save_field(ReaderObj *self) 528 532 { 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 533 PyObject *field; 534 535 field = PyString_FromStringAndSize(self->field, self->field_len); 536 if (field == NULL) 537 return -1; 538 self->field_len = 0; 539 if (self->numeric_field) { 540 PyObject *tmp; 541 542 self->numeric_field = 0; 543 tmp = PyNumber_Float(field); 544 if (tmp == NULL) { 545 Py_DECREF(field); 546 return -1; 547 } 548 Py_DECREF(field); 549 field = tmp; 550 } 551 PyList_Append(self->fields, field); 552 Py_DECREF(field); 553 return 0; 550 554 } 551 555 … … 553 557 parse_grow_buff(ReaderObj *self) 554 558 { 555 556 557 558 559 560 561 562 563 564 565 } 566 567 568 569 570 571 572 573 559 if (self->field_size == 0) { 560 self->field_size = 4096; 561 if (self->field != NULL) 562 PyMem_Free(self->field); 563 self->field = PyMem_Malloc(self->field_size); 564 } 565 else { 566 if (self->field_size > INT_MAX / 2) { 567 PyErr_NoMemory(); 568 return 0; 569 } 570 self->field_size *= 2; 571 self->field = PyMem_Realloc(self->field, self->field_size); 572 } 573 if (self->field == NULL) { 574 PyErr_NoMemory(); 575 return 0; 576 } 577 return 1; 574 578 } 575 579 … … 577 581 parse_add_char(ReaderObj *self, char c) 578 582 { 579 580 581 582 583 584 585 586 587 583 if (self->field_len >= field_limit) { 584 PyErr_Format(error_obj, "field larger than field limit (%ld)", 585 field_limit); 586 return -1; 587 } 588 if (self->field_len == self->field_size && !parse_grow_buff(self)) 589 return -1; 590 self->field[self->field_len++] = c; 591 return 0; 588 592 } 589 593 … … 591 595 parse_process_char(ReaderObj *self, char c) 592 596 { 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 else if (c == dialect->quotechar && 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 if (dialect->quoting != QUOTE_NONE && 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 PyErr_Format(error_obj, "'%c' expected after '%c'", 739 dialect->delimiter, 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 597 DialectObj *dialect = self->dialect; 598 599 switch (self->state) { 600 case START_RECORD: 601 /* start of record */ 602 if (c == '\0') 603 /* empty line - return [] */ 604 break; 605 else if (c == '\n' || c == '\r') { 606 self->state = EAT_CRNL; 607 break; 608 } 609 /* normal character - handle as START_FIELD */ 610 self->state = START_FIELD; 611 /* fallthru */ 612 case START_FIELD: 613 /* expecting field */ 614 if (c == '\n' || c == '\r' || c == '\0') { 615 /* save empty field - return [fields] */ 616 if (parse_save_field(self) < 0) 617 return -1; 618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL); 619 } 620 else if (c == dialect->quotechar && 621 dialect->quoting != QUOTE_NONE) { 622 /* start quoted field */ 623 self->state = IN_QUOTED_FIELD; 624 } 625 else if (c == dialect->escapechar) { 626 /* possible escaped character */ 627 self->state = ESCAPED_CHAR; 628 } 629 else if (c == ' ' && dialect->skipinitialspace) 630 /* ignore space at start of field */ 631 ; 632 else if (c == dialect->delimiter) { 633 /* save empty field */ 634 if (parse_save_field(self) < 0) 635 return -1; 636 } 637 else { 638 /* begin new unquoted field */ 639 if (dialect->quoting == QUOTE_NONNUMERIC) 640 self->numeric_field = 1; 641 if (parse_add_char(self, c) < 0) 642 return -1; 643 self->state = IN_FIELD; 644 } 645 break; 646 647 case ESCAPED_CHAR: 648 if (c == '\0') 649 c = '\n'; 650 if (parse_add_char(self, c) < 0) 651 return -1; 652 self->state = IN_FIELD; 653 break; 654 655 case IN_FIELD: 656 /* in unquoted field */ 657 if (c == '\n' || c == '\r' || c == '\0') { 658 /* end of line - return [fields] */ 659 if (parse_save_field(self) < 0) 660 return -1; 661 self->state = (c == '\0' ? START_RECORD : EAT_CRNL); 662 } 663 else if (c == dialect->escapechar) { 664 /* possible escaped character */ 665 self->state = ESCAPED_CHAR; 666 } 667 else if (c == dialect->delimiter) { 668 /* save field - wait for new field */ 669 if (parse_save_field(self) < 0) 670 return -1; 671 self->state = START_FIELD; 672 } 673 else { 674 /* normal character - save in field */ 675 if (parse_add_char(self, c) < 0) 676 return -1; 677 } 678 break; 679 680 case IN_QUOTED_FIELD: 681 /* in quoted field */ 682 if (c == '\0') 683 ; 684 else if (c == dialect->escapechar) { 685 /* Possible escape character */ 686 self->state = ESCAPE_IN_QUOTED_FIELD; 687 } 688 else if (c == dialect->quotechar && 689 dialect->quoting != QUOTE_NONE) { 690 if (dialect->doublequote) { 691 /* doublequote; " represented by "" */ 692 self->state = QUOTE_IN_QUOTED_FIELD; 693 } 694 else { 695 /* end of quote part of field */ 696 self->state = IN_FIELD; 697 } 698 } 699 else { 700 /* normal character - save in field */ 701 if (parse_add_char(self, c) < 0) 702 return -1; 703 } 704 break; 705 706 case ESCAPE_IN_QUOTED_FIELD: 707 if (c == '\0') 708 c = '\n'; 709 if (parse_add_char(self, c) < 0) 710 return -1; 711 self->state = IN_QUOTED_FIELD; 712 break; 713 714 case QUOTE_IN_QUOTED_FIELD: 715 /* doublequote - seen a quote in an quoted field */ 716 if (dialect->quoting != QUOTE_NONE && 717 c == dialect->quotechar) { 718 /* save "" as " */ 719 if (parse_add_char(self, c) < 0) 720 return -1; 721 self->state = IN_QUOTED_FIELD; 722 } 723 else if (c == dialect->delimiter) { 724 /* save field - wait for new field */ 725 if (parse_save_field(self) < 0) 726 return -1; 727 self->state = START_FIELD; 728 } 729 else if (c == '\n' || c == '\r' || c == '\0') { 730 /* end of line - return [fields] */ 731 if (parse_save_field(self) < 0) 732 return -1; 733 self->state = (c == '\0' ? START_RECORD : EAT_CRNL); 734 } 735 else if (!dialect->strict) { 736 if (parse_add_char(self, c) < 0) 737 return -1; 738 self->state = IN_FIELD; 739 } 740 else { 741 /* illegal */ 742 PyErr_Format(error_obj, "'%c' expected after '%c'", 743 dialect->delimiter, 744 dialect->quotechar); 745 return -1; 746 } 747 break; 748 749 case EAT_CRNL: 750 if (c == '\n' || c == '\r') 751 ; 752 else if (c == '\0') 753 self->state = START_RECORD; 754 else { 755 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); 756 return -1; 757 } 758 break; 759 760 } 761 return 0; 758 762 } 759 763 … … 761 765 parse_reset(ReaderObj *self) 762 766 { 763 764 765 766 767 768 769 770 767 Py_XDECREF(self->fields); 768 self->fields = PyList_New(0); 769 if (self->fields == NULL) 770 return -1; 771 self->field_len = 0; 772 self->state = START_RECORD; 773 self->numeric_field = 0; 774 return 0; 771 775 } 772 776 … … 774 778 Reader_iternext(ReaderObj *self) 775 779 { 776 PyObject *lineobj; 777 PyObject *fields = NULL; 778 char *line, c; 779 int linelen; 780 781 if (parse_reset(self) < 0) 782 return NULL; 783 do { 784 lineobj = PyIter_Next(self->input_iter); 785 if (lineobj == NULL) { 786 /* End of input OR exception */ 787 if (!PyErr_Occurred() && self->field_len != 0) 788 PyErr_Format(error_obj, 789 "newline inside string"); 790 return NULL; 791 } 792 ++self->line_num; 793 794 line = PyString_AsString(lineobj); 795 linelen = PyString_Size(lineobj); 796 797 if (line == NULL || linelen < 0) { 798 Py_DECREF(lineobj); 799 return NULL; 800 } 801 while (linelen--) { 802 c = *line++; 803 if (c == '\0') { 804 Py_DECREF(lineobj); 805 PyErr_Format(error_obj, 806 "line contains NULL byte"); 807 goto err; 808 } 809 if (parse_process_char(self, c) < 0) { 810 Py_DECREF(lineobj); 811 goto err; 812 } 813 } 780 PyObject *lineobj; 781 PyObject *fields = NULL; 782 char *line, c; 783 int linelen; 784 785 if (parse_reset(self) < 0) 786 return NULL; 787 do { 788 lineobj = PyIter_Next(self->input_iter); 789 if (lineobj == NULL) { 790 /* End of input OR exception */ 791 if (!PyErr_Occurred() && (self->field_len != 0 || 792 self->state == IN_QUOTED_FIELD)) { 793 if (self->dialect->strict) 794 PyErr_SetString(error_obj, "unexpected end of data"); 795 else if (parse_save_field(self) >= 0 ) 796 break; 797 } 798 return NULL; 799 } 800 ++self->line_num; 801 802 line = PyString_AsString(lineobj); 803 linelen = PyString_Size(lineobj); 804 805 if (line == NULL || linelen < 0) { 806 Py_DECREF(lineobj); 807 return NULL; 808 } 809 while (linelen--) { 810 c = *line++; 811 if (c == '\0') { 814 812 Py_DECREF(lineobj); 815 if (parse_process_char(self, 0) < 0) 816 goto err; 817 } while (self->state != START_RECORD); 818 819 fields = self->fields; 820 self->fields = NULL; 813 PyErr_Format(error_obj, 814 "line contains NULL byte"); 815 goto err; 816 } 817 if (parse_process_char(self, c) < 0) { 818 Py_DECREF(lineobj); 819 goto err; 820 } 821 } 822 Py_DECREF(lineobj); 823 if (parse_process_char(self, 0) < 0) 824 goto err; 825 } while (self->state != START_RECORD); 826 827 fields = self->fields; 828 self->fields = NULL; 821 829 err: 822 830 return fields; 823 831 } 824 832 … … 826 834 Reader_dealloc(ReaderObj *self) 827 835 { 828 829 830 831 832 833 834 836 PyObject_GC_UnTrack(self); 837 Py_XDECREF(self->dialect); 838 Py_XDECREF(self->input_iter); 839 Py_XDECREF(self->fields); 840 if (self->field != NULL) 841 PyMem_Free(self->field); 842 PyObject_GC_Del(self); 835 843 } 836 844 … … 838 846 Reader_traverse(ReaderObj *self, visitproc visit, void *arg) 839 847 { 840 841 842 843 848 Py_VISIT(self->dialect); 849 Py_VISIT(self->input_iter); 850 Py_VISIT(self->fields); 851 return 0; 844 852 } 845 853 … … 847 855 Reader_clear(ReaderObj *self) 848 856 { 849 850 851 852 857 Py_CLEAR(self->dialect); 858 Py_CLEAR(self->input_iter); 859 Py_CLEAR(self->fields); 860 return 0; 853 861 } 854 862 … … 861 869 862 870 static struct PyMethodDef Reader_methods[] = { 863 871 { NULL, NULL } 864 872 }; 865 873 #define R_OFF(x) offsetof(ReaderObj, x) 866 874 867 875 static struct PyMemberDef Reader_memberlist[] = { 868 869 870 876 { "dialect", T_OBJECT, R_OFF(dialect), RO }, 877 { "line_num", T_ULONG, R_OFF(line_num), RO }, 878 { NULL } 871 879 }; 872 880 873 881 874 882 static PyTypeObject Reader_Type = { 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 (reprfunc)0,/*tp_str*/892 893 894 895 896 Py_TPFLAGS_HAVE_GC,/*tp_flags*/897 898 899 900 901 902 PyObject_SelfIter,/*tp_iter*/903 904 905 906 883 PyVarObject_HEAD_INIT(NULL, 0) 884 "_csv.reader", /*tp_name*/ 885 sizeof(ReaderObj), /*tp_basicsize*/ 886 0, /*tp_itemsize*/ 887 /* methods */ 888 (destructor)Reader_dealloc, /*tp_dealloc*/ 889 (printfunc)0, /*tp_print*/ 890 (getattrfunc)0, /*tp_getattr*/ 891 (setattrfunc)0, /*tp_setattr*/ 892 (cmpfunc)0, /*tp_compare*/ 893 (reprfunc)0, /*tp_repr*/ 894 0, /*tp_as_number*/ 895 0, /*tp_as_sequence*/ 896 0, /*tp_as_mapping*/ 897 (hashfunc)0, /*tp_hash*/ 898 (ternaryfunc)0, /*tp_call*/ 899 (reprfunc)0, /*tp_str*/ 900 0, /*tp_getattro*/ 901 0, /*tp_setattro*/ 902 0, /*tp_as_buffer*/ 903 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 904 Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 905 Reader_Type_doc, /*tp_doc*/ 906 (traverseproc)Reader_traverse, /*tp_traverse*/ 907 (inquiry)Reader_clear, /*tp_clear*/ 908 0, /*tp_richcompare*/ 909 0, /*tp_weaklistoffset*/ 910 PyObject_SelfIter, /*tp_iter*/ 911 (getiterfunc)Reader_iternext, /*tp_iternext*/ 912 Reader_methods, /*tp_methods*/ 913 Reader_memberlist, /*tp_members*/ 914 0, /*tp_getset*/ 907 915 908 916 }; … … 911 919 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) 912 920 { 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 PyErr_SetString(PyExc_TypeError,938 939 940 941 942 943 944 945 946 947 948 949 921 PyObject * iterator, * dialect = NULL; 922 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); 923 924 if (!self) 925 return NULL; 926 927 self->dialect = NULL; 928 self->fields = NULL; 929 self->input_iter = NULL; 930 self->field = NULL; 931 self->field_size = 0; 932 self->line_num = 0; 933 934 if (parse_reset(self) < 0) { 935 Py_DECREF(self); 936 return NULL; 937 } 938 939 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { 940 Py_DECREF(self); 941 return NULL; 942 } 943 self->input_iter = PyObject_GetIter(iterator); 944 if (self->input_iter == NULL) { 945 PyErr_SetString(PyExc_TypeError, 946 "argument 1 must be an iterator"); 947 Py_DECREF(self); 948 return NULL; 949 } 950 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); 951 if (self->dialect == NULL) { 952 Py_DECREF(self); 953 return NULL; 954 } 955 956 PyObject_GC_Track(self); 957 return (PyObject *)self; 950 958 } 951 959 … … 957 965 join_reset(WriterObj *self) 958 966 { 959 960 967 self->rec_len = 0; 968 self->num_fields = 0; 961 969 } 962 970 … … 968 976 static int 969 977 join_append_data(WriterObj *self, char *field, int quote_empty, 970 971 { 972 973 974 978 int *quoted, int copy_phase) 979 { 980 DialectObj *dialect = self->dialect; 981 int i, rec_len; 982 char *lineterm; 975 983 976 984 #define ADDCH(c) \ 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 PyErr_Format(error_obj, 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 985 do {\ 986 if (copy_phase) \ 987 self->rec[rec_len] = c;\ 988 rec_len++;\ 989 } while(0) 990 991 lineterm = PyString_AsString(dialect->lineterminator); 992 if (lineterm == NULL) 993 return -1; 994 995 rec_len = self->rec_len; 996 997 /* If this is not the first field we need a field separator */ 998 if (self->num_fields > 0) 999 ADDCH(dialect->delimiter); 1000 1001 /* Handle preceding quote */ 1002 if (copy_phase && *quoted) 1003 ADDCH(dialect->quotechar); 1004 1005 /* Copy/count field data */ 1006 for (i = 0;; i++) { 1007 char c = field[i]; 1008 int want_escape = 0; 1009 1010 if (c == '\0') 1011 break; 1012 1013 if (c == dialect->delimiter || 1014 c == dialect->escapechar || 1015 c == dialect->quotechar || 1016 strchr(lineterm, c)) { 1017 if (dialect->quoting == QUOTE_NONE) 1018 want_escape = 1; 1019 else { 1020 if (c == dialect->quotechar) { 1021 if (dialect->doublequote) 1022 ADDCH(dialect->quotechar); 1023 else 1024 want_escape = 1; 1025 } 1026 if (!want_escape) 1027 *quoted = 1; 1028 } 1029 if (want_escape) { 1030 if (!dialect->escapechar) { 1031 PyErr_Format(error_obj, 1032 "need to escape, but no escapechar set"); 1033 return -1; 1034 } 1035 ADDCH(dialect->escapechar); 1036 } 1037 } 1038 /* Copy field character into record buffer. 1039 */ 1040 ADDCH(c); 1041 } 1042 1043 /* If field is empty check if it needs to be quoted. 1044 */ 1045 if (i == 0 && quote_empty) { 1046 if (dialect->quoting == QUOTE_NONE) { 1047 PyErr_Format(error_obj, 1048 "single empty field record must be quoted"); 1049 return -1; 1050 } 1051 else 1052 *quoted = 1; 1053 } 1054 1055 if (*quoted) { 1056 if (copy_phase) 1057 ADDCH(dialect->quotechar); 1058 else 1059 rec_len += 2; 1060 } 1061 return rec_len; 1054 1062 #undef ADDCH 1055 1063 } … … 1059 1067 { 1060 1068 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1069 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) { 1070 PyErr_NoMemory(); 1071 return 0; 1072 } 1073 1074 if (rec_len > self->rec_size) { 1075 if (self->rec_size == 0) { 1076 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; 1077 if (self->rec != NULL) 1078 PyMem_Free(self->rec); 1079 self->rec = PyMem_Malloc(self->rec_size); 1080 } 1081 else { 1082 char *old_rec = self->rec; 1083 1084 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; 1085 self->rec = PyMem_Realloc(self->rec, self->rec_size); 1086 if (self->rec == NULL) 1087 PyMem_Free(old_rec); 1088 } 1089 if (self->rec == NULL) { 1090 PyErr_NoMemory(); 1091 return 0; 1092 } 1093 } 1094 return 1; 1087 1095 } 1088 1096 … … 1090 1098 join_append(WriterObj *self, char *field, int *quoted, int quote_empty) 1091 1099 { 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1100 int rec_len; 1101 1102 rec_len = join_append_data(self, field, quote_empty, quoted, 0); 1103 if (rec_len < 0) 1104 return 0; 1105 1106 /* grow record buffer if necessary */ 1107 if (!join_check_rec_size(self, rec_len)) 1108 return 0; 1109 1110 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1); 1111 self->num_fields++; 1112 1113 return 1; 1106 1114 } 1107 1115 … … 1109 1117 join_append_lineterminator(WriterObj *self) 1110 1118 { 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 terminator = PyString_AsString(self->dialect->lineterminator); 1123 1124 1125 1126 1127 1128 1119 int terminator_len; 1120 char *terminator; 1121 1122 terminator_len = PyString_Size(self->dialect->lineterminator); 1123 if (terminator_len == -1) 1124 return 0; 1125 1126 /* grow record buffer if necessary */ 1127 if (!join_check_rec_size(self, self->rec_len + terminator_len)) 1128 return 0; 1129 1130 terminator = PyString_AsString(self->dialect->lineterminator); 1131 if (terminator == NULL) 1132 return 0; 1133 memmove(self->rec + self->rec_len, terminator, terminator_len); 1134 self->rec_len += terminator_len; 1135 1136 return 1; 1129 1137 } 1130 1138 … … 1138 1146 csv_writerow(WriterObj *self, PyObject *seq) 1139 1147 { 1140 DialectObj *dialect = self->dialect; 1141 int len, i; 1142 1143 if (!PySequence_Check(seq)) 1144 return PyErr_Format(error_obj, "sequence expected"); 1145 1146 len = PySequence_Length(seq); 1147 if (len < 0) 1148 return NULL; 1149 1150 /* Join all fields in internal buffer. 1151 */ 1152 join_reset(self); 1153 for (i = 0; i < len; i++) { 1154 PyObject *field; 1155 int append_ok; 1156 int quoted; 1157 1158 field = PySequence_GetItem(seq, i); 1159 if (field == NULL) 1160 return NULL; 1161 1162 switch (dialect->quoting) { 1163 case QUOTE_NONNUMERIC: 1164 quoted = !PyNumber_Check(field); 1165 break; 1166 case QUOTE_ALL: 1167 quoted = 1; 1168 break; 1169 default: 1170 quoted = 0; 1171 break; 1172 } 1173 1174 if (PyString_Check(field)) { 1175 append_ok = join_append(self, 1176 PyString_AS_STRING(field), 1177 "ed, len == 1); 1178 Py_DECREF(field); 1179 } 1180 else if (field == Py_None) { 1181 append_ok = join_append(self, "", "ed, len == 1); 1182 Py_DECREF(field); 1183 } 1184 else { 1185 PyObject *str; 1186 1187 str = PyObject_Str(field); 1188 Py_DECREF(field); 1189 if (str == NULL) 1190 return NULL; 1191 1192 append_ok = join_append(self, PyString_AS_STRING(str), 1193 "ed, len == 1); 1194 Py_DECREF(str); 1195 } 1196 if (!append_ok) 1197 return NULL; 1198 } 1199 1200 /* Add line terminator. 1201 */ 1202 if (!join_append_lineterminator(self)) 1203 return 0; 1204 1205 return PyObject_CallFunction(self->writeline, 1206 "(s#)", self->rec, self->rec_len); 1148 DialectObj *dialect = self->dialect; 1149 int len, i; 1150 1151 if (!PySequence_Check(seq)) 1152 return PyErr_Format(error_obj, "sequence expected"); 1153 1154 len = PySequence_Length(seq); 1155 if (len < 0) 1156 return NULL; 1157 1158 /* Join all fields in internal buffer. 1159 */ 1160 join_reset(self); 1161 for (i = 0; i < len; i++) { 1162 PyObject *field; 1163 int append_ok; 1164 int quoted; 1165 1166 field = PySequence_GetItem(seq, i); 1167 if (field == NULL) 1168 return NULL; 1169 1170 switch (dialect->quoting) { 1171 case QUOTE_NONNUMERIC: 1172 quoted = !PyNumber_Check(field); 1173 break; 1174 case QUOTE_ALL: 1175 quoted = 1; 1176 break; 1177 default: 1178 quoted = 0; 1179 break; 1180 } 1181 1182 if (PyString_Check(field)) { 1183 append_ok = join_append(self, 1184 PyString_AS_STRING(field), 1185 "ed, len == 1); 1186 Py_DECREF(field); 1187 } 1188 else if (field == Py_None) { 1189 append_ok = join_append(self, "", "ed, len == 1); 1190 Py_DECREF(field); 1191 } 1192 else { 1193 PyObject *str; 1194 1195 if (PyFloat_Check(field)) { 1196 str = PyObject_Repr(field); 1197 } else { 1198 str = PyObject_Str(field); 1199 } 1200 Py_DECREF(field); 1201 if (str == NULL) 1202 return NULL; 1203 1204 append_ok = join_append(self, PyString_AS_STRING(str), 1205 "ed, len == 1); 1206 Py_DECREF(str); 1207 } 1208 if (!append_ok) 1209 return NULL; 1210 } 1211 1212 /* Add line terminator. 1213 */ 1214 if (!join_append_lineterminator(self)) 1215 return 0; 1216 1217 return PyObject_CallFunction(self->writeline, 1218 "(s#)", self->rec, self->rec_len); 1207 1219 } 1208 1220 … … 1216 1228 csv_writerows(WriterObj *self, PyObject *seqseq) 1217 1229 { 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 Py_DECREF(result);1235 1236 1237 1238 1239 1240 1230 PyObject *row_iter, *row_obj, *result; 1231 1232 row_iter = PyObject_GetIter(seqseq); 1233 if (row_iter == NULL) { 1234 PyErr_SetString(PyExc_TypeError, 1235 "writerows() argument must be iterable"); 1236 return NULL; 1237 } 1238 while ((row_obj = PyIter_Next(row_iter))) { 1239 result = csv_writerow(self, row_obj); 1240 Py_DECREF(row_obj); 1241 if (!result) { 1242 Py_DECREF(row_iter); 1243 return NULL; 1244 } 1245 else 1246 Py_DECREF(result); 1247 } 1248 Py_DECREF(row_iter); 1249 if (PyErr_Occurred()) 1250 return NULL; 1251 Py_INCREF(Py_None); 1252 return Py_None; 1241 1253 } 1242 1254 1243 1255 static struct PyMethodDef Writer_methods[] = { 1244 1245 1246 1256 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, 1257 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, 1258 { NULL, NULL } 1247 1259 }; 1248 1260 … … 1250 1262 1251 1263 static struct PyMemberDef Writer_memberlist[] = { 1252 1253 1264 { "dialect", T_OBJECT, W_OFF(dialect), RO }, 1265 { NULL } 1254 1266 }; 1255 1267 … … 1257 1269 Writer_dealloc(WriterObj *self) 1258 1270 { 1259 1260 1261 1262 1263 1264 1271 PyObject_GC_UnTrack(self); 1272 Py_XDECREF(self->dialect); 1273 Py_XDECREF(self->writeline); 1274 if (self->rec != NULL) 1275 PyMem_Free(self->rec); 1276 PyObject_GC_Del(self); 1265 1277 } 1266 1278 … … 1268 1280 Writer_traverse(WriterObj *self, visitproc visit, void *arg) 1269 1281 { 1270 1271 1272 1282 Py_VISIT(self->dialect); 1283 Py_VISIT(self->writeline); 1284 return 0; 1273 1285 } 1274 1286 … … 1276 1288 Writer_clear(WriterObj *self) 1277 1289 { 1278 1279 1280 1281 } 1282 1283 PyDoc_STRVAR(Writer_Type_doc, 1290 Py_CLEAR(self->dialect); 1291 Py_CLEAR(self->writeline); 1292 return 0; 1293 } 1294 1295 PyDoc_STRVAR(Writer_Type_doc, 1284 1296 "CSV writer\n" 1285 1297 "\n" … … 1289 1301 1290 1302 static PyTypeObject Writer_Type = { 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 Py_TPFLAGS_HAVE_GC,/*tp_flags*/1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1303 PyVarObject_HEAD_INIT(NULL, 0) 1304 "_csv.writer", /*tp_name*/ 1305 sizeof(WriterObj), /*tp_basicsize*/ 1306 0, /*tp_itemsize*/ 1307 /* methods */ 1308 (destructor)Writer_dealloc, /*tp_dealloc*/ 1309 (printfunc)0, /*tp_print*/ 1310 (getattrfunc)0, /*tp_getattr*/ 1311 (setattrfunc)0, /*tp_setattr*/ 1312 (cmpfunc)0, /*tp_compare*/ 1313 (reprfunc)0, /*tp_repr*/ 1314 0, /*tp_as_number*/ 1315 0, /*tp_as_sequence*/ 1316 0, /*tp_as_mapping*/ 1317 (hashfunc)0, /*tp_hash*/ 1318 (ternaryfunc)0, /*tp_call*/ 1319 (reprfunc)0, /*tp_str*/ 1320 0, /*tp_getattro*/ 1321 0, /*tp_setattro*/ 1322 0, /*tp_as_buffer*/ 1323 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 1324 Py_TPFLAGS_HAVE_GC, /*tp_flags*/ 1325 Writer_Type_doc, 1326 (traverseproc)Writer_traverse, /*tp_traverse*/ 1327 (inquiry)Writer_clear, /*tp_clear*/ 1328 0, /*tp_richcompare*/ 1329 0, /*tp_weaklistoffset*/ 1330 (getiterfunc)0, /*tp_iter*/ 1331 (getiterfunc)0, /*tp_iternext*/ 1332 Writer_methods, /*tp_methods*/ 1333 Writer_memberlist, /*tp_members*/ 1334 0, /*tp_getset*/ 1323 1335 }; 1324 1336 … … 1326 1338 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) 1327 1339 { 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1340 PyObject * output_file, * dialect = NULL; 1341 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); 1342 1343 if (!self) 1344 return NULL; 1345 1346 self->dialect = NULL; 1347 self->writeline = NULL; 1348 1349 self->rec = NULL; 1350 self->rec_size = 0; 1351 self->rec_len = 0; 1352 self->num_fields = 0; 1353 1354 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { 1355 Py_DECREF(self); 1356 return NULL; 1357 } 1358 self->writeline = PyObject_GetAttrString(output_file, "write"); 1359 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { 1360 PyErr_SetString(PyExc_TypeError, 1361 "argument 1 must have a \"write\" method"); 1362 Py_DECREF(self); 1363 return NULL; 1364 } 1365 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); 1366 if (self->dialect == NULL) { 1367 Py_DECREF(self); 1368 return NULL; 1369 } 1370 PyObject_GC_Track(self); 1371 return (PyObject *)self; 1360 1372 } 1361 1373 … … 1366 1378 csv_list_dialects(PyObject *module, PyObject *args) 1367 1379 { 1368 1380 return PyDict_Keys(dialects); 1369 1381 } 1370 1382 … … 1372 1384 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) 1373 1385 { 1374 1375 1376 1377 1378 1379 1380 PyErr_SetString(PyExc_TypeError,1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1386 PyObject *name_obj, *dialect_obj = NULL; 1387 PyObject *dialect; 1388 1389 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) 1390 return NULL; 1391 if (!IS_BASESTRING(name_obj)) { 1392 PyErr_SetString(PyExc_TypeError, 1393 "dialect name must be a string or unicode"); 1394 return NULL; 1395 } 1396 dialect = _call_dialect(dialect_obj, kwargs); 1397 if (dialect == NULL) 1398 return NULL; 1399 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) { 1400 Py_DECREF(dialect); 1401 return NULL; 1402 } 1403 Py_DECREF(dialect); 1404 Py_INCREF(Py_None); 1405 return Py_None; 1394 1406 } 1395 1407 … … 1397 1409 csv_unregister_dialect(PyObject *module, PyObject *name_obj) 1398 1410 { 1399 1400 1401 1402 1411 if (PyDict_DelItem(dialects, name_obj) < 0) 1412 return PyErr_Format(error_obj, "unknown dialect"); 1413 Py_INCREF(Py_None); 1414 return Py_None; 1403 1415 } 1404 1416 … … 1406 1418 csv_get_dialect(PyObject *module, PyObject *name_obj) 1407 1419 { 1408 1420 return get_dialect_from_registry(name_obj); 1409 1421 } 1410 1422 … … 1412 1424 csv_field_size_limit(PyObject *module, PyObject *args) 1413 1425 { 1414 1415 1416 1417 1418 1419 1420 1421 PyErr_Format(PyExc_TypeError, 1422 1423 1424 1425 1426 1427 1426 PyObject *new_limit = NULL; 1427 long old_limit = field_limit; 1428 1429 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) 1430 return NULL; 1431 if (new_limit != NULL) { 1432 if (!PyInt_Check(new_limit)) { 1433 PyErr_Format(PyExc_TypeError, 1434 "limit must be an integer"); 1435 return NULL; 1436 } 1437 field_limit = PyInt_AsLong(new_limit); 1438 } 1439 return PyInt_FromLong(old_limit); 1428 1440 } 1429 1441 … … 1543 1555 1544 1556 static struct PyMethodDef csv_methods[] = { 1545 { "reader", (PyCFunction)csv_reader, 1546 1547 { "writer", (PyCFunction)csv_writer, 1548 1549 { "list_dialects", (PyCFunction)csv_list_dialects, 1550 1551 { "register_dialect", (PyCFunction)csv_register_dialect, 1552 1553 { "unregister_dialect", (PyCFunction)csv_unregister_dialect, 1554 1555 { "get_dialect", (PyCFunction)csv_get_dialect, 1556 1557 { "field_size_limit", (PyCFunction)csv_field_size_limit, 1558 1559 1557 { "reader", (PyCFunction)csv_reader, 1558 METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, 1559 { "writer", (PyCFunction)csv_writer, 1560 METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, 1561 { "list_dialects", (PyCFunction)csv_list_dialects, 1562 METH_NOARGS, csv_list_dialects_doc}, 1563 { "register_dialect", (PyCFunction)csv_register_dialect, 1564 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, 1565 { "unregister_dialect", (PyCFunction)csv_unregister_dialect, 1566 METH_O, csv_unregister_dialect_doc}, 1567 { "get_dialect", (PyCFunction)csv_get_dialect, 1568 METH_O, csv_get_dialect_doc}, 1569 { "field_size_limit", (PyCFunction)csv_field_size_limit, 1570 METH_VARARGS, csv_field_size_limit_doc}, 1571 { NULL, NULL } 1560 1572 }; 1561 1573 … … 1563 1575 init_csv(void) 1564 1576 { 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 } 1577 PyObject *module; 1578 StyleDesc *style; 1579 1580 if (PyType_Ready(&Dialect_Type) < 0) 1581 return; 1582 1583 if (PyType_Ready(&Reader_Type) < 0) 1584 return; 1585 1586 if (PyType_Ready(&Writer_Type) < 0) 1587 return; 1588 1589 /* Create the module and add the functions */ 1590 module = Py_InitModule3("_csv", csv_methods, csv_module_doc); 1591 if (module == NULL) 1592 return; 1593 1594 /* Add version to the module. */ 1595 if (PyModule_AddStringConstant(module, "__version__", 1596 MODULE_VERSION) == -1) 1597 return; 1598 1599 /* Add _dialects dictionary */ 1600 dialects = PyDict_New(); 1601 if (dialects == NULL) 1602 return; 1603 if (PyModule_AddObject(module, "_dialects", dialects)) 1604 return; 1605 1606 /* Add quote styles into dictionary */ 1607 for (style = quote_styles; style->name; style++) { 1608 if (PyModule_AddIntConstant(module, style->name, 1609 style->style) == -1) 1610 return; 1611 } 1612 1613 /* Add the Dialect type */ 1614 Py_INCREF(&Dialect_Type); 1615 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) 1616 return; 1617 1618 /* Add the CSV exception object to the module. */ 1619 error_obj = PyErr_NewException("_csv.Error", NULL, NULL); 1620 if (error_obj == NULL) 1621 return; 1622 PyModule_AddObject(module, "Error", error_obj); 1623 }
Note:
See TracChangeset
for help on using the changeset viewer.