| 1 | /* NOTE: this API is -ONLY- for use with single byte character strings. */
|
|---|
| 2 | /* Do not use it with Unicode. */
|
|---|
| 3 |
|
|---|
| 4 | #include "bytes_methods.h"
|
|---|
| 5 |
|
|---|
/* STRINGLIB_MUTABLE is expected to be defined by the including file before
   this header is pulled in.  When it is missing, emit a warning and fall
   back to 0. */
#if !defined(STRINGLIB_MUTABLE)
#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
#define STRINGLIB_MUTABLE 0
#endif
| 10 |
|
|---|
/* The more complicated methods.  Parts of these should be pulled out into
   the shared code in bytes_methods.c to cut down on duplicate code bloat. */
| 13 |
|
|---|
| 14 | PyDoc_STRVAR(expandtabs__doc__,
|
|---|
| 15 | "B.expandtabs([tabsize]) -> copy of B\n\
|
|---|
| 16 | \n\
|
|---|
| 17 | Return a copy of B where all tab characters are expanded using spaces.\n\
|
|---|
| 18 | If tabsize is not given, a tab size of 8 characters is assumed.");
|
|---|
| 19 |
|
|---|
| 20 | static PyObject*
|
|---|
| 21 | stringlib_expandtabs(PyObject *self, PyObject *args)
|
|---|
| 22 | {
|
|---|
| 23 | const char *e, *p;
|
|---|
| 24 | char *q;
|
|---|
| 25 | size_t i, j;
|
|---|
| 26 | PyObject *u;
|
|---|
| 27 | int tabsize = 8;
|
|---|
| 28 |
|
|---|
| 29 | if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
|
|---|
| 30 | return NULL;
|
|---|
| 31 |
|
|---|
| 32 | /* First pass: determine size of output string */
|
|---|
| 33 | i = j = 0;
|
|---|
| 34 | e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
|
|---|
| 35 | for (p = STRINGLIB_STR(self); p < e; p++)
|
|---|
| 36 | if (*p == '\t') {
|
|---|
| 37 | if (tabsize > 0) {
|
|---|
| 38 | j += tabsize - (j % tabsize);
|
|---|
| 39 | if (j > PY_SSIZE_T_MAX) {
|
|---|
| 40 | PyErr_SetString(PyExc_OverflowError,
|
|---|
| 41 | "result is too long");
|
|---|
| 42 | return NULL;
|
|---|
| 43 | }
|
|---|
| 44 | }
|
|---|
| 45 | }
|
|---|
| 46 | else {
|
|---|
| 47 | j++;
|
|---|
| 48 | if (*p == '\n' || *p == '\r') {
|
|---|
| 49 | i += j;
|
|---|
| 50 | j = 0;
|
|---|
| 51 | if (i > PY_SSIZE_T_MAX) {
|
|---|
| 52 | PyErr_SetString(PyExc_OverflowError,
|
|---|
| 53 | "result is too long");
|
|---|
| 54 | return NULL;
|
|---|
| 55 | }
|
|---|
| 56 | }
|
|---|
| 57 | }
|
|---|
| 58 |
|
|---|
| 59 | if ((i + j) > PY_SSIZE_T_MAX) {
|
|---|
| 60 | PyErr_SetString(PyExc_OverflowError, "result is too long");
|
|---|
| 61 | return NULL;
|
|---|
| 62 | }
|
|---|
| 63 |
|
|---|
| 64 | /* Second pass: create output string and fill it */
|
|---|
| 65 | u = STRINGLIB_NEW(NULL, i + j);
|
|---|
| 66 | if (!u)
|
|---|
| 67 | return NULL;
|
|---|
| 68 |
|
|---|
| 69 | j = 0;
|
|---|
| 70 | q = STRINGLIB_STR(u);
|
|---|
| 71 |
|
|---|
| 72 | for (p = STRINGLIB_STR(self); p < e; p++)
|
|---|
| 73 | if (*p == '\t') {
|
|---|
| 74 | if (tabsize > 0) {
|
|---|
| 75 | i = tabsize - (j % tabsize);
|
|---|
| 76 | j += i;
|
|---|
| 77 | while (i--)
|
|---|
| 78 | *q++ = ' ';
|
|---|
| 79 | }
|
|---|
| 80 | }
|
|---|
| 81 | else {
|
|---|
| 82 | j++;
|
|---|
| 83 | *q++ = *p;
|
|---|
| 84 | if (*p == '\n' || *p == '\r')
|
|---|
| 85 | j = 0;
|
|---|
| 86 | }
|
|---|
| 87 |
|
|---|
| 88 | return u;
|
|---|
| 89 | }
|
|---|
| 90 |
|
|---|
| 91 | Py_LOCAL_INLINE(PyObject *)
|
|---|
| 92 | pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
|
|---|
| 93 | {
|
|---|
| 94 | PyObject *u;
|
|---|
| 95 |
|
|---|
| 96 | if (left < 0)
|
|---|
| 97 | left = 0;
|
|---|
| 98 | if (right < 0)
|
|---|
| 99 | right = 0;
|
|---|
| 100 |
|
|---|
| 101 | if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) {
|
|---|
| 102 | #if STRINGLIB_MUTABLE
|
|---|
| 103 | /* We're defined as returning a copy; If the object is mutable
|
|---|
| 104 | * that means we must make an identical copy. */
|
|---|
| 105 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
|---|
| 106 | #else
|
|---|
| 107 | Py_INCREF(self);
|
|---|
| 108 | return (PyObject *)self;
|
|---|
| 109 | #endif /* STRINGLIB_MUTABLE */
|
|---|
| 110 | }
|
|---|
| 111 |
|
|---|
| 112 | u = STRINGLIB_NEW(NULL,
|
|---|
| 113 | left + STRINGLIB_LEN(self) + right);
|
|---|
| 114 | if (u) {
|
|---|
| 115 | if (left)
|
|---|
| 116 | memset(STRINGLIB_STR(u), fill, left);
|
|---|
| 117 | Py_MEMCPY(STRINGLIB_STR(u) + left,
|
|---|
| 118 | STRINGLIB_STR(self),
|
|---|
| 119 | STRINGLIB_LEN(self));
|
|---|
| 120 | if (right)
|
|---|
| 121 | memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),
|
|---|
| 122 | fill, right);
|
|---|
| 123 | }
|
|---|
| 124 |
|
|---|
| 125 | return u;
|
|---|
| 126 | }
|
|---|
| 127 |
|
|---|
| 128 | PyDoc_STRVAR(ljust__doc__,
|
|---|
| 129 | "B.ljust(width[, fillchar]) -> copy of B\n"
|
|---|
| 130 | "\n"
|
|---|
| 131 | "Return B left justified in a string of length width. Padding is\n"
|
|---|
| 132 | "done using the specified fill character (default is a space).");
|
|---|
| 133 |
|
|---|
| 134 | static PyObject *
|
|---|
| 135 | stringlib_ljust(PyObject *self, PyObject *args)
|
|---|
| 136 | {
|
|---|
| 137 | Py_ssize_t width;
|
|---|
| 138 | char fillchar = ' ';
|
|---|
| 139 |
|
|---|
| 140 | if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
|
|---|
| 141 | return NULL;
|
|---|
| 142 |
|
|---|
| 143 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
|
|---|
| 144 | #if STRINGLIB_MUTABLE
|
|---|
| 145 | /* We're defined as returning a copy; If the object is mutable
|
|---|
| 146 | * that means we must make an identical copy. */
|
|---|
| 147 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
|---|
| 148 | #else
|
|---|
| 149 | Py_INCREF(self);
|
|---|
| 150 | return (PyObject*) self;
|
|---|
| 151 | #endif
|
|---|
| 152 | }
|
|---|
| 153 |
|
|---|
| 154 | return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);
|
|---|
| 155 | }
|
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 | PyDoc_STRVAR(rjust__doc__,
|
|---|
| 159 | "B.rjust(width[, fillchar]) -> copy of B\n"
|
|---|
| 160 | "\n"
|
|---|
| 161 | "Return B right justified in a string of length width. Padding is\n"
|
|---|
| 162 | "done using the specified fill character (default is a space)");
|
|---|
| 163 |
|
|---|
| 164 | static PyObject *
|
|---|
| 165 | stringlib_rjust(PyObject *self, PyObject *args)
|
|---|
| 166 | {
|
|---|
| 167 | Py_ssize_t width;
|
|---|
| 168 | char fillchar = ' ';
|
|---|
| 169 |
|
|---|
| 170 | if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
|
|---|
| 171 | return NULL;
|
|---|
| 172 |
|
|---|
| 173 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
|
|---|
| 174 | #if STRINGLIB_MUTABLE
|
|---|
| 175 | /* We're defined as returning a copy; If the object is mutable
|
|---|
| 176 | * that means we must make an identical copy. */
|
|---|
| 177 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
|---|
| 178 | #else
|
|---|
| 179 | Py_INCREF(self);
|
|---|
| 180 | return (PyObject*) self;
|
|---|
| 181 | #endif
|
|---|
| 182 | }
|
|---|
| 183 |
|
|---|
| 184 | return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);
|
|---|
| 185 | }
|
|---|
| 186 |
|
|---|
| 187 |
|
|---|
| 188 | PyDoc_STRVAR(center__doc__,
|
|---|
| 189 | "B.center(width[, fillchar]) -> copy of B\n"
|
|---|
| 190 | "\n"
|
|---|
| 191 | "Return B centered in a string of length width. Padding is\n"
|
|---|
| 192 | "done using the specified fill character (default is a space).");
|
|---|
| 193 |
|
|---|
| 194 | static PyObject *
|
|---|
| 195 | stringlib_center(PyObject *self, PyObject *args)
|
|---|
| 196 | {
|
|---|
| 197 | Py_ssize_t marg, left;
|
|---|
| 198 | Py_ssize_t width;
|
|---|
| 199 | char fillchar = ' ';
|
|---|
| 200 |
|
|---|
| 201 | if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
|
|---|
| 202 | return NULL;
|
|---|
| 203 |
|
|---|
| 204 | if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) {
|
|---|
| 205 | #if STRINGLIB_MUTABLE
|
|---|
| 206 | /* We're defined as returning a copy; If the object is mutable
|
|---|
| 207 | * that means we must make an identical copy. */
|
|---|
| 208 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
|---|
| 209 | #else
|
|---|
| 210 | Py_INCREF(self);
|
|---|
| 211 | return (PyObject*) self;
|
|---|
| 212 | #endif
|
|---|
| 213 | }
|
|---|
| 214 |
|
|---|
| 215 | marg = width - STRINGLIB_LEN(self);
|
|---|
| 216 | left = marg / 2 + (marg & width & 1);
|
|---|
| 217 |
|
|---|
| 218 | return pad(self, left, marg - left, fillchar);
|
|---|
| 219 | }
|
|---|
| 220 |
|
|---|
| 221 | PyDoc_STRVAR(zfill__doc__,
|
|---|
| 222 | "B.zfill(width) -> copy of B\n"
|
|---|
| 223 | "\n"
|
|---|
| 224 | "Pad a numeric string B with zeros on the left, to fill a field\n"
|
|---|
| 225 | "of the specified width. B is never truncated.");
|
|---|
| 226 |
|
|---|
| 227 | static PyObject *
|
|---|
| 228 | stringlib_zfill(PyObject *self, PyObject *args)
|
|---|
| 229 | {
|
|---|
| 230 | Py_ssize_t fill;
|
|---|
| 231 | PyObject *s;
|
|---|
| 232 | char *p;
|
|---|
| 233 | Py_ssize_t width;
|
|---|
| 234 |
|
|---|
| 235 | if (!PyArg_ParseTuple(args, "n:zfill", &width))
|
|---|
| 236 | return NULL;
|
|---|
| 237 |
|
|---|
| 238 | if (STRINGLIB_LEN(self) >= width) {
|
|---|
| 239 | if (STRINGLIB_CHECK_EXACT(self)) {
|
|---|
| 240 | #if STRINGLIB_MUTABLE
|
|---|
| 241 | /* We're defined as returning a copy; If the object is mutable
|
|---|
| 242 | * that means we must make an identical copy. */
|
|---|
| 243 | return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
|---|
| 244 | #else
|
|---|
| 245 | Py_INCREF(self);
|
|---|
| 246 | return (PyObject*) self;
|
|---|
| 247 | #endif
|
|---|
| 248 | }
|
|---|
| 249 | else
|
|---|
| 250 | return STRINGLIB_NEW(
|
|---|
| 251 | STRINGLIB_STR(self),
|
|---|
| 252 | STRINGLIB_LEN(self)
|
|---|
| 253 | );
|
|---|
| 254 | }
|
|---|
| 255 |
|
|---|
| 256 | fill = width - STRINGLIB_LEN(self);
|
|---|
| 257 |
|
|---|
| 258 | s = pad(self, fill, 0, '0');
|
|---|
| 259 |
|
|---|
| 260 | if (s == NULL)
|
|---|
| 261 | return NULL;
|
|---|
| 262 |
|
|---|
| 263 | p = STRINGLIB_STR(s);
|
|---|
| 264 | if (p[fill] == '+' || p[fill] == '-') {
|
|---|
| 265 | /* move sign to beginning of string */
|
|---|
| 266 | p[0] = p[fill];
|
|---|
| 267 | p[fill] = '0';
|
|---|
| 268 | }
|
|---|
| 269 |
|
|---|
| 270 | return (PyObject*) s;
|
|---|
| 271 | }
|
|---|
| 272 |
|
|---|
| 273 |
|
|---|
| 274 | #define _STRINGLIB_SPLIT_APPEND(data, left, right) \
|
|---|
| 275 | str = STRINGLIB_NEW((data) + (left), \
|
|---|
| 276 | (right) - (left)); \
|
|---|
| 277 | if (str == NULL) \
|
|---|
| 278 | goto onError; \
|
|---|
| 279 | if (PyList_Append(list, str)) { \
|
|---|
| 280 | Py_DECREF(str); \
|
|---|
| 281 | goto onError; \
|
|---|
| 282 | } \
|
|---|
| 283 | else \
|
|---|
| 284 | Py_DECREF(str);
|
|---|
| 285 |
|
|---|
| 286 | PyDoc_STRVAR(splitlines__doc__,
|
|---|
| 287 | "B.splitlines([keepends]) -> list of lines\n\
|
|---|
| 288 | \n\
|
|---|
| 289 | Return a list of the lines in B, breaking at line boundaries.\n\
|
|---|
| 290 | Line breaks are not included in the resulting list unless keepends\n\
|
|---|
| 291 | is given and true.");
|
|---|
| 292 |
|
|---|
| 293 | static PyObject*
|
|---|
| 294 | stringlib_splitlines(PyObject *self, PyObject *args)
|
|---|
| 295 | {
|
|---|
| 296 | register Py_ssize_t i;
|
|---|
| 297 | register Py_ssize_t j;
|
|---|
| 298 | Py_ssize_t len;
|
|---|
| 299 | int keepends = 0;
|
|---|
| 300 | PyObject *list;
|
|---|
| 301 | PyObject *str;
|
|---|
| 302 | char *data;
|
|---|
| 303 |
|
|---|
| 304 | if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
|
|---|
| 305 | return NULL;
|
|---|
| 306 |
|
|---|
| 307 | data = STRINGLIB_STR(self);
|
|---|
| 308 | len = STRINGLIB_LEN(self);
|
|---|
| 309 |
|
|---|
| 310 | /* This does not use the preallocated list because splitlines is
|
|---|
| 311 | usually run with hundreds of newlines. The overhead of
|
|---|
| 312 | switching between PyList_SET_ITEM and append causes about a
|
|---|
| 313 | 2-3% slowdown for that common case. A smarter implementation
|
|---|
| 314 | could move the if check out, so the SET_ITEMs are done first
|
|---|
| 315 | and the appends only done when the prealloc buffer is full.
|
|---|
| 316 | That's too much work for little gain.*/
|
|---|
| 317 |
|
|---|
| 318 | list = PyList_New(0);
|
|---|
| 319 | if (!list)
|
|---|
| 320 | goto onError;
|
|---|
| 321 |
|
|---|
| 322 | for (i = j = 0; i < len; ) {
|
|---|
| 323 | Py_ssize_t eol;
|
|---|
| 324 |
|
|---|
| 325 | /* Find a line and append it */
|
|---|
| 326 | while (i < len && data[i] != '\n' && data[i] != '\r')
|
|---|
| 327 | i++;
|
|---|
| 328 |
|
|---|
| 329 | /* Skip the line break reading CRLF as one line break */
|
|---|
| 330 | eol = i;
|
|---|
| 331 | if (i < len) {
|
|---|
| 332 | if (data[i] == '\r' && i + 1 < len &&
|
|---|
| 333 | data[i+1] == '\n')
|
|---|
| 334 | i += 2;
|
|---|
| 335 | else
|
|---|
| 336 | i++;
|
|---|
| 337 | if (keepends)
|
|---|
| 338 | eol = i;
|
|---|
| 339 | }
|
|---|
| 340 | _STRINGLIB_SPLIT_APPEND(data, j, eol);
|
|---|
| 341 | j = i;
|
|---|
| 342 | }
|
|---|
| 343 | if (j < len) {
|
|---|
| 344 | _STRINGLIB_SPLIT_APPEND(data, j, len);
|
|---|
| 345 | }
|
|---|
| 346 |
|
|---|
| 347 | return list;
|
|---|
| 348 |
|
|---|
| 349 | onError:
|
|---|
| 350 | Py_XDECREF(list);
|
|---|
| 351 | return NULL;
|
|---|
| 352 | }
|
|---|
| 353 |
|
|---|
| 354 | #undef _STRINGLIB_SPLIT_APPEND
|
|---|
| 355 |
|
|---|