source: python/trunk/Python/pystrtod.c@ 6

Last change on this file since 6 was 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 13.7 KB
Line 
1/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Locale-independent (pure ASCII) character classification.  The C
   library's isspace()/isdigit() consult the current locale, which is
   exactly what this file has to avoid when parsing numbers. */

/* True for ' ' and for the contiguous ASCII control range
   '\t' '\n' '\v' '\f' '\r' (codes 9 through 13). */
#define ISSPACE(c) ((c) == ' ' || ((c) >= '\t' && (c) <= '\r'))

/* True for the ASCII decimal digits '0' through '9'. */
#define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
10
11
/**
 * PyOS_ascii_strtod:
 * @nptr:   the string to convert to a numeric value; must not be NULL.
 * @endptr: if non-NULL, receives a pointer to the character after
 *          the last character used in the conversion.
 *
 * Converts a string to a double.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * Leading ASCII whitespace and an optional sign are consumed here;
 * the remainder is handed to the system strtod().  If the locale's
 * decimal point is not ".", the '.' in the input is first replaced by
 * the locale's decimal point in a temporary copy.
 *
 * Error reporting:
 *  - If what follows the optional sign does not start with a digit,
 *    '.', or one of i/I/n/N, or starts with 0x/0X, or (in a locale
 *    whose decimal point is not ".") the digits are followed by the
 *    locale's decimal point, then -1.0 is returned, errno is set to
 *    EINVAL and *endptr is set to @nptr.
 *  - If the temporary copy cannot be allocated, -1.0 is returned,
 *    errno is set to ENOMEM and *endptr is set to @nptr.
 *  - Otherwise errno is reset to 0 before strtod() is called, so any
 *    ERANGE set by strtod() for overflow/underflow can be detected
 *    reliably by the caller.
 *
 * Return value: the converted double.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* The locale's decimal point is not ".": locate the '.' in the
           input (if any) so it can be swapped for the locale's one. */
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            /* Scan over a possible exponent.  This may over-scan (for
               instance a sign without a preceding 'e'); that is
               harmless, because strtod() decides where the number
               really ends and `end` only bounds the copy. */
            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699: the input uses the locale's decimal
               point; the system strtod would accept it, but a
               C-locale parse must not. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* copy = <integer digits> <locale decimal point> <rest> NUL */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate fail_pos from an address inside `copy` back to
               the matching address inside the caller's string.
               fail_pos and decimal_point_pos point into different
               objects, so they must be compared as offsets from the
               start of their own buffers, never as raw pointers. */
            if ((fail_pos - copy) > (decimal_point_pos - digits_pos))
                /* Past the decimal point: the copy is longer than the
                   input by (decimal_point_len - 1) characters. */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the very start of the input, not
       the position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
189
190/* Given a string that may have a decimal point in the current
191 locale, change it back to a dot. Since the string cannot get
192 longer, no need for a maximum buffer size parameter. */
193Py_LOCAL_INLINE(void)
194change_decimal_from_locale_to_dot(char* buffer)
195{
196 struct lconv *locale_data = localeconv();
197 const char *decimal_point = locale_data->decimal_point;
198
199 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
200 size_t decimal_point_len = strlen(decimal_point);
201
202 if (*buffer == '+' || *buffer == '-')
203 buffer++;
204 while (isdigit(Py_CHARMASK(*buffer)))
205 buffer++;
206 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
207 *buffer = '.';
208 buffer++;
209 if (decimal_point_len > 1) {
210 /* buffer needs to get smaller */
211 size_t rest_len = strlen(buffer +
212 (decimal_point_len - 1));
213 memmove(buffer,
214 buffer + (decimal_point_len - 1),
215 rest_len);
216 buffer[rest_len] = 0;
217 }
218 }
219 }
220}
221
222
223/* From the C99 standard, section 7.19.6:
224The exponent always contains at least two digits, and only as many more digits
225as necessary to represent the exponent.
226*/
227#define MIN_EXPONENT_DIGITS 2
228
229/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
230 in length. */
231Py_LOCAL_INLINE(void)
232ensure_minimum_exponent_length(char* buffer, size_t buf_size)
233{
234 char *p = strpbrk(buffer, "eE");
235 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
236 char *start = p + 2;
237 int exponent_digit_cnt = 0;
238 int leading_zero_cnt = 0;
239 int in_leading_zeros = 1;
240 int significant_digit_cnt;
241
242 /* Skip over the exponent and the sign. */
243 p += 2;
244
245 /* Find the end of the exponent, keeping track of leading
246 zeros. */
247 while (*p && isdigit(Py_CHARMASK(*p))) {
248 if (in_leading_zeros && *p == '0')
249 ++leading_zero_cnt;
250 if (*p != '0')
251 in_leading_zeros = 0;
252 ++p;
253 ++exponent_digit_cnt;
254 }
255
256 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
257 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
258 /* If there are 2 exactly digits, we're done,
259 regardless of what they contain */
260 }
261 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
262 int extra_zeros_cnt;
263
264 /* There are more than 2 digits in the exponent. See
265 if we can delete some of the leading zeros */
266 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
267 significant_digit_cnt = MIN_EXPONENT_DIGITS;
268 extra_zeros_cnt = exponent_digit_cnt -
269 significant_digit_cnt;
270
271 /* Delete extra_zeros_cnt worth of characters from the
272 front of the exponent */
273 assert(extra_zeros_cnt >= 0);
274
275 /* Add one to significant_digit_cnt to copy the
276 trailing 0 byte, thus setting the length */
277 memmove(start,
278 start + extra_zeros_cnt,
279 significant_digit_cnt + 1);
280 }
281 else {
282 /* If there are fewer than 2 digits, add zeros
283 until there are 2, if there's enough room */
284 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
285 if (start + zeros + exponent_digit_cnt + 1
286 < buffer + buf_size) {
287 memmove(start + zeros, start,
288 exponent_digit_cnt + 1);
289 memset(start, '0', zeros);
290 }
291 }
292 }
293}
294
295/* Ensure that buffer has a decimal point in it. The decimal point
296 will not be in the current locale, it will always be '.' */
297Py_LOCAL_INLINE(void)
298ensure_decimal_point(char* buffer, size_t buf_size)
299{
300 int insert_count = 0;
301 char* chars_to_insert;
302
303 /* search for the first non-digit character */
304 char *p = buffer;
305 if (*p == '-' || *p == '+')
306 /* Skip leading sign, if present. I think this could only
307 ever be '-', but it can't hurt to check for both. */
308 ++p;
309 while (*p && isdigit(Py_CHARMASK(*p)))
310 ++p;
311
312 if (*p == '.') {
313 if (isdigit(Py_CHARMASK(*(p+1)))) {
314 /* Nothing to do, we already have a decimal
315 point and a digit after it */
316 }
317 else {
318 /* We have a decimal point, but no following
319 digit. Insert a zero after the decimal. */
320 ++p;
321 chars_to_insert = "0";
322 insert_count = 1;
323 }
324 }
325 else {
326 chars_to_insert = ".0";
327 insert_count = 2;
328 }
329 if (insert_count) {
330 size_t buf_len = strlen(buffer);
331 if (buf_len + insert_count + 1 >= buf_size) {
332 /* If there is not enough room in the buffer
333 for the additional text, just skip it. It's
334 not worth generating an error over. */
335 }
336 else {
337 memmove(p + insert_count, p,
338 buffer + strlen(buffer) - p + 1);
339 memcpy(p, chars_to_insert, insert_count);
340 }
341 }
342}
343
344/* Add the locale specific grouping characters to buffer. Note
345 that any decimal point (if it's present) in buffer is already
346 locale-specific. Return 0 on error, else 1. */
347Py_LOCAL_INLINE(int)
348add_thousands_grouping(char* buffer, size_t buf_size)
349{
350 Py_ssize_t len = strlen(buffer);
351 struct lconv *locale_data = localeconv();
352 const char *decimal_point = locale_data->decimal_point;
353
354 /* Find the decimal point, if any. We're only concerned
355 about the characters to the left of the decimal when
356 adding grouping. */
357 char *p = strstr(buffer, decimal_point);
358 if (!p) {
359 /* No decimal, use the entire string. */
360
361 /* If any exponent, adjust p. */
362 p = strpbrk(buffer, "eE");
363 if (!p)
364 /* No exponent and no decimal. Use the entire
365 string. */
366 p = buffer + len;
367 }
368 /* At this point, p points just past the right-most character we
369 want to format. We need to add the grouping string for the
370 characters between buffer and p. */
371 return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
372 buf_size, NULL, 1);
373}
374
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer:   A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format:   The printf()-style format to use for converting.
 * @d:        The double to convert
 *
 * Converts a double to a string, using '.' as the decimal point.
 * To format the number you pass in a printf()-style format string.
 * The format must start with '%', must end with the conversion
 * specifier, and must not contain a single quote, a lowercase 'l'
 * or a second '%'.  Allowed conversion specifiers are
 * 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
 *
 * 'n' is the same as 'g', except it uses the current locale
 * (the locale's decimal point is kept and thousands grouping is
 * added).
 * 'Z' is the same as 'g', except it always has a decimal and
 * at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or NULL if the format is rejected (including an empty format), if
 * an 'n'/'Z' format is too long to be copied, or if adding the
 * thousands grouping fails.
 **/
char *
PyOS_ascii_formatd(char *buffer,
                   size_t buf_size,
                   const char *format,
                   double d)
{
    char format_char;
    size_t format_len = strlen(format);

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's an upcoming check to ensure it's
       big enough. */
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate the leading '%' BEFORE looking at the last character.
       This also rejects the empty string (format[0] is then the
       terminating 0), for which format_len - 1 would wrap around and
       index far outside the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char.
       format_len >= 1 is guaranteed by the check above. */
    format_char = format[format_len - 1];

    /* Reject a format that, after the first character, contains a
       single quote, a lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n' || format_char == 'Z'))
        return NULL;

    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'n' || format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }

    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot.  Do not do this if using the
       'n' (number) format code, since we want to keep the localized
       decimal point in that case. */
    if (format_char != 'n')
        change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    /* If format_char is 'n', add the thousands grouping. */
    if (format_char == 'n') {
        if (!add_thousands_grouping(buffer, buf_size))
            return NULL;
    }

    return buffer;
}
485
486double
487PyOS_ascii_atof(const char *nptr)
488{
489 return PyOS_ascii_strtod(nptr, NULL);
490}
Note: See TracBrowser for help on using the repository browser.