source: python/trunk/Python/pystrtod.c@ 380

Last change on this file since 380 was 10, checked in by Yuri Dario, 15 years ago

python: merged offline changes.

  • Property svn:eol-style set to native
File size: 13.8 KB
Line 
1/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5#ifdef __EMX__
6#include <float.h>
7#endif
8
9
/* ASCII-only character classification.  Unlike the <ctype.h> functions,
   these never consult the current locale.  Note that the argument is
   evaluated more than once, so it must be free of side effects. */
#define ISSPACE(c)  ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                     (c) == '\v' || (c) == '\f' || (c) == '\r')
#define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
14
15
/**
 * PyOS_ascii_strtod:
 * @nptr:   the string to convert to a numeric value; must not be NULL.
 * @endptr: if non-NULL, receives a pointer to the character after
 *          the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * Leading ASCII whitespace and an optional sign are consumed here; the
 * remainder is handed to the system strtod().  Overflow and underflow
 * reporting (ERANGE, HUGE_VAL, zero) is therefore whatever the system
 * strtod() provides.
 *
 * If the text after the optional sign does not start with a digit, '.',
 * or one of the letters i, I, n, N, or if it starts with "0x"/"0X", or
 * if the digits are directly followed by the locale's (non-'.') decimal
 * point, -1.0 is returned, EINVAL is stored in errno and *@endptr is set
 * to @nptr.
 * If memory allocation fails, -1.0 is returned, ENOMEM is stored in
 * errno and *@endptr is set to @nptr.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos = NULL;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p;
    const char *decimal_point_pos = NULL;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

#ifdef __EMX__
    /* Set every bit of the floating-point exception mask before calling
       strtod().  The port's author marked the plain strtod() call below
       as a SIGFPE site on this platform; this is presumably the
       workaround.  Kept after the declarations so that C89 compilers
       accept the function. */
    _control87(MCW_EM, MCW_EM);
#endif

    assert(nptr != NULL);

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N.  It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    /* Only when the locale's decimal point is not exactly "." do we have
       to locate the '.' in the input and translate it. */
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        while (ISDIGIT(*p))
            p++;

        if (*p == '.') {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            /* Over-approximate the extent of the number (optional
               exponent marker, optional sign, digits); strtod() on the
               copy decides how much is really consumed. */
            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699: refuse the locale's decimal point,
               which the system strtod() would otherwise accept. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos) {
        char *copy, *c;
        size_t int_part_len = (size_t)(decimal_point_pos - digits_pos);

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, int_part_len);
        c += int_part_len;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos) {
            /* Map the stop position in `copy` back onto the caller's
               string.  Compare offsets, not raw pointers: fail_pos points
               into `copy` while decimal_point_pos points into the input,
               and ordering pointers into different objects is undefined.
               If strtod() went past the decimal point, compensate for
               the point occupying decimal_point_len bytes in the copy
               but only one byte in the input. */
            size_t consumed = (size_t)(fail_pos - copy);

            if (consumed > int_part_len)
                fail_pos = (char *)digits_pos + consumed -
                           (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos + consumed;
        }

        PyMem_FREE(copy);
    }
    else {
        /* Marked "sigfpe here" by the EMX port author; see the
           _control87() call at the top of the function. */
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing converted: report the very start of the input, not the
       position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
197
198/* Given a string that may have a decimal point in the current
199 locale, change it back to a dot. Since the string cannot get
200 longer, no need for a maximum buffer size parameter. */
201Py_LOCAL_INLINE(void)
202change_decimal_from_locale_to_dot(char* buffer)
203{
204 struct lconv *locale_data = localeconv();
205 const char *decimal_point = locale_data->decimal_point;
206
207 if (decimal_point[0] != '.' || decimal_point[1] != 0) {
208 size_t decimal_point_len = strlen(decimal_point);
209
210 if (*buffer == '+' || *buffer == '-')
211 buffer++;
212 while (isdigit(Py_CHARMASK(*buffer)))
213 buffer++;
214 if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
215 *buffer = '.';
216 buffer++;
217 if (decimal_point_len > 1) {
218 /* buffer needs to get smaller */
219 size_t rest_len = strlen(buffer +
220 (decimal_point_len - 1));
221 memmove(buffer,
222 buffer + (decimal_point_len - 1),
223 rest_len);
224 buffer[rest_len] = 0;
225 }
226 }
227 }
228}
229
230
231/* From the C99 standard, section 7.19.6:
232The exponent always contains at least two digits, and only as many more digits
233as necessary to represent the exponent.
234*/
235#define MIN_EXPONENT_DIGITS 2
236
237/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
238 in length. */
239Py_LOCAL_INLINE(void)
240ensure_minimum_exponent_length(char* buffer, size_t buf_size)
241{
242 char *p = strpbrk(buffer, "eE");
243 if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
244 char *start = p + 2;
245 int exponent_digit_cnt = 0;
246 int leading_zero_cnt = 0;
247 int in_leading_zeros = 1;
248 int significant_digit_cnt;
249
250 /* Skip over the exponent and the sign. */
251 p += 2;
252
253 /* Find the end of the exponent, keeping track of leading
254 zeros. */
255 while (*p && isdigit(Py_CHARMASK(*p))) {
256 if (in_leading_zeros && *p == '0')
257 ++leading_zero_cnt;
258 if (*p != '0')
259 in_leading_zeros = 0;
260 ++p;
261 ++exponent_digit_cnt;
262 }
263
264 significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
265 if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
266 /* If there are 2 exactly digits, we're done,
267 regardless of what they contain */
268 }
269 else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
270 int extra_zeros_cnt;
271
272 /* There are more than 2 digits in the exponent. See
273 if we can delete some of the leading zeros */
274 if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
275 significant_digit_cnt = MIN_EXPONENT_DIGITS;
276 extra_zeros_cnt = exponent_digit_cnt -
277 significant_digit_cnt;
278
279 /* Delete extra_zeros_cnt worth of characters from the
280 front of the exponent */
281 assert(extra_zeros_cnt >= 0);
282
283 /* Add one to significant_digit_cnt to copy the
284 trailing 0 byte, thus setting the length */
285 memmove(start,
286 start + extra_zeros_cnt,
287 significant_digit_cnt + 1);
288 }
289 else {
290 /* If there are fewer than 2 digits, add zeros
291 until there are 2, if there's enough room */
292 int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
293 if (start + zeros + exponent_digit_cnt + 1
294 < buffer + buf_size) {
295 memmove(start + zeros, start,
296 exponent_digit_cnt + 1);
297 memset(start, '0', zeros);
298 }
299 }
300 }
301}
302
303/* Ensure that buffer has a decimal point in it. The decimal point
304 will not be in the current locale, it will always be '.' */
305Py_LOCAL_INLINE(void)
306ensure_decimal_point(char* buffer, size_t buf_size)
307{
308 int insert_count = 0;
309 char* chars_to_insert;
310
311 /* search for the first non-digit character */
312 char *p = buffer;
313 if (*p == '-' || *p == '+')
314 /* Skip leading sign, if present. I think this could only
315 ever be '-', but it can't hurt to check for both. */
316 ++p;
317 while (*p && isdigit(Py_CHARMASK(*p)))
318 ++p;
319
320 if (*p == '.') {
321 if (isdigit(Py_CHARMASK(*(p+1)))) {
322 /* Nothing to do, we already have a decimal
323 point and a digit after it */
324 }
325 else {
326 /* We have a decimal point, but no following
327 digit. Insert a zero after the decimal. */
328 ++p;
329 chars_to_insert = "0";
330 insert_count = 1;
331 }
332 }
333 else {
334 chars_to_insert = ".0";
335 insert_count = 2;
336 }
337 if (insert_count) {
338 size_t buf_len = strlen(buffer);
339 if (buf_len + insert_count + 1 >= buf_size) {
340 /* If there is not enough room in the buffer
341 for the additional text, just skip it. It's
342 not worth generating an error over. */
343 }
344 else {
345 memmove(p + insert_count, p,
346 buffer + strlen(buffer) - p + 1);
347 memcpy(p, chars_to_insert, insert_count);
348 }
349 }
350}
351
352/* Add the locale specific grouping characters to buffer. Note
353 that any decimal point (if it's present) in buffer is already
354 locale-specific. Return 0 on error, else 1. */
355Py_LOCAL_INLINE(int)
356add_thousands_grouping(char* buffer, size_t buf_size)
357{
358 Py_ssize_t len = strlen(buffer);
359 struct lconv *locale_data = localeconv();
360 const char *decimal_point = locale_data->decimal_point;
361
362 /* Find the decimal point, if any. We're only concerned
363 about the characters to the left of the decimal when
364 adding grouping. */
365 char *p = strstr(buffer, decimal_point);
366 if (!p) {
367 /* No decimal, use the entire string. */
368
369 /* If any exponent, adjust p. */
370 p = strpbrk(buffer, "eE");
371 if (!p)
372 /* No exponent and no decimal. Use the entire
373 string. */
374 p = buffer + len;
375 }
376 /* At this point, p points just past the right-most character we
377 want to format. We need to add the grouping string for the
378 characters between buffer and p. */
379 return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
380 buf_size, NULL, 1);
381}
382
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point.  To format the number you pass in
 * a printf()-style format string.  Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
 *
 * 'n' is the same as 'g', except it uses the current locale
 * (localized decimal point plus thousands grouping).
 * 'Z' is the same as 'g', except it always has a decimal and
 * at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or NULL if @format is rejected (it does not start with '%', contains
 * a single quote, 'l' or '%' after the first character, does not end in
 * one of the allowed specifiers, or is too long to copy for 'n'/'Z')
 * or if adding the thousands grouping fails.
 **/
char *
PyOS_ascii_formatd(char *buffer,
                   size_t buf_size,
                   const char *format,
                   double d)
{
    char format_char;
    size_t format_len;

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's a check below to ensure it's
       big enough. */
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Check the leading '%' before anything else.  This also rejects the
       empty string, which guarantees format_len >= 1 so that reading
       format[format_len - 1] below stays inside the string. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_len = strlen(format);
    format_char = format[format_len - 1];

    /* Reject a single quote, a lowercase l, or a second percent anywhere
       after the first character. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n' || format_char == 'Z'))
        return NULL;

    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'n' || format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        /* format_len + 1 bytes: the text plus its terminator. */
        memcpy(tmp_format, format, format_len + 1);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }

    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot.  Do not do this if using the
       'n' (number) format code, since we want to keep the localized
       decimal point in that case. */
    if (format_char != 'n')
        change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    /* If format_char is 'n', add the thousands grouping. */
    if (format_char == 'n') {
        if (!add_thousands_grouping(buffer, buf_size))
            return NULL;
    }

    return buffer;
}
493
494double
495PyOS_ascii_atof(const char *nptr)
496{
497 return PyOS_ascii_strtod(nptr, NULL);
498}
Note: See TracBrowser for help on using the repository browser.