Context Navigation

pystrtod.c@ 394

Last change on this file since 394 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 39.6 KB

Line
1	/* -- Mode: C; c-file-style: "python" -- */
2
3	#include <Python.h>
4	#include <locale.h>
5	#ifdef __EMX__
6	#include <float.h>
7	#endif
8
/* Case-insensitive string match used for nan and inf detection; t should be
   lower-case.  Returns 1 for a successful match, 0 otherwise.

   The match is a prefix match: characters of s beyond the length of t are
   never examined, so s may continue after the matched text. */

static int
case_insensitive_match(const char *s, const char *t)
{
    while (*t && Py_TOLOWER(*s) == *t) {
        s++;
        t++;
    }
    /* t is exhausted only if every one of its characters matched */
    return *t ? 0 : 1;
}
21
22	/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
23	"infinity", with an optional leading sign of "+" or "-". On success,
24	return the NaN or Infinity as a double and set *endptr to point just beyond
25	the successfully parsed portion of the string. On failure, return -1.0 and
26	set endptr to point to the start of the string. /
27
28	double
29	_Py_parse_inf_or_nan(const char p, char *endptr)
30	{
31	double retval;
32	const char *s;
33	int negate = 0;
34
35	s = p;
36	if (*s == '-') {
37	negate = 1;
38	s++;
39	}
40	else if (*s == '+') {
41	s++;
42	}
43	if (case_insensitive_match(s, "inf")) {
44	s += 3;
45	if (case_insensitive_match(s, "inity"))
46	s += 5;
47	retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
48	}
49	#ifdef Py_NAN
50	else if (case_insensitive_match(s, "nan")) {
51	s += 3;
52	retval = negate ? -Py_NAN : Py_NAN;
53	}
54	#endif
55	else {
56	s = p;
57	retval = -1.0;
58	}
59	endptr = (char )s;
60	return retval;
61	}
62
63	/**
64	* PyOS_ascii_strtod:
65	* @nptr: the string to convert to a numeric value.
66	* @endptr: if non-%NULL, it returns the character after
67	* the last character used in the conversion.
68	*
69	* Converts a string to a #gdouble value.
70	* This function behaves like the standard strtod() function
71	* does in the C locale. It does this without actually
72	* changing the current locale, since that would not be
73	* thread-safe.
74	*
75	* This function is typically used when reading configuration
76	* files or other non-user input that should be locale independent.
77	* To handle input from the user you should normally use the
78	* locale-sensitive system strtod() function.
79	*
80	* If the correct value would cause overflow, plus or minus %HUGE_VAL
81	* is returned (according to the sign of the value), and %ERANGE is
82	* stored in %errno. If the correct value would cause underflow,
83	* zero is returned and %ERANGE is stored in %errno.
84	* If memory allocation fails, %ENOMEM is stored in %errno.
85	*
86	* This function resets %errno before calling strtod() so that
87	* you can reliably detect overflow and underflow.
88	*
89	* Return value: the #gdouble value.
90	**/
91
92	#ifndef PY_NO_SHORT_FLOAT_REPR
93
94	double
95	_PyOS_ascii_strtod(const char nptr, char *endptr)
96	{
97	double result;
98	_Py_SET_53BIT_PRECISION_HEADER;
99
100	#ifdef __OS2__
101	/* @todo: Quick hack: disable FPU exceptions to avoid unexpected
102	SIGFPE. The proper way is to fix compiler runtime so that its
103	exception handler undoes FPU CW reset caused by bogus OS/2 DLLs. */
104	_control87(MCW_EM, MCW_EM);
105	#endif
106
107	assert(nptr != NULL);
108	/* Set errno to zero, so that we can distinguish zero results
109	and underflows */
110	errno = 0;
111
112	_Py_SET_53BIT_PRECISION_START;
113	result = _Py_dg_strtod(nptr, endptr);
114	_Py_SET_53BIT_PRECISION_END;
115
116	if (*endptr == nptr)
117	/* string might represent an inf or nan */
118	result = _Py_parse_inf_or_nan(nptr, endptr);
119
120	return result;
121
122	}
123
124	#else
125
/*
   Use system strtod; since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr must be non-NULL and endptr must point to writable storage.  On
   success *endptr is set just past the parsed text.  On invalid input
   *endptr is set to nptr, errno is set to EINVAL and -1.0 is returned.  If
   the temporary copy cannot be allocated, *endptr is set to nptr and errno
   is set to ENOMEM.
*/

double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

#ifdef __OS2__
    /* @todo: Quick hack: disable FPU exceptions to avoid unexpected
       SIGFPE. The proper way is to fix compiler runtime so that its
       exception handler undoes FPU CW reset caused by bogus OS/2 DLLs. */
    _control87(MCW_EM, MCW_EM);
#endif

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate the stop position from 'copy' back into the
               caller's string.  fail_pos points into 'copy' while
               decimal_point_pos points into the input, so compare
               offsets from the start of each buffer rather than the
               raw (unrelated) pointers. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                /* stopped after the decimal point: undo the extra bytes
                   the locale's decimal point added over a single '.' */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
287
288	#endif
289
290	/* PyOS_ascii_strtod is DEPRECATED in Python 2.7 and 3.1 */
291
292	double
293	PyOS_ascii_strtod(const char nptr, char *endptr)
294	{
295	char *fail_pos;
296	const char *p;
297	double x;
298
299	if (PyErr_WarnEx(PyExc_DeprecationWarning,
300	"PyOS_ascii_strtod and PyOS_ascii_atof are "
301	"deprecated. Use PyOS_string_to_double "
302	"instead.", 1) < 0)
303	return -1.0;
304
305	/* _PyOS_ascii_strtod already does everything that we want,
306	except that it doesn't parse leading whitespace */
307	p = nptr;
308	while (Py_ISSPACE(*p))
309	p++;
310	x = _PyOS_ascii_strtod(p, &fail_pos);
311	if (fail_pos == p)
312	fail_pos = (char *)nptr;
313	if (endptr)
314	endptr = (char )fail_pos;
315	return x;
316	}
317
/* PyOS_ascii_atof is DEPRECATED in Python 2.7 and 3.1.

   Convenience wrapper: converts nptr with PyOS_ascii_strtod, passing NULL
   for endptr so that no end position is reported. */

double
PyOS_ascii_atof(const char *nptr)
{
    double value = PyOS_ascii_strtod(nptr, NULL);

    return value;
}
325
326	/* PyOS_string_to_double is the recommended replacement for the deprecated
327	PyOS_ascii_strtod and PyOS_ascii_atof functions. It converts a
328	null-terminated byte string s (interpreted as a string of ASCII characters)
329	to a float. The string should not have leading or trailing whitespace (in
330	contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
331	whitespace). The conversion is independent of the current locale.
332
333	If endptr is NULL, try to convert the whole string. Raise ValueError and
334	return -1.0 if the string is not a valid representation of a floating-point
335	number.
336
337	If endptr is non-NULL, try to convert as much of the string as possible.
338	If no initial segment of the string is the valid representation of a
339	floating-point number then *endptr is set to point to the beginning of the
340	string, -1.0 is returned and again ValueError is raised.
341
342	On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
343	if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
344	exception is raised. Otherwise, overflow_exception should point to a
345	a Python exception, this exception will be raised, -1.0 will be returned,
346	and *endptr will point just past the end of the converted value.
347
348	If any other failure occurs (for example lack of memory), -1.0 is returned
349	and the appropriate Python exception will have been set.
350	*/
351
352	double
353	PyOS_string_to_double(const char *s,
354	char **endptr,
355	PyObject *overflow_exception)
356	{
357	double x, result=-1.0;
358	char *fail_pos;
359
360	errno = 0;
361	PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
362	x = _PyOS_ascii_strtod(s, &fail_pos);
363	PyFPE_END_PROTECT(x)
364
365	if (errno == ENOMEM) {
366	PyErr_NoMemory();
367	fail_pos = (char *)s;
368	}
369	else if (!endptr && (fail_pos == s \|\| *fail_pos != '\0'))
370	PyErr_Format(PyExc_ValueError,
371	"could not convert string to float: "
372	"%.200s", s);
373	else if (fail_pos == s)
374	PyErr_Format(PyExc_ValueError,
375	"could not convert string to float: "
376	"%.200s", s);
377	else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
378	PyErr_Format(overflow_exception,
379	"value too large to convert to float: "
380	"%.200s", s);
381	else
382	result = x;
383
384	if (endptr != NULL)
385	*endptr = fail_pos;
386	return result;
387	}
388
389	/* Given a string that may have a decimal point in the current
390	locale, change it back to a dot. Since the string cannot get
391	longer, no need for a maximum buffer size parameter. */
392	Py_LOCAL_INLINE(void)
393	change_decimal_from_locale_to_dot(char* buffer)
394	{
395	struct lconv *locale_data = localeconv();
396	const char *decimal_point = locale_data->decimal_point;
397
398	if (decimal_point[0] != '.' \|\| decimal_point[1] != 0) {
399	size_t decimal_point_len = strlen(decimal_point);
400
401	if (buffer == '+' \|\| buffer == '-')
402	buffer++;
403	while (Py_ISDIGIT(*buffer))
404	buffer++;
405	if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
406	*buffer = '.';
407	buffer++;
408	if (decimal_point_len > 1) {
409	/* buffer needs to get smaller */
410	size_t rest_len = strlen(buffer +
411	(decimal_point_len - 1));
412	memmove(buffer,
413	buffer + (decimal_point_len - 1),
414	rest_len);
415	buffer[rest_len] = 0;
416	}
417	}
418	}
419	}
420
421
422	/* From the C99 standard, section 7.19.6:
423	The exponent always contains at least two digits, and only as many more digits
424	as necessary to represent the exponent.
425	*/
426	#define MIN_EXPONENT_DIGITS 2
427
428	/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
429	in length. */
430	Py_LOCAL_INLINE(void)
431	ensure_minimum_exponent_length(char* buffer, size_t buf_size)
432	{
433	char *p = strpbrk(buffer, "eE");
434	if (p && ((p + 1) == '-' \|\| (p + 1) == '+')) {
435	char *start = p + 2;
436	int exponent_digit_cnt = 0;
437	int leading_zero_cnt = 0;
438	int in_leading_zeros = 1;
439	int significant_digit_cnt;
440
441	/* Skip over the exponent and the sign. */
442	p += 2;
443
444	/* Find the end of the exponent, keeping track of leading
445	zeros. */
446	while (p && Py_ISDIGIT(p)) {
447	if (in_leading_zeros && *p == '0')
448	++leading_zero_cnt;
449	if (*p != '0')
450	in_leading_zeros = 0;
451	++p;
452	++exponent_digit_cnt;
453	}
454
455	significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
456	if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
457	/* If there are 2 exactly digits, we're done,
458	regardless of what they contain */
459	}
460	else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
461	int extra_zeros_cnt;
462
463	/* There are more than 2 digits in the exponent. See
464	if we can delete some of the leading zeros */
465	if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
466	significant_digit_cnt = MIN_EXPONENT_DIGITS;
467	extra_zeros_cnt = exponent_digit_cnt -
468	significant_digit_cnt;
469
470	/* Delete extra_zeros_cnt worth of characters from the
471	front of the exponent */
472	assert(extra_zeros_cnt >= 0);
473
474	/* Add one to significant_digit_cnt to copy the
475	trailing 0 byte, thus setting the length */
476	memmove(start,
477	start + extra_zeros_cnt,
478	significant_digit_cnt + 1);
479	}
480	else {
481	/* If there are fewer than 2 digits, add zeros
482	until there are 2, if there's enough room */
483	int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
484	if (start + zeros + exponent_digit_cnt + 1
485	< buffer + buf_size) {
486	memmove(start + zeros, start,
487	exponent_digit_cnt + 1);
488	memset(start, '0', zeros);
489	}
490	}
491	}
492	}
493
494	/* Remove trailing zeros after the decimal point from a numeric string; also
495	remove the decimal point if all digits following it are zero. The numeric
496	string must end in '\0', and should not have any leading or trailing
497	whitespace. Assumes that the decimal point is '.'. */
498	Py_LOCAL_INLINE(void)
499	remove_trailing_zeros(char *buffer)
500	{
501	char old_fraction_end, new_fraction_end, end, p;
502
503	p = buffer;
504	if (p == '-' \|\| p == '+')
505	/* Skip leading sign, if present */
506	++p;
507	while (Py_ISDIGIT(*p))
508	++p;
509
510	/* if there's no decimal point there's nothing to do */
511	if (*p++ != '.')
512	return;
513
514	/* scan any digits after the point */
515	while (Py_ISDIGIT(*p))
516	++p;
517	old_fraction_end = p;
518
519	/* scan up to ending '\0' */
520	while (*p != '\0')
521	p++;
522	/* +1 to make sure that we move the null byte as well */
523	end = p+1;
524
525	/* scan back from fraction_end, looking for removable zeros */
526	p = old_fraction_end;
527	while (*(p-1) == '0')
528	--p;
529	/* and remove point if we've got that far */
530	if (*(p-1) == '.')
531	--p;
532	new_fraction_end = p;
533
534	memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
535	}
536
537	/* Ensure that buffer has a decimal point in it. The decimal point will not
538	be in the current locale, it will always be '.'. Don't add a decimal point
539	if an exponent is present. Also, convert to exponential notation where
540	adding a '.0' would produce too many significant digits (see issue 5864).
541
542	Returns a pointer to the fixed buffer, or NULL on failure.
543	*/
544	Py_LOCAL_INLINE(char *)
545	ensure_decimal_point(char* buffer, size_t buf_size, int precision)
546	{
547	int digit_count, insert_count = 0, convert_to_exp = 0;
548	char chars_to_insert, digits_start;
549
550	/* search for the first non-digit character */
551	char *p = buffer;
552	if (p == '-' \|\| p == '+')
553	/* Skip leading sign, if present. I think this could only
554	ever be '-', but it can't hurt to check for both. */
555	++p;
556	digits_start = p;
557	while (p && Py_ISDIGIT(p))
558	++p;
559	digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
560
561	if (*p == '.') {
562	if (Py_ISDIGIT(*(p+1))) {
563	/* Nothing to do, we already have a decimal
564	point and a digit after it */
565	}
566	else {
567	/* We have a decimal point, but no following
568	digit. Insert a zero after the decimal. */
569	/* can't ever get here via PyOS_double_to_string */
570	assert(precision == -1);
571	++p;
572	chars_to_insert = "0";
573	insert_count = 1;
574	}
575	}
576	else if (!(p == 'e' \|\| p == 'E')) {
577	/* Don't add ".0" if we have an exponent. */
578	if (digit_count == precision) {
579	/* issue 5864: don't add a trailing .0 in the case
580	where the '%g'-formatted result already has as many
581	significant digits as were requested. Switch to
582	exponential notation instead. */
583	convert_to_exp = 1;
584	/* no exponent, no point, and we shouldn't land here
585	for infs and nans, so we must be at the end of the
586	string. */
587	assert(*p == '\0');
588	}
589	else {
590	assert(precision == -1 \|\| digit_count < precision);
591	chars_to_insert = ".0";
592	insert_count = 2;
593	}
594	}
595	if (insert_count) {
596	size_t buf_len = strlen(buffer);
597	if (buf_len + insert_count + 1 >= buf_size) {
598	/* If there is not enough room in the buffer
599	for the additional text, just skip it. It's
600	not worth generating an error over. */
601	}
602	else {
603	memmove(p + insert_count, p,
604	buffer + strlen(buffer) - p + 1);
605	memcpy(p, chars_to_insert, insert_count);
606	}
607	}
608	if (convert_to_exp) {
609	int written;
610	size_t buf_avail;
611	p = digits_start;
612	/* insert decimal point */
613	assert(digit_count >= 1);
614	memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
615	p[1] = '.';
616	p += digit_count+1;
617	assert(p <= buf_size+buffer);
618	buf_avail = buf_size+buffer-p;
619	if (buf_avail == 0)
620	return NULL;
621	/* Add exponent. It's okay to use lower case 'e': we only
622	arrive here as a result of using the empty format code or
623	repr/str builtins and those never want an upper case 'E' */
624	written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
625	if (!(0 <= written &&
626	written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
627	/* output truncated, or something else bad happened */
628	return NULL;
629	remove_trailing_zeros(buffer);
630	}
631	return buffer;
632	}
633
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The #gdouble to convert
 * @precision: The requested number of significant digits, or -1; only
 *             consulted for the 'Z' conversion.
 *
 * Converts a #gdouble to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure returns NULL but does not set any Python exception.
 **/
char *
_PyOS_ascii_formatd(char       *buffer,
                    size_t      buf_size,
                    const char *format,
                    double      d,
                    int         precision)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* The format must start with '%'.  Testing this first also rejects the
       empty string, so that format_len >= 1 when format[format_len - 1] is
       read below (otherwise the index would wrap around). */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* I'm not sure why this test is here.  It's ensuring that the format
       string after the first character doesn't have a single quote, a
       lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits that we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
735
736	char *
737	PyOS_ascii_formatd(char *buffer,
738	size_t buf_size,
739	const char *format,
740	double d)
741	{
742	if (PyErr_WarnEx(PyExc_DeprecationWarning,
743	"PyOS_ascii_formatd is deprecated, "
744	"use PyOS_double_to_string instead", 1) < 0)
745	return NULL;
746
747	return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
748	}
749
750	#ifdef PY_NO_SHORT_FLOAT_REPR
751
752	/* The fallback code to use if _Py_dg_dtoa is not available. */
753
754	PyAPI_FUNC(char *) PyOS_double_to_string(double val,
755	char format_code,
756	int precision,
757	int flags,
758	int *type)
759	{
760	char format[32];
761	Py_ssize_t bufsize;
762	char *buf;
763	int t, exp;
764	int upper = 0;
765
766	/* Validate format_code, and map upper and lower case */
767	switch (format_code) {
768	case 'e': /* exponent */
769	case 'f': /* fixed */
770	case 'g': /* general */
771	break;
772	case 'E':
773	upper = 1;
774	format_code = 'e';
775	break;
776	case 'F':
777	upper = 1;
778	format_code = 'f';
779	break;
780	case 'G':
781	upper = 1;
782	format_code = 'g';
783	break;
784	case 'r': /* repr format */
785	/* Supplied precision is unused, must be 0. */
786	if (precision != 0) {
787	PyErr_BadInternalCall();
788	return NULL;
789	}
790	/* The repr() precision (17 significant decimal digits) is the
791	minimal number that is guaranteed to have enough precision
792	so that if the number is read back in the exact same binary
793	value is recreated. This is true for IEEE floating point
794	by design, and also happens to work for all other modern
795	hardware. */
796	precision = 17;
797	format_code = 'g';
798	break;
799	default:
800	PyErr_BadInternalCall();
801	return NULL;
802	}
803
804	/* Here's a quick-and-dirty calculation to figure out how big a buffer
805	we need. In general, for a finite float we need:
806
807	1 byte for each digit of the decimal significand, and
808
809	1 for a possible sign
810	1 for a possible decimal point
811	2 for a possible [eE][+-]
812	1 for each digit of the exponent; if we allow 19 digits
813	total then we're safe up to exponents of 2**63.
814	1 for the trailing nul byte
815
816	This gives a total of 24 + the number of digits in the significand,
817	and the number of digits in the significand is:
818
819	for 'g' format: at most precision, except possibly
820	when precision == 0, when it's 1.
821	for 'e' format: precision+1
822	for 'f' format: precision digits after the point, at least 1
823	before. To figure out how many digits appear before the point
824	we have to examine the size of the number. If fabs(val) < 1.0
825	then there will be only one digit before the point. If
826	fabs(val) >= 1.0, then there are at most
827
828	1+floor(log10(ceiling(fabs(val))))
829
830	digits before the point (where the 'ceiling' allows for the
831	possibility that the rounding rounds the integer part of val
832	up). A safe upper bound for the above quantity is
833	1+floor(exp/3), where exp is the unique integer such that 0.5
834	<= fabs(val)/2**exp < 1.0. This exp can be obtained from
835	frexp.
836
837	So we allow room for precision+1 digits for all formats, plus an
838	extra floor(exp/3) digits for 'f' format.
839
840	*/
841
842	if (Py_IS_NAN(val) \|\| Py_IS_INFINITY(val))
843	/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
844	bufsize = 5;
845	else {
846	bufsize = 25 + precision;
847	if (format_code == 'f' && fabs(val) >= 1.0) {
848	frexp(val, &exp);
849	bufsize += exp/3;
850	}
851	}
852
853	buf = PyMem_Malloc(bufsize);
854	if (buf == NULL) {
855	PyErr_NoMemory();
856	return NULL;
857	}
858
859	/* Handle nan and inf. */
860	if (Py_IS_NAN(val)) {
861	strcpy(buf, "nan");
862	t = Py_DTST_NAN;
863	} else if (Py_IS_INFINITY(val)) {
864	if (copysign(1., val) == 1.)
865	strcpy(buf, "inf");
866	else
867	strcpy(buf, "-inf");
868	t = Py_DTST_INFINITE;
869	} else {
870	t = Py_DTST_FINITE;
871	if (flags & Py_DTSF_ADD_DOT_0)
872	format_code = 'Z';
873
874	PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
875	(flags & Py_DTSF_ALT ? "#" : ""), precision,
876	format_code);
877	_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
878	}
879
880	/* Add sign when requested. It's convenient (esp. when formatting
881	complex numbers) to include a sign even for inf and nan. */
882	if (flags & Py_DTSF_SIGN && buf[0] != '-') {
883	size_t len = strlen(buf);
884	/* the bufsize calculations above should ensure that we've got
885	space to add a sign */
886	assert((size_t)bufsize >= len+2);
887	memmove(buf+1, buf, len+1);
888	buf[0] = '+';
889	}
890	if (upper) {
891	/* Convert to upper case. */
892	char *p1;
893	for (p1 = buf; *p1; p1++)
894	p1 = Py_TOUPPER(p1);
895	}
896
897	if (type)
898	*type = t;
899	return buf;
900	}
901
902	#else
903
904	/* _Py_dg_dtoa is available. */
905
/* Lookup tables for the fixed text fragments of a formatted float, one per
   case, so that upper-casing does not need a locale-dependent conversion.
   The OFS_* constants are the indices into either table. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
static char *lc_float_strings[] = { "inf", "nan", "e" };
static char *uc_float_strings[] = { "INF", "NAN", "E" };
923
924
925	/* Convert a double d to a string, and return a PyMem_Malloc'd block of
926	memory contain the resulting string.
927
928	Arguments:
929	d is the double to be converted
930	format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
931	correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
932	mode is one of '0', '2' or '3', and is completely determined by
933	format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
934	precision is the desired precision
935	always_add_sign is nonzero if a '+' sign should be included for positive
936	numbers
937	add_dot_0_if_integer is nonzero if integers in non-exponential form
938	should have ".0" added. Only applies to format codes 'r' and 'g'.
939	use_alt_formatting is nonzero if alternative formatting should be
940	used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
941	at most one of use_alt_formatting and add_dot_0_if_integer should
942	be nonzero.
943	type, if non-NULL, will be set to one of these constants to identify
944	the type of the 'd' argument:
945	Py_DTST_FINITE
946	Py_DTST_INFINITE
947	Py_DTST_NAN
948
949	Returns a PyMem_Malloc'd block of memory containing the resulting string,
950	or NULL on error. If NULL is returned, the Python error has been set.
951	*/
952
953	static char *
954	format_float_short(double d, char format_code,
955	int mode, Py_ssize_t precision,
956	int always_add_sign, int add_dot_0_if_integer,
957	int use_alt_formatting, char *float_strings, int type)
958	{
959	char *buf = NULL;
960	char *p = NULL;
961	Py_ssize_t bufsize = 0;
962	char digits, digits_end;
963	int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
964	Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
965	_Py_SET_53BIT_PRECISION_HEADER;
966
967	/* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
968	Must be matched by a call to _Py_dg_freedtoa. */
969	_Py_SET_53BIT_PRECISION_START;
970	digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
971	&digits_end);
972	_Py_SET_53BIT_PRECISION_END;
973
974	decpt = (Py_ssize_t)decpt_as_int;
975	if (digits == NULL) {
976	/* The only failure mode is no memory. */
977	PyErr_NoMemory();
978	goto exit;
979	}
980	assert(digits_end != NULL && digits_end >= digits);
981	digits_len = digits_end - digits;
982
983	if (digits_len && !Py_ISDIGIT(digits[0])) {
984	/* Infinities and nans here; adapt Gay's output,
985	so convert Infinity to inf and NaN to nan, and
986	ignore sign of nan. Then return. */
987
988	/* ignore the actual sign of a nan */
989	if (digits[0] == 'n' \|\| digits[0] == 'N')
990	sign = 0;
991
992	/* We only need 5 bytes to hold the result "+inf\0" . */
993	bufsize = 5; /* Used later in an assert. */
994	buf = (char *)PyMem_Malloc(bufsize);
995	if (buf == NULL) {
996	PyErr_NoMemory();
997	goto exit;
998	}
999	p = buf;
1000
1001	if (sign == 1) {
1002	*p++ = '-';
1003	}
1004	else if (always_add_sign) {
1005	*p++ = '+';
1006	}
1007	if (digits[0] == 'i' \|\| digits[0] == 'I') {
1008	strncpy(p, float_strings[OFS_INF], 3);
1009	p += 3;
1010
1011	if (type)
1012	*type = Py_DTST_INFINITE;
1013	}
1014	else if (digits[0] == 'n' \|\| digits[0] == 'N') {
1015	strncpy(p, float_strings[OFS_NAN], 3);
1016	p += 3;
1017
1018	if (type)
1019	*type = Py_DTST_NAN;
1020	}
1021	else {
1022	/* shouldn't get here: Gay's code should always return
1023	something starting with a digit, an 'I', or 'N' */
1024	strncpy(p, "ERR", 3);
1025	p += 3;
1026	assert(0);
1027	}
1028	goto exit;
1029	}
1030
1031	/* The result must be finite (not inf or nan). */
1032	if (type)
1033	*type = Py_DTST_FINITE;
1034
1035
1036	/* We got digits back, format them. We may need to pad 'digits'
1037	either on the left or right (or both) with extra zeros, so in
1038	general the resulting string has the form
1039
1040	[<sign>]<zeros><digits><zeros>[<exponent>]
1041
1042	where either of the <zeros> pieces could be empty, and there's a
1043	decimal point that could appear either in <digits> or in the
1044	leading or trailing <zeros>.
1045
1046	Imagine an infinite 'virtual' string vdigits, consisting of the
1047	string 'digits' (starting at index 0) padded on both the left and
1048	right with infinite strings of zeros. We want to output a slice
1049
1050	vdigits[vdigits_start : vdigits_end]
1051
1052	of this virtual string. Thus if vdigits_start < 0 then we'll end
1053	up producing some leading zeros; if vdigits_end > digits_len there
1054	will be trailing zeros in the output. The next section of code
1055	determines whether to use an exponent or not, figures out the
1056	position 'decpt' of the decimal point, and computes 'vdigits_start'
1057	and 'vdigits_end'. */
1058	vdigits_end = digits_len;
1059	switch (format_code) {
1060	case 'e':
1061	use_exp = 1;
1062	vdigits_end = precision;
1063	break;
1064	case 'f':
1065	vdigits_end = decpt + precision;
1066	break;
1067	case 'g':
1068	if (decpt <= -4 \|\| decpt >
1069	(add_dot_0_if_integer ? precision-1 : precision))
1070	use_exp = 1;
1071	if (use_alt_formatting)
1072	vdigits_end = precision;
1073	break;
1074	case 'r':
1075	/* convert to exponential format at 1e16. We used to convert
1076	at 1e17, but that gives odd-looking results for some values
1077	when a 16-digit 'shortest' repr is padded with bogus zeros.
1078	For example, repr(2e16+8) would give 20000000000000010.0;
1079	the true value is 20000000000000008.0. */
1080	if (decpt <= -4 \|\| decpt > 16)
1081	use_exp = 1;
1082	break;
1083	default:
1084	PyErr_BadInternalCall();
1085	goto exit;
1086	}
1087
1088	/* if using an exponent, reset decimal point position to 1 and adjust
1089	exponent accordingly.*/
1090	if (use_exp) {
1091	exp = decpt - 1;
1092	decpt = 1;
1093	}
1094	/* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1095	decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1096	vdigits_start = decpt <= 0 ? decpt-1 : 0;
1097	if (!use_exp && add_dot_0_if_integer)
1098	vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1099	else
1100	vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1101
1102	/* double check inequalities */
1103	assert(vdigits_start <= 0 &&
1104	0 <= digits_len &&
1105	digits_len <= vdigits_end);
1106	/* decimal point should be in (vdigits_start, vdigits_end] */
1107	assert(vdigits_start < decpt && decpt <= vdigits_end);
1108
1109	/* Compute an upper bound how much memory we need. This might be a few
1110	chars too long, but no big deal. */
1111	bufsize =
1112	/* sign, decimal point and trailing 0 byte */
1113	3 +
1114
1115	/* total digit count (including zero padding on both sides) */
1116	(vdigits_end - vdigits_start) +
1117
1118	/* exponent "e+100", max 3 numerical digits */
1119	(use_exp ? 5 : 0);
1120
1121	/* Now allocate the memory and initialize p to point to the start of
1122	it. */
1123	buf = (char *)PyMem_Malloc(bufsize);
1124	if (buf == NULL) {
1125	PyErr_NoMemory();
1126	goto exit;
1127	}
1128	p = buf;
1129
1130	/* Add a negative sign if negative, and a plus sign if non-negative
1131	and always_add_sign is true. */
1132	if (sign == 1)
1133	*p++ = '-';
1134	else if (always_add_sign)
1135	*p++ = '+';
1136
1137	/* note that exactly one of the three 'if' conditions is true,
1138	so we include exactly one decimal point */
1139	/* Zero padding on left of digit string */
1140	if (decpt <= 0) {
1141	memset(p, '0', decpt-vdigits_start);
1142	p += decpt - vdigits_start;
1143	*p++ = '.';
1144	memset(p, '0', 0-decpt);
1145	p += 0-decpt;
1146	}
1147	else {
1148	memset(p, '0', 0-vdigits_start);
1149	p += 0 - vdigits_start;
1150	}
1151
1152	/* Digits, with included decimal point */
1153	if (0 < decpt && decpt <= digits_len) {
1154	strncpy(p, digits, decpt-0);
1155	p += decpt-0;
1156	*p++ = '.';
1157	strncpy(p, digits+decpt, digits_len-decpt);
1158	p += digits_len-decpt;
1159	}
1160	else {
1161	strncpy(p, digits, digits_len);
1162	p += digits_len;
1163	}
1164
1165	/* And zeros on the right */
1166	if (digits_len < decpt) {
1167	memset(p, '0', decpt-digits_len);
1168	p += decpt-digits_len;
1169	*p++ = '.';
1170	memset(p, '0', vdigits_end-decpt);
1171	p += vdigits_end-decpt;
1172	}
1173	else {
1174	memset(p, '0', vdigits_end-digits_len);
1175	p += vdigits_end-digits_len;
1176	}
1177
1178	/* Delete a trailing decimal pt unless using alternative formatting. */
1179	if (p[-1] == '.' && !use_alt_formatting)
1180	p--;
1181
1182	/* Now that we've done zero padding, add an exponent if needed. */
1183	if (use_exp) {
1184	*p++ = float_strings[OFS_E][0];
1185	exp_len = sprintf(p, "%+.02d", exp);
1186	p += exp_len;
1187	}
1188	exit:
1189	if (buf) {
1190	*p = '\0';
1191	/* It's too late if this fails, as we've already stepped on
1192	memory that isn't ours. But it's an okay debugging test. */
1193	assert(p-buf < bufsize);
1194	}
1195	if (digits)
1196	_Py_dg_freedtoa(digits);
1197
1198	return buf;
1199	}
1200
1201
1202	PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1203	char format_code,
1204	int precision,
1205	int flags,
1206	int *type)
1207	{
1208	char **float_strings = lc_float_strings;
1209	int mode;
1210
1211	/* Validate format_code, and map upper and lower case. Compute the
1212	mode and make any adjustments as needed. */
1213	switch (format_code) {
1214	/* exponent */
1215	case 'E':
1216	float_strings = uc_float_strings;
1217	format_code = 'e';
1218	/* Fall through. */
1219	case 'e':
1220	mode = 2;
1221	precision++;
1222	break;
1223
1224	/* fixed */
1225	case 'F':
1226	float_strings = uc_float_strings;
1227	format_code = 'f';
1228	/* Fall through. */
1229	case 'f':
1230	mode = 3;
1231	break;
1232
1233	/* general */
1234	case 'G':
1235	float_strings = uc_float_strings;
1236	format_code = 'g';
1237	/* Fall through. */
1238	case 'g':
1239	mode = 2;
1240	/* precision 0 makes no sense for 'g' format; interpret as 1 */
1241	if (precision == 0)
1242	precision = 1;
1243	break;
1244
1245	/* repr format */
1246	case 'r':
1247	mode = 0;
1248	/* Supplied precision is unused, must be 0. */
1249	if (precision != 0) {
1250	PyErr_BadInternalCall();
1251	return NULL;
1252	}
1253	break;
1254
1255	default:
1256	PyErr_BadInternalCall();
1257	return NULL;
1258	}
1259
1260	return format_float_short(val, format_code, mode, precision,
1261	flags & Py_DTSF_SIGN,
1262	flags & Py_DTSF_ADD_DOT_0,
1263	flags & Py_DTSF_ALT,
1264	float_strings, type);
1265	}
1266	#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Python/pystrtod.c@ 394

Download in other formats: