Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

pystrtod.c

Last change on this file was 388, checked in by dmik, 11 years ago
python: Update vendor to 2.7.6.
Property svn:eol-style set to `native`
File size: 39.0 KB

Line
/* -*- Mode: C; c-file-style: "python" -*- */
2
3	#include <Python.h>
4	#include <locale.h>
5
/* Case-insensitive prefix match used for nan and inf detection.

   s: NUL-terminated input string being examined.
   t: NUL-terminated pattern to look for; it should already be lower-case,
      because only the characters of s are lower-cased before comparing.

   Returns 1 if s starts with t (ignoring the case of s), 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    /* Walk both strings while the pattern has characters left and the
       lower-cased input character agrees with the pattern character. */
    while (*t && Py_TOLOWER(*s) == *t) {
        s++;
        t++;
    }
    /* A full match means the loop stopped on the pattern's terminating NUL. */
    return *t ? 0 : 1;
}
18
19	/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20	"infinity", with an optional leading sign of "+" or "-". On success,
21	return the NaN or Infinity as a double and set *endptr to point just beyond
22	the successfully parsed portion of the string. On failure, return -1.0 and
23	set endptr to point to the start of the string. /
24
25	double
26	_Py_parse_inf_or_nan(const char p, char *endptr)
27	{
28	double retval;
29	const char *s;
30	int negate = 0;
31
32	s = p;
33	if (*s == '-') {
34	negate = 1;
35	s++;
36	}
37	else if (*s == '+') {
38	s++;
39	}
40	if (case_insensitive_match(s, "inf")) {
41	s += 3;
42	if (case_insensitive_match(s, "inity"))
43	s += 5;
44	retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
45	}
46	#ifdef Py_NAN
47	else if (case_insensitive_match(s, "nan")) {
48	s += 3;
49	retval = negate ? -Py_NAN : Py_NAN;
50	}
51	#endif
52	else {
53	s = p;
54	retval = -1.0;
55	}
56	endptr = (char )s;
57	return retval;
58	}
59
60	/**
61	* PyOS_ascii_strtod:
62	* @nptr: the string to convert to a numeric value.
63	* @endptr: if non-%NULL, it returns the character after
64	* the last character used in the conversion.
65	*
66	* Converts a string to a #gdouble value.
67	* This function behaves like the standard strtod() function
68	* does in the C locale. It does this without actually
69	* changing the current locale, since that would not be
70	* thread-safe.
71	*
72	* This function is typically used when reading configuration
73	* files or other non-user input that should be locale independent.
74	* To handle input from the user you should normally use the
75	* locale-sensitive system strtod() function.
76	*
77	* If the correct value would cause overflow, plus or minus %HUGE_VAL
78	* is returned (according to the sign of the value), and %ERANGE is
79	* stored in %errno. If the correct value would cause underflow,
80	* zero is returned and %ERANGE is stored in %errno.
81	* If memory allocation fails, %ENOMEM is stored in %errno.
82	*
83	* This function resets %errno before calling strtod() so that
84	* you can reliably detect overflow and underflow.
85	*
86	* Return value: the #gdouble value.
87	**/
88
89	#ifndef PY_NO_SHORT_FLOAT_REPR
90
91	double
92	_PyOS_ascii_strtod(const char nptr, char *endptr)
93	{
94	double result;
95	_Py_SET_53BIT_PRECISION_HEADER;
96
97	assert(nptr != NULL);
98	/* Set errno to zero, so that we can distinguish zero results
99	and underflows */
100	errno = 0;
101
102	_Py_SET_53BIT_PRECISION_START;
103	result = _Py_dg_strtod(nptr, endptr);
104	_Py_SET_53BIT_PRECISION_END;
105
106	if (*endptr == nptr)
107	/* string might represent an inf or nan */
108	result = _Py_parse_inf_or_nan(nptr, endptr);
109
110	return result;
111
112	}
113
114	#else
115
/*
   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr:   NUL-terminated string to parse; must be non-NULL.
   endptr: output; must be non-NULL, it is written on every return path.

   On success *endptr points just past the parsed text.  If nothing could
   be parsed, *endptr is set to nptr, errno is set to EINVAL and -1.0 is
   returned.  If the temporary locale-adjusted copy cannot be allocated,
   *endptr is set to nptr and errno is set to ENOMEM.
*/

double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* Assemble: digits before '.', locale decimal point, rest. */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        /* Translate the stop position in the copy back to a position in
           the caller's string, compensating for the length difference
           between '.' and the locale's decimal point. */
        if (fail_pos)
        {
            if (fail_pos > decimal_point_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
270
271	#endif
272
273	/* PyOS_ascii_strtod is DEPRECATED in Python 2.7 and 3.1 */
274
275	double
276	PyOS_ascii_strtod(const char nptr, char *endptr)
277	{
278	char *fail_pos;
279	const char *p;
280	double x;
281
282	if (PyErr_WarnEx(PyExc_DeprecationWarning,
283	"PyOS_ascii_strtod and PyOS_ascii_atof are "
284	"deprecated. Use PyOS_string_to_double "
285	"instead.", 1) < 0)
286	return -1.0;
287
288	/* _PyOS_ascii_strtod already does everything that we want,
289	except that it doesn't parse leading whitespace */
290	p = nptr;
291	while (Py_ISSPACE(*p))
292	p++;
293	x = _PyOS_ascii_strtod(p, &fail_pos);
294	if (fail_pos == p)
295	fail_pos = (char *)nptr;
296	if (endptr)
297	endptr = (char )fail_pos;
298	return x;
299	}
300
301	/* PyOS_ascii_strtod is DEPRECATED in Python 2.7 and 3.1 */
302
303	double
304	PyOS_ascii_atof(const char *nptr)
305	{
306	return PyOS_ascii_strtod(nptr, NULL);
307	}
308
309	/* PyOS_string_to_double is the recommended replacement for the deprecated
310	PyOS_ascii_strtod and PyOS_ascii_atof functions. It converts a
311	null-terminated byte string s (interpreted as a string of ASCII characters)
312	to a float. The string should not have leading or trailing whitespace (in
313	contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
314	whitespace). The conversion is independent of the current locale.
315
316	If endptr is NULL, try to convert the whole string. Raise ValueError and
317	return -1.0 if the string is not a valid representation of a floating-point
318	number.
319
320	If endptr is non-NULL, try to convert as much of the string as possible.
321	If no initial segment of the string is the valid representation of a
322	floating-point number then *endptr is set to point to the beginning of the
323	string, -1.0 is returned and again ValueError is raised.
324
325	On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
326	if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
327	exception is raised. Otherwise, overflow_exception should point to a
328	a Python exception, this exception will be raised, -1.0 will be returned,
329	and *endptr will point just past the end of the converted value.
330
331	If any other failure occurs (for example lack of memory), -1.0 is returned
332	and the appropriate Python exception will have been set.
333	*/
334
335	double
336	PyOS_string_to_double(const char *s,
337	char **endptr,
338	PyObject *overflow_exception)
339	{
340	double x, result=-1.0;
341	char *fail_pos;
342
343	errno = 0;
344	PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
345	x = _PyOS_ascii_strtod(s, &fail_pos);
346	PyFPE_END_PROTECT(x)
347
348	if (errno == ENOMEM) {
349	PyErr_NoMemory();
350	fail_pos = (char *)s;
351	}
352	else if (!endptr && (fail_pos == s \|\| *fail_pos != '\0'))
353	PyErr_Format(PyExc_ValueError,
354	"could not convert string to float: "
355	"%.200s", s);
356	else if (fail_pos == s)
357	PyErr_Format(PyExc_ValueError,
358	"could not convert string to float: "
359	"%.200s", s);
360	else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
361	PyErr_Format(overflow_exception,
362	"value too large to convert to float: "
363	"%.200s", s);
364	else
365	result = x;
366
367	if (endptr != NULL)
368	*endptr = fail_pos;
369	return result;
370	}
371
372	/* Given a string that may have a decimal point in the current
373	locale, change it back to a dot. Since the string cannot get
374	longer, no need for a maximum buffer size parameter. */
375	Py_LOCAL_INLINE(void)
376	change_decimal_from_locale_to_dot(char* buffer)
377	{
378	struct lconv *locale_data = localeconv();
379	const char *decimal_point = locale_data->decimal_point;
380
381	if (decimal_point[0] != '.' \|\| decimal_point[1] != 0) {
382	size_t decimal_point_len = strlen(decimal_point);
383
384	if (buffer == '+' \|\| buffer == '-')
385	buffer++;
386	while (Py_ISDIGIT(*buffer))
387	buffer++;
388	if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
389	*buffer = '.';
390	buffer++;
391	if (decimal_point_len > 1) {
392	/* buffer needs to get smaller */
393	size_t rest_len = strlen(buffer +
394	(decimal_point_len - 1));
395	memmove(buffer,
396	buffer + (decimal_point_len - 1),
397	rest_len);
398	buffer[rest_len] = 0;
399	}
400	}
401	}
402	}
403
404
405	/* From the C99 standard, section 7.19.6:
406	The exponent always contains at least two digits, and only as many more digits
407	as necessary to represent the exponent.
408	*/
409	#define MIN_EXPONENT_DIGITS 2
410
411	/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
412	in length. */
413	Py_LOCAL_INLINE(void)
414	ensure_minimum_exponent_length(char* buffer, size_t buf_size)
415	{
416	char *p = strpbrk(buffer, "eE");
417	if (p && ((p + 1) == '-' \|\| (p + 1) == '+')) {
418	char *start = p + 2;
419	int exponent_digit_cnt = 0;
420	int leading_zero_cnt = 0;
421	int in_leading_zeros = 1;
422	int significant_digit_cnt;
423
424	/* Skip over the exponent and the sign. */
425	p += 2;
426
427	/* Find the end of the exponent, keeping track of leading
428	zeros. */
429	while (p && Py_ISDIGIT(p)) {
430	if (in_leading_zeros && *p == '0')
431	++leading_zero_cnt;
432	if (*p != '0')
433	in_leading_zeros = 0;
434	++p;
435	++exponent_digit_cnt;
436	}
437
438	significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
439	if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
440	/* If there are 2 exactly digits, we're done,
441	regardless of what they contain */
442	}
443	else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
444	int extra_zeros_cnt;
445
446	/* There are more than 2 digits in the exponent. See
447	if we can delete some of the leading zeros */
448	if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
449	significant_digit_cnt = MIN_EXPONENT_DIGITS;
450	extra_zeros_cnt = exponent_digit_cnt -
451	significant_digit_cnt;
452
453	/* Delete extra_zeros_cnt worth of characters from the
454	front of the exponent */
455	assert(extra_zeros_cnt >= 0);
456
457	/* Add one to significant_digit_cnt to copy the
458	trailing 0 byte, thus setting the length */
459	memmove(start,
460	start + extra_zeros_cnt,
461	significant_digit_cnt + 1);
462	}
463	else {
464	/* If there are fewer than 2 digits, add zeros
465	until there are 2, if there's enough room */
466	int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
467	if (start + zeros + exponent_digit_cnt + 1
468	< buffer + buf_size) {
469	memmove(start + zeros, start,
470	exponent_digit_cnt + 1);
471	memset(start, '0', zeros);
472	}
473	}
474	}
475	}
476
477	/* Remove trailing zeros after the decimal point from a numeric string; also
478	remove the decimal point if all digits following it are zero. The numeric
479	string must end in '\0', and should not have any leading or trailing
480	whitespace. Assumes that the decimal point is '.'. */
481	Py_LOCAL_INLINE(void)
482	remove_trailing_zeros(char *buffer)
483	{
484	char old_fraction_end, new_fraction_end, end, p;
485
486	p = buffer;
487	if (p == '-' \|\| p == '+')
488	/* Skip leading sign, if present */
489	++p;
490	while (Py_ISDIGIT(*p))
491	++p;
492
493	/* if there's no decimal point there's nothing to do */
494	if (*p++ != '.')
495	return;
496
497	/* scan any digits after the point */
498	while (Py_ISDIGIT(*p))
499	++p;
500	old_fraction_end = p;
501
502	/* scan up to ending '\0' */
503	while (*p != '\0')
504	p++;
505	/* +1 to make sure that we move the null byte as well */
506	end = p+1;
507
508	/* scan back from fraction_end, looking for removable zeros */
509	p = old_fraction_end;
510	while (*(p-1) == '0')
511	--p;
512	/* and remove point if we've got that far */
513	if (*(p-1) == '.')
514	--p;
515	new_fraction_end = p;
516
517	memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
518	}
519
520	/* Ensure that buffer has a decimal point in it. The decimal point will not
521	be in the current locale, it will always be '.'. Don't add a decimal point
522	if an exponent is present. Also, convert to exponential notation where
523	adding a '.0' would produce too many significant digits (see issue 5864).
524
525	Returns a pointer to the fixed buffer, or NULL on failure.
526	*/
527	Py_LOCAL_INLINE(char *)
528	ensure_decimal_point(char* buffer, size_t buf_size, int precision)
529	{
530	int digit_count, insert_count = 0, convert_to_exp = 0;
531	char chars_to_insert, digits_start;
532
533	/* search for the first non-digit character */
534	char *p = buffer;
535	if (p == '-' \|\| p == '+')
536	/* Skip leading sign, if present. I think this could only
537	ever be '-', but it can't hurt to check for both. */
538	++p;
539	digits_start = p;
540	while (p && Py_ISDIGIT(p))
541	++p;
542	digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
543
544	if (*p == '.') {
545	if (Py_ISDIGIT(*(p+1))) {
546	/* Nothing to do, we already have a decimal
547	point and a digit after it */
548	}
549	else {
550	/* We have a decimal point, but no following
551	digit. Insert a zero after the decimal. */
552	/* can't ever get here via PyOS_double_to_string */
553	assert(precision == -1);
554	++p;
555	chars_to_insert = "0";
556	insert_count = 1;
557	}
558	}
559	else if (!(p == 'e' \|\| p == 'E')) {
560	/* Don't add ".0" if we have an exponent. */
561	if (digit_count == precision) {
562	/* issue 5864: don't add a trailing .0 in the case
563	where the '%g'-formatted result already has as many
564	significant digits as were requested. Switch to
565	exponential notation instead. */
566	convert_to_exp = 1;
567	/* no exponent, no point, and we shouldn't land here
568	for infs and nans, so we must be at the end of the
569	string. */
570	assert(*p == '\0');
571	}
572	else {
573	assert(precision == -1 \|\| digit_count < precision);
574	chars_to_insert = ".0";
575	insert_count = 2;
576	}
577	}
578	if (insert_count) {
579	size_t buf_len = strlen(buffer);
580	if (buf_len + insert_count + 1 >= buf_size) {
581	/* If there is not enough room in the buffer
582	for the additional text, just skip it. It's
583	not worth generating an error over. */
584	}
585	else {
586	memmove(p + insert_count, p,
587	buffer + strlen(buffer) - p + 1);
588	memcpy(p, chars_to_insert, insert_count);
589	}
590	}
591	if (convert_to_exp) {
592	int written;
593	size_t buf_avail;
594	p = digits_start;
595	/* insert decimal point */
596	assert(digit_count >= 1);
597	memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
598	p[1] = '.';
599	p += digit_count+1;
600	assert(p <= buf_size+buffer);
601	buf_avail = buf_size+buffer-p;
602	if (buf_avail == 0)
603	return NULL;
604	/* Add exponent. It's okay to use lower case 'e': we only
605	arrive here as a result of using the empty format code or
606	repr/str builtins and those never want an upper case 'E' */
607	written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
608	if (!(0 <= written &&
609	written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
610	/* output truncated, or something else bad happened */
611	return NULL;
612	remove_trailing_zeros(buffer);
613	}
614	return buffer;
615	}
616
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for the
 *          code to use for converting.
 * @d: The double to convert
 * @precision: Passed through to the 'Z' fixup (ensure_decimal_point);
 *          unused for the other format codes.
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure returns NULL but does not set any Python exception.
 **/
char *
_PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d,
                   int         precision)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate the leading '%' before looking at the last character: an
       empty format fails this test, which guarantees format_len >= 1 and
       keeps format[format_len - 1] in bounds. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* I'm not sure why this test is here.  It's ensuring that the format
       string after the first character doesn't have a single quote, a
       lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits that we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
718
719	char *
720	PyOS_ascii_formatd(char *buffer,
721	size_t buf_size,
722	const char *format,
723	double d)
724	{
725	if (PyErr_WarnEx(PyExc_DeprecationWarning,
726	"PyOS_ascii_formatd is deprecated, "
727	"use PyOS_double_to_string instead", 1) < 0)
728	return NULL;
729
730	return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
731	}
732
733	#ifdef PY_NO_SHORT_FLOAT_REPR
734
735	/* The fallback code to use if _Py_dg_dtoa is not available. */
736
737	PyAPI_FUNC(char *) PyOS_double_to_string(double val,
738	char format_code,
739	int precision,
740	int flags,
741	int *type)
742	{
743	char format[32];
744	Py_ssize_t bufsize;
745	char *buf;
746	int t, exp;
747	int upper = 0;
748
749	/* Validate format_code, and map upper and lower case */
750	switch (format_code) {
751	case 'e': /* exponent */
752	case 'f': /* fixed */
753	case 'g': /* general */
754	break;
755	case 'E':
756	upper = 1;
757	format_code = 'e';
758	break;
759	case 'F':
760	upper = 1;
761	format_code = 'f';
762	break;
763	case 'G':
764	upper = 1;
765	format_code = 'g';
766	break;
767	case 'r': /* repr format */
768	/* Supplied precision is unused, must be 0. */
769	if (precision != 0) {
770	PyErr_BadInternalCall();
771	return NULL;
772	}
773	/* The repr() precision (17 significant decimal digits) is the
774	minimal number that is guaranteed to have enough precision
775	so that if the number is read back in the exact same binary
776	value is recreated. This is true for IEEE floating point
777	by design, and also happens to work for all other modern
778	hardware. */
779	precision = 17;
780	format_code = 'g';
781	break;
782	default:
783	PyErr_BadInternalCall();
784	return NULL;
785	}
786
787	/* Here's a quick-and-dirty calculation to figure out how big a buffer
788	we need. In general, for a finite float we need:
789
790	1 byte for each digit of the decimal significand, and
791
792	1 for a possible sign
793	1 for a possible decimal point
794	2 for a possible [eE][+-]
795	1 for each digit of the exponent; if we allow 19 digits
796	total then we're safe up to exponents of 2**63.
797	1 for the trailing nul byte
798
799	This gives a total of 24 + the number of digits in the significand,
800	and the number of digits in the significand is:
801
802	for 'g' format: at most precision, except possibly
803	when precision == 0, when it's 1.
804	for 'e' format: precision+1
805	for 'f' format: precision digits after the point, at least 1
806	before. To figure out how many digits appear before the point
807	we have to examine the size of the number. If fabs(val) < 1.0
808	then there will be only one digit before the point. If
809	fabs(val) >= 1.0, then there are at most
810
811	1+floor(log10(ceiling(fabs(val))))
812
813	digits before the point (where the 'ceiling' allows for the
814	possibility that the rounding rounds the integer part of val
815	up). A safe upper bound for the above quantity is
816	1+floor(exp/3), where exp is the unique integer such that 0.5
817	<= fabs(val)/2**exp < 1.0. This exp can be obtained from
818	frexp.
819
820	So we allow room for precision+1 digits for all formats, plus an
821	extra floor(exp/3) digits for 'f' format.
822
823	*/
824
825	if (Py_IS_NAN(val) \|\| Py_IS_INFINITY(val))
826	/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
827	bufsize = 5;
828	else {
829	bufsize = 25 + precision;
830	if (format_code == 'f' && fabs(val) >= 1.0) {
831	frexp(val, &exp);
832	bufsize += exp/3;
833	}
834	}
835
836	buf = PyMem_Malloc(bufsize);
837	if (buf == NULL) {
838	PyErr_NoMemory();
839	return NULL;
840	}
841
842	/* Handle nan and inf. */
843	if (Py_IS_NAN(val)) {
844	strcpy(buf, "nan");
845	t = Py_DTST_NAN;
846	} else if (Py_IS_INFINITY(val)) {
847	if (copysign(1., val) == 1.)
848	strcpy(buf, "inf");
849	else
850	strcpy(buf, "-inf");
851	t = Py_DTST_INFINITE;
852	} else {
853	t = Py_DTST_FINITE;
854	if (flags & Py_DTSF_ADD_DOT_0)
855	format_code = 'Z';
856
857	PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
858	(flags & Py_DTSF_ALT ? "#" : ""), precision,
859	format_code);
860	_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
861	}
862
863	/* Add sign when requested. It's convenient (esp. when formatting
864	complex numbers) to include a sign even for inf and nan. */
865	if (flags & Py_DTSF_SIGN && buf[0] != '-') {
866	size_t len = strlen(buf);
867	/* the bufsize calculations above should ensure that we've got
868	space to add a sign */
869	assert((size_t)bufsize >= len+2);
870	memmove(buf+1, buf, len+1);
871	buf[0] = '+';
872	}
873	if (upper) {
874	/* Convert to upper case. */
875	char *p1;
876	for (p1 = buf; *p1; p1++)
877	p1 = Py_TOUPPER(p1);
878	}
879
880	if (type)
881	*type = t;
882	return buf;
883	}
884
885	#else
886
887	/* _Py_dg_dtoa is available. */
888
/* I'm using a lookup table here so that I don't have to invent a non-locale
   specific way to convert to uppercase */
/* Indices into lc_float_strings / uc_float_strings. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
/* Lower-case spellings, indexed by the OFS_* constants. */
static char *lc_float_strings[] = {
    "inf",
    "nan",
    "e",
};
/* Upper-case spellings, in the same order as lc_float_strings. */
static char *uc_float_strings[] = {
    "INF",
    "NAN",
    "E",
};
906
907
908	/* Convert a double d to a string, and return a PyMem_Malloc'd block of
909	memory contain the resulting string.
910
911	Arguments:
912	d is the double to be converted
913	format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
914	correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
915	mode is one of '0', '2' or '3', and is completely determined by
916	format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
917	precision is the desired precision
918	always_add_sign is nonzero if a '+' sign should be included for positive
919	numbers
920	add_dot_0_if_integer is nonzero if integers in non-exponential form
921	should have ".0" added. Only applies to format codes 'r' and 'g'.
922	use_alt_formatting is nonzero if alternative formatting should be
923	used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
924	at most one of use_alt_formatting and add_dot_0_if_integer should
925	be nonzero.
926	type, if non-NULL, will be set to one of these constants to identify
927	the type of the 'd' argument:
928	Py_DTST_FINITE
929	Py_DTST_INFINITE
930	Py_DTST_NAN
931
932	Returns a PyMem_Malloc'd block of memory containing the resulting string,
933	or NULL on error. If NULL is returned, the Python error has been set.
934	*/
935
936	static char *
937	format_float_short(double d, char format_code,
938	int mode, Py_ssize_t precision,
939	int always_add_sign, int add_dot_0_if_integer,
940	int use_alt_formatting, char *float_strings, int type)
941	{
942	char *buf = NULL;
943	char *p = NULL;
944	Py_ssize_t bufsize = 0;
945	char digits, digits_end;
946	int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
947	Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
948	_Py_SET_53BIT_PRECISION_HEADER;
949
950	/* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
951	Must be matched by a call to _Py_dg_freedtoa. */
952	_Py_SET_53BIT_PRECISION_START;
953	digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
954	&digits_end);
955	_Py_SET_53BIT_PRECISION_END;
956
957	decpt = (Py_ssize_t)decpt_as_int;
958	if (digits == NULL) {
959	/* The only failure mode is no memory. */
960	PyErr_NoMemory();
961	goto exit;
962	}
963	assert(digits_end != NULL && digits_end >= digits);
964	digits_len = digits_end - digits;
965
966	if (digits_len && !Py_ISDIGIT(digits[0])) {
967	/* Infinities and nans here; adapt Gay's output,
968	so convert Infinity to inf and NaN to nan, and
969	ignore sign of nan. Then return. */
970
971	/* ignore the actual sign of a nan */
972	if (digits[0] == 'n' \|\| digits[0] == 'N')
973	sign = 0;
974
975	/* We only need 5 bytes to hold the result "+inf\0" . */
976	bufsize = 5; /* Used later in an assert. */
977	buf = (char *)PyMem_Malloc(bufsize);
978	if (buf == NULL) {
979	PyErr_NoMemory();
980	goto exit;
981	}
982	p = buf;
983
984	if (sign == 1) {
985	*p++ = '-';
986	}
987	else if (always_add_sign) {
988	*p++ = '+';
989	}
990	if (digits[0] == 'i' \|\| digits[0] == 'I') {
991	strncpy(p, float_strings[OFS_INF], 3);
992	p += 3;
993
994	if (type)
995	*type = Py_DTST_INFINITE;
996	}
997	else if (digits[0] == 'n' \|\| digits[0] == 'N') {
998	strncpy(p, float_strings[OFS_NAN], 3);
999	p += 3;
1000
1001	if (type)
1002	*type = Py_DTST_NAN;
1003	}
1004	else {
1005	/* shouldn't get here: Gay's code should always return
1006	something starting with a digit, an 'I', or 'N' */
1007	strncpy(p, "ERR", 3);
1008	p += 3;
1009	assert(0);
1010	}
1011	goto exit;
1012	}
1013
1014	/* The result must be finite (not inf or nan). */
1015	if (type)
1016	*type = Py_DTST_FINITE;
1017
1018
1019	/* We got digits back, format them. We may need to pad 'digits'
1020	either on the left or right (or both) with extra zeros, so in
1021	general the resulting string has the form
1022
1023	[<sign>]<zeros><digits><zeros>[<exponent>]
1024
1025	where either of the <zeros> pieces could be empty, and there's a
1026	decimal point that could appear either in <digits> or in the
1027	leading or trailing <zeros>.
1028
1029	Imagine an infinite 'virtual' string vdigits, consisting of the
1030	string 'digits' (starting at index 0) padded on both the left and
1031	right with infinite strings of zeros. We want to output a slice
1032
1033	vdigits[vdigits_start : vdigits_end]
1034
1035	of this virtual string. Thus if vdigits_start < 0 then we'll end
1036	up producing some leading zeros; if vdigits_end > digits_len there
1037	will be trailing zeros in the output. The next section of code
1038	determines whether to use an exponent or not, figures out the
1039	position 'decpt' of the decimal point, and computes 'vdigits_start'
1040	and 'vdigits_end'. */
1041	vdigits_end = digits_len;
1042	switch (format_code) {
1043	case 'e':
1044	use_exp = 1;
1045	vdigits_end = precision;
1046	break;
1047	case 'f':
1048	vdigits_end = decpt + precision;
1049	break;
1050	case 'g':
1051	if (decpt <= -4 \|\| decpt >
1052	(add_dot_0_if_integer ? precision-1 : precision))
1053	use_exp = 1;
1054	if (use_alt_formatting)
1055	vdigits_end = precision;
1056	break;
1057	case 'r':
1058	/* convert to exponential format at 1e16. We used to convert
1059	at 1e17, but that gives odd-looking results for some values
1060	when a 16-digit 'shortest' repr is padded with bogus zeros.
1061	For example, repr(2e16+8) would give 20000000000000010.0;
1062	the true value is 20000000000000008.0. */
1063	if (decpt <= -4 \|\| decpt > 16)
1064	use_exp = 1;
1065	break;
1066	default:
1067	PyErr_BadInternalCall();
1068	goto exit;
1069	}
1070
1071	/* if using an exponent, reset decimal point position to 1 and adjust
1072	exponent accordingly.*/
1073	if (use_exp) {
1074	exp = decpt - 1;
1075	decpt = 1;
1076	}
1077	/* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1078	decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1079	vdigits_start = decpt <= 0 ? decpt-1 : 0;
1080	if (!use_exp && add_dot_0_if_integer)
1081	vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1082	else
1083	vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1084
1085	/* double check inequalities */
1086	assert(vdigits_start <= 0 &&
1087	0 <= digits_len &&
1088	digits_len <= vdigits_end);
1089	/* decimal point should be in (vdigits_start, vdigits_end] */
1090	assert(vdigits_start < decpt && decpt <= vdigits_end);
1091
1092	/* Compute an upper bound how much memory we need. This might be a few
1093	chars too long, but no big deal. */
1094	bufsize =
1095	/* sign, decimal point and trailing 0 byte */
1096	3 +
1097
1098	/* total digit count (including zero padding on both sides) */
1099	(vdigits_end - vdigits_start) +
1100
1101	/* exponent "e+100", max 3 numerical digits */
1102	(use_exp ? 5 : 0);
1103
1104	/* Now allocate the memory and initialize p to point to the start of
1105	it. */
1106	buf = (char *)PyMem_Malloc(bufsize);
1107	if (buf == NULL) {
1108	PyErr_NoMemory();
1109	goto exit;
1110	}
1111	p = buf;
1112
1113	/* Add a negative sign if negative, and a plus sign if non-negative
1114	and always_add_sign is true. */
1115	if (sign == 1)
1116	*p++ = '-';
1117	else if (always_add_sign)
1118	*p++ = '+';
1119
1120	/* note that exactly one of the three 'if' conditions is true,
1121	so we include exactly one decimal point */
1122	/* Zero padding on left of digit string */
1123	if (decpt <= 0) {
1124	memset(p, '0', decpt-vdigits_start);
1125	p += decpt - vdigits_start;
1126	*p++ = '.';
1127	memset(p, '0', 0-decpt);
1128	p += 0-decpt;
1129	}
1130	else {
1131	memset(p, '0', 0-vdigits_start);
1132	p += 0 - vdigits_start;
1133	}
1134
1135	/* Digits, with included decimal point */
1136	if (0 < decpt && decpt <= digits_len) {
1137	strncpy(p, digits, decpt-0);
1138	p += decpt-0;
1139	*p++ = '.';
1140	strncpy(p, digits+decpt, digits_len-decpt);
1141	p += digits_len-decpt;
1142	}
1143	else {
1144	strncpy(p, digits, digits_len);
1145	p += digits_len;
1146	}
1147
1148	/* And zeros on the right */
1149	if (digits_len < decpt) {
1150	memset(p, '0', decpt-digits_len);
1151	p += decpt-digits_len;
1152	*p++ = '.';
1153	memset(p, '0', vdigits_end-decpt);
1154	p += vdigits_end-decpt;
1155	}
1156	else {
1157	memset(p, '0', vdigits_end-digits_len);
1158	p += vdigits_end-digits_len;
1159	}
1160
1161	/* Delete a trailing decimal pt unless using alternative formatting. */
1162	if (p[-1] == '.' && !use_alt_formatting)
1163	p--;
1164
1165	/* Now that we've done zero padding, add an exponent if needed. */
1166	if (use_exp) {
1167	*p++ = float_strings[OFS_E][0];
1168	exp_len = sprintf(p, "%+.02d", exp);
1169	p += exp_len;
1170	}
1171	exit:
1172	if (buf) {
1173	*p = '\0';
1174	/* It's too late if this fails, as we've already stepped on
1175	memory that isn't ours. But it's an okay debugging test. */
1176	assert(p-buf < bufsize);
1177	}
1178	if (digits)
1179	_Py_dg_freedtoa(digits);
1180
1181	return buf;
1182	}
1183
1184
1185	PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1186	char format_code,
1187	int precision,
1188	int flags,
1189	int *type)
1190	{
1191	char **float_strings = lc_float_strings;
1192	int mode;
1193
1194	/* Validate format_code, and map upper and lower case. Compute the
1195	mode and make any adjustments as needed. */
1196	switch (format_code) {
1197	/* exponent */
1198	case 'E':
1199	float_strings = uc_float_strings;
1200	format_code = 'e';
1201	/* Fall through. */
1202	case 'e':
1203	mode = 2;
1204	precision++;
1205	break;
1206
1207	/* fixed */
1208	case 'F':
1209	float_strings = uc_float_strings;
1210	format_code = 'f';
1211	/* Fall through. */
1212	case 'f':
1213	mode = 3;
1214	break;
1215
1216	/* general */
1217	case 'G':
1218	float_strings = uc_float_strings;
1219	format_code = 'g';
1220	/* Fall through. */
1221	case 'g':
1222	mode = 2;
1223	/* precision 0 makes no sense for 'g' format; interpret as 1 */
1224	if (precision == 0)
1225	precision = 1;
1226	break;
1227
1228	/* repr format */
1229	case 'r':
1230	mode = 0;
1231	/* Supplied precision is unused, must be 0. */
1232	if (precision != 0) {
1233	PyErr_BadInternalCall();
1234	return NULL;
1235	}
1236	break;
1237
1238	default:
1239	PyErr_BadInternalCall();
1240	return NULL;
1241	}
1242
1243	return format_float_short(val, format_code, mode, precision,
1244	flags & Py_DTSF_SIGN,
1245	flags & Py_DTSF_ADD_DOT_0,
1246	flags & Py_DTSF_ALT,
1247	float_strings, type);
1248	}
1249	#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/vendor/current/Python/pystrtod.c

Download in other formats: