Context Navigation

source: python/trunk/Python/pystrtod.c@ 383

Last change on this file since 383 was 10, checked in by Yuri Dario, 15 years ago
python: merged offline changes.
Property svn:eol-style set to `native`
File size: 13.8 KB

Line
1	/* -*- Mode: C; c-file-style: "python" -*- */
2
3	#include <Python.h>
4	#include <locale.h>
5	#ifdef __EMX__
6	#include <float.h>
7	#endif
8
9
/* ASCII character tests (as opposed to the locale-dependent <ctype.h>
   tests).  Both macros evaluate their argument more than once, so do not
   pass an expression with side effects (e.g. ISDIGIT(*p++)). */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISDIGIT(c)  ((c) >= '0' && (c) <= '9')
14
15
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-NULL, receives a pointer to the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno.  If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 *
 * If the text after optional whitespace and sign does not start with
 * a digit, '.', or one of the letters i, I, n, N, or if it starts with
 * 0x / 0X, or if it uses the locale's (non-'.') decimal point, then
 * -1.0 is returned, EINVAL is stored in errno and *endptr (if given)
 * is set to nptr.  If memory allocation fails, -1.0 is returned,
 * ENOMEM is stored in errno and *endptr (if given) is set to nptr.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

#ifdef __EMX__
    /* NOTE(review): presumably masks all floating-point exceptions so
       that the system strtod() below cannot raise SIGFPE on OS/2 --
       confirm against the EMX <float.h> documentation. */
    _control87(MCW_EM, MCW_EM);
#endif

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725)  */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    /* Only when the locale's decimal point is not exactly "." do we have
       to locate the '.' in the input so it can be rewritten below. */
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        while (ISDIGIT(*p))
            p++;

        if (*p == '.') {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0) {
            /* Python bug #1417699: the locale's decimal point must not
               be accepted by this locale-independent parser. */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos) {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* copy = <integer digits> <locale decimal point> <rest> NUL */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos) {
            /* fail_pos points into copy; translate it back into the
               caller's string.  Compare offsets, not raw pointers:
               copy and the input are unrelated objects. */
            size_t parsed_len = (size_t)(fail_pos - copy);
            size_t point_offset = (size_t)(decimal_point_pos - digits_pos);

            if (parsed_len > point_offset)
                /* Parsing went past the decimal point, which is
                   decimal_point_len bytes in copy but 1 byte in
                   the original. */
                fail_pos = (char *)digits_pos + parsed_len -
                           (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos + parsed_len;
        }

        PyMem_FREE(copy);
    }
    else {
        /* sigfpe here (note from the OS/2 port) */
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the start of the whole input, not
       the position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
197
198	/* Given a string that may have a decimal point in the current
199	locale, change it back to a dot. Since the string cannot get
200	longer, no need for a maximum buffer size parameter. */
201	Py_LOCAL_INLINE(void)
202	change_decimal_from_locale_to_dot(char* buffer)
203	{
204	struct lconv *locale_data = localeconv();
205	const char *decimal_point = locale_data->decimal_point;
206
207	if (decimal_point[0] != '.' \|\| decimal_point[1] != 0) {
208	size_t decimal_point_len = strlen(decimal_point);
209
210	if (buffer == '+' \|\| buffer == '-')
211	buffer++;
212	while (isdigit(Py_CHARMASK(*buffer)))
213	buffer++;
214	if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
215	*buffer = '.';
216	buffer++;
217	if (decimal_point_len > 1) {
218	/* buffer needs to get smaller */
219	size_t rest_len = strlen(buffer +
220	(decimal_point_len - 1));
221	memmove(buffer,
222	buffer + (decimal_point_len - 1),
223	rest_len);
224	buffer[rest_len] = 0;
225	}
226	}
227	}
228	}
229
230
231	/* From the C99 standard, section 7.19.6:
232	The exponent always contains at least two digits, and only as many more digits
233	as necessary to represent the exponent.
234	*/
235	#define MIN_EXPONENT_DIGITS 2
236
237	/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
238	in length. */
239	Py_LOCAL_INLINE(void)
240	ensure_minimum_exponent_length(char* buffer, size_t buf_size)
241	{
242	char *p = strpbrk(buffer, "eE");
243	if (p && ((p + 1) == '-' \|\| (p + 1) == '+')) {
244	char *start = p + 2;
245	int exponent_digit_cnt = 0;
246	int leading_zero_cnt = 0;
247	int in_leading_zeros = 1;
248	int significant_digit_cnt;
249
250	/* Skip over the exponent and the sign. */
251	p += 2;
252
253	/* Find the end of the exponent, keeping track of leading
254	zeros. */
255	while (p && isdigit(Py_CHARMASK(p))) {
256	if (in_leading_zeros && *p == '0')
257	++leading_zero_cnt;
258	if (*p != '0')
259	in_leading_zeros = 0;
260	++p;
261	++exponent_digit_cnt;
262	}
263
264	significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
265	if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
266	/* If there are 2 exactly digits, we're done,
267	regardless of what they contain */
268	}
269	else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
270	int extra_zeros_cnt;
271
272	/* There are more than 2 digits in the exponent. See
273	if we can delete some of the leading zeros */
274	if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
275	significant_digit_cnt = MIN_EXPONENT_DIGITS;
276	extra_zeros_cnt = exponent_digit_cnt -
277	significant_digit_cnt;
278
279	/* Delete extra_zeros_cnt worth of characters from the
280	front of the exponent */
281	assert(extra_zeros_cnt >= 0);
282
283	/* Add one to significant_digit_cnt to copy the
284	trailing 0 byte, thus setting the length */
285	memmove(start,
286	start + extra_zeros_cnt,
287	significant_digit_cnt + 1);
288	}
289	else {
290	/* If there are fewer than 2 digits, add zeros
291	until there are 2, if there's enough room */
292	int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
293	if (start + zeros + exponent_digit_cnt + 1
294	< buffer + buf_size) {
295	memmove(start + zeros, start,
296	exponent_digit_cnt + 1);
297	memset(start, '0', zeros);
298	}
299	}
300	}
301	}
302
303	/* Ensure that buffer has a decimal point in it. The decimal point
304	will not be in the current locale, it will always be '.' */
305	Py_LOCAL_INLINE(void)
306	ensure_decimal_point(char* buffer, size_t buf_size)
307	{
308	int insert_count = 0;
309	char* chars_to_insert;
310
311	/* search for the first non-digit character */
312	char *p = buffer;
313	if (p == '-' \|\| p == '+')
314	/* Skip leading sign, if present. I think this could only
315	ever be '-', but it can't hurt to check for both. */
316	++p;
317	while (p && isdigit(Py_CHARMASK(p)))
318	++p;
319
320	if (*p == '.') {
321	if (isdigit(Py_CHARMASK(*(p+1)))) {
322	/* Nothing to do, we already have a decimal
323	point and a digit after it */
324	}
325	else {
326	/* We have a decimal point, but no following
327	digit. Insert a zero after the decimal. */
328	++p;
329	chars_to_insert = "0";
330	insert_count = 1;
331	}
332	}
333	else {
334	chars_to_insert = ".0";
335	insert_count = 2;
336	}
337	if (insert_count) {
338	size_t buf_len = strlen(buffer);
339	if (buf_len + insert_count + 1 >= buf_size) {
340	/* If there is not enough room in the buffer
341	for the additional text, just skip it. It's
342	not worth generating an error over. */
343	}
344	else {
345	memmove(p + insert_count, p,
346	buffer + strlen(buffer) - p + 1);
347	memcpy(p, chars_to_insert, insert_count);
348	}
349	}
350	}
351
352	/* Add the locale specific grouping characters to buffer. Note
353	that any decimal point (if it's present) in buffer is already
354	locale-specific. Return 0 on error, else 1. */
355	Py_LOCAL_INLINE(int)
356	add_thousands_grouping(char* buffer, size_t buf_size)
357	{
358	Py_ssize_t len = strlen(buffer);
359	struct lconv *locale_data = localeconv();
360	const char *decimal_point = locale_data->decimal_point;
361
362	/* Find the decimal point, if any. We're only concerned
363	about the characters to the left of the decimal when
364	adding grouping. */
365	char *p = strstr(buffer, decimal_point);
366	if (!p) {
367	/* No decimal, use the entire string. */
368
369	/* If any exponent, adjust p. */
370	p = strpbrk(buffer, "eE");
371	if (!p)
372	/* No exponent and no decimal. Use the entire
373	string. */
374	p = buffer + len;
375	}
376	/* At this point, p points just past the right-most character we
377	want to format. We need to add the grouping string for the
378	characters between buffer and p. */
379	return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
380	buf_size, NULL, 1);
381	}
382
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer:   A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format:   The printf()-style format to use for the
 *            code to use for converting.
 * @d:        The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point.  To format the number you pass in
 * a printf()-style format string.  Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
 *
 * 'n' is the same as 'g', except it uses the current locale.
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * The format must start with '%', must end with the conversion
 * specifier, and must not contain a single quote, a lowercase 'l'
 * or another '%' after the first character.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or NULL if the format is rejected (including the empty string), if
 * an 'n'/'Z' format is too long to copy, or if thousands grouping
 * fails for 'n'.
 **/
char *
PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d)
{
    char format_char;
    size_t format_len;

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's an upcoming check to ensure it's
       big enough. */
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Check the leading '%' before touching the last character: this
       rejects the empty string, for which format[format_len - 1] would
       index out of bounds. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_len = strlen(format);
    format_char = format[format_len - 1];

    /* Reject a single quote, a lowercase l, or a second percent anywhere
       after the first character. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n' || format_char == 'Z'))
        return NULL;

    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'n' || format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot.  Do not do this if using the
       'n' (number) format code, since we want to keep the localized
       decimal point in that case. */
    if (format_char != 'n')
        change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    /* If format_char is 'n', add the thousands grouping. */
    if (format_char == 'n')
        if (!add_thousands_grouping(buffer, buf_size))
            return NULL;

    return buffer;
}
493
/* Convert the string nptr to a double by delegating to
   PyOS_ascii_strtod() without requesting the end-of-parse position. */
double
PyOS_ascii_atof(const char *nptr)
{
    double parsed = PyOS_ascii_strtod(nptr, NULL);

    return parsed;
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: