Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

pystrtod.c@ 6

Last change on this file since 6 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 13.7 KB

Line
1	/* -*- Mode: C; c-file-style: "python" -*- */
2
3	#include <Python.h>
4	#include <locale.h>
5
/* ASCII character tests (as opposed to the locale-dependent <ctype.h>
   tests).  Both macros evaluate their argument more than once, so only
   pass expressions without side effects (e.g. `*p`, never `*p++`). */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISDIGIT(c)  ((c) >= '0' && (c) <= '9')
10
11
/**
 * PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  if non-NULL, receives a pointer to the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale.  It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * The value, errno and the end pointer in the conversion case are those
 * produced by the system strtod(); errno is reset to zero before that
 * call so that overflow and underflow (ERANGE) can be detected reliably.
 *
 * Early failures return -1.0 and store @nptr in *@endptr:
 *   - EINVAL: after optional whitespace and sign the text does not start
 *     with a digit, '.', or one of i, I, n, N; or it starts with 0x/0X;
 *     or (in a non-"." locale) the digits are followed by the locale's
 *     decimal point instead of '.'.
 *   - ENOMEM: the temporary buffer could not be allocated.
 *
 * Return value: the converted double value.
 **/
double
PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val = -1.0;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* We process any leading whitespace and the optional sign manually,
       then pass the remainder to the system strtod.  This ensures that
       the result of an underflow has the correct sign. (bug #1725) */

    p = nptr;
    /* Skip leading space */
    while (ISSPACE(*p))
        p++;

    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* What's left should begin with a digit, a decimal point, or one of
       the letters i, I, n, N. It should not begin with 0x or 0X */
    if ((!ISDIGIT(*p) &&
         *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
        ||
        (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
    {
        if (endptr)
            *endptr = (char *)nptr;
        errno = EINVAL;
        return val;
    }
    digits_pos = p;

    /* Only when the locale's decimal point is not exactly "." do we have
       to locate the '.' in the input and translate it. */
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        while (ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            while (ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
        {
            /* Python bug #1417699 */
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        /* For the other cases, we need not convert the decimal
           point */
    }

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    if (decimal_point_pos)
    {
        char *copy, *c;

        /* We need to convert the '.' to the locale specific decimal
           point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            if (endptr)
                *endptr = (char *)nptr;
            errno = ENOMEM;
            return val;
        }

        /* copy = <digits before '.'> <locale decimal point> <rest> NUL */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy; translate it back into the
               caller's string.  Compare offsets, not raw pointers:
               fail_pos and decimal_point_pos belong to different
               buffers.  If parsing went past the decimal point, undo
               the extra (decimal_point_len - 1) bytes it occupied in
               the copy. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* Nothing was converted: report the very start of the input, not
       the position after the whitespace/sign we skipped ourselves. */
    if (fail_pos == digits_pos)
        fail_pos = (char *)nptr;

    if (negate && fail_pos != nptr)
        val = -val;

    if (endptr)
        *endptr = fail_pos;

    return val;
}
189
190	/* Given a string that may have a decimal point in the current
191	locale, change it back to a dot. Since the string cannot get
192	longer, no need for a maximum buffer size parameter. */
193	Py_LOCAL_INLINE(void)
194	change_decimal_from_locale_to_dot(char* buffer)
195	{
196	struct lconv *locale_data = localeconv();
197	const char *decimal_point = locale_data->decimal_point;
198
199	if (decimal_point[0] != '.' \|\| decimal_point[1] != 0) {
200	size_t decimal_point_len = strlen(decimal_point);
201
202	if (buffer == '+' \|\| buffer == '-')
203	buffer++;
204	while (isdigit(Py_CHARMASK(*buffer)))
205	buffer++;
206	if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
207	*buffer = '.';
208	buffer++;
209	if (decimal_point_len > 1) {
210	/* buffer needs to get smaller */
211	size_t rest_len = strlen(buffer +
212	(decimal_point_len - 1));
213	memmove(buffer,
214	buffer + (decimal_point_len - 1),
215	rest_len);
216	buffer[rest_len] = 0;
217	}
218	}
219	}
220	}
221
222
223	/* From the C99 standard, section 7.19.6:
224	The exponent always contains at least two digits, and only as many more digits
225	as necessary to represent the exponent.
226	*/
227	#define MIN_EXPONENT_DIGITS 2
228
229	/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
230	in length. */
231	Py_LOCAL_INLINE(void)
232	ensure_minimum_exponent_length(char* buffer, size_t buf_size)
233	{
234	char *p = strpbrk(buffer, "eE");
235	if (p && ((p + 1) == '-' \|\| (p + 1) == '+')) {
236	char *start = p + 2;
237	int exponent_digit_cnt = 0;
238	int leading_zero_cnt = 0;
239	int in_leading_zeros = 1;
240	int significant_digit_cnt;
241
242	/* Skip over the exponent and the sign. */
243	p += 2;
244
245	/* Find the end of the exponent, keeping track of leading
246	zeros. */
247	while (p && isdigit(Py_CHARMASK(p))) {
248	if (in_leading_zeros && *p == '0')
249	++leading_zero_cnt;
250	if (*p != '0')
251	in_leading_zeros = 0;
252	++p;
253	++exponent_digit_cnt;
254	}
255
256	significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
257	if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
258	/* If there are 2 exactly digits, we're done,
259	regardless of what they contain */
260	}
261	else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
262	int extra_zeros_cnt;
263
264	/* There are more than 2 digits in the exponent. See
265	if we can delete some of the leading zeros */
266	if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
267	significant_digit_cnt = MIN_EXPONENT_DIGITS;
268	extra_zeros_cnt = exponent_digit_cnt -
269	significant_digit_cnt;
270
271	/* Delete extra_zeros_cnt worth of characters from the
272	front of the exponent */
273	assert(extra_zeros_cnt >= 0);
274
275	/* Add one to significant_digit_cnt to copy the
276	trailing 0 byte, thus setting the length */
277	memmove(start,
278	start + extra_zeros_cnt,
279	significant_digit_cnt + 1);
280	}
281	else {
282	/* If there are fewer than 2 digits, add zeros
283	until there are 2, if there's enough room */
284	int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
285	if (start + zeros + exponent_digit_cnt + 1
286	< buffer + buf_size) {
287	memmove(start + zeros, start,
288	exponent_digit_cnt + 1);
289	memset(start, '0', zeros);
290	}
291	}
292	}
293	}
294
295	/* Ensure that buffer has a decimal point in it. The decimal point
296	will not be in the current locale, it will always be '.' */
297	Py_LOCAL_INLINE(void)
298	ensure_decimal_point(char* buffer, size_t buf_size)
299	{
300	int insert_count = 0;
301	char* chars_to_insert;
302
303	/* search for the first non-digit character */
304	char *p = buffer;
305	if (p == '-' \|\| p == '+')
306	/* Skip leading sign, if present. I think this could only
307	ever be '-', but it can't hurt to check for both. */
308	++p;
309	while (p && isdigit(Py_CHARMASK(p)))
310	++p;
311
312	if (*p == '.') {
313	if (isdigit(Py_CHARMASK(*(p+1)))) {
314	/* Nothing to do, we already have a decimal
315	point and a digit after it */
316	}
317	else {
318	/* We have a decimal point, but no following
319	digit. Insert a zero after the decimal. */
320	++p;
321	chars_to_insert = "0";
322	insert_count = 1;
323	}
324	}
325	else {
326	chars_to_insert = ".0";
327	insert_count = 2;
328	}
329	if (insert_count) {
330	size_t buf_len = strlen(buffer);
331	if (buf_len + insert_count + 1 >= buf_size) {
332	/* If there is not enough room in the buffer
333	for the additional text, just skip it. It's
334	not worth generating an error over. */
335	}
336	else {
337	memmove(p + insert_count, p,
338	buffer + strlen(buffer) - p + 1);
339	memcpy(p, chars_to_insert, insert_count);
340	}
341	}
342	}
343
344	/* Add the locale specific grouping characters to buffer. Note
345	that any decimal point (if it's present) in buffer is already
346	locale-specific. Return 0 on error, else 1. */
347	Py_LOCAL_INLINE(int)
348	add_thousands_grouping(char* buffer, size_t buf_size)
349	{
350	Py_ssize_t len = strlen(buffer);
351	struct lconv *locale_data = localeconv();
352	const char *decimal_point = locale_data->decimal_point;
353
354	/* Find the decimal point, if any. We're only concerned
355	about the characters to the left of the decimal when
356	adding grouping. */
357	char *p = strstr(buffer, decimal_point);
358	if (!p) {
359	/* No decimal, use the entire string. */
360
361	/* If any exponent, adjust p. */
362	p = strpbrk(buffer, "eE");
363	if (!p)
364	/* No exponent and no decimal. Use the entire
365	string. */
366	p = buffer + len;
367	}
368	/* At this point, p points just past the right-most character we
369	want to format. We need to add the grouping string for the
370	characters between buffer and p. */
371	return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
372	buf_size, NULL, 1);
373	}
374
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * PyOS_ascii_formatd:
 * @buffer:   A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format:   The printf()-style format to use for the
 *            code to use for converting.
 * @d:        The double to convert
 *
 * Converts a double to a string, using the '.' as
 * decimal point.  To format the number you pass in
 * a printf()-style format string.  Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
 *
 * 'n' is the same as 'g', except it uses the current locale.
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string,
 * or NULL if the format is rejected (empty, not starting with '%',
 * containing a single quote, 'l' or a second '%', ending in an
 * unsupported conversion character, or too long to copy for 'n'/'Z')
 * or if thousands grouping fails for 'n'.
 **/
char *
PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d)
{
    char format_char;
    size_t format_len = strlen(format);

    /* For type 'n', we need to make a copy of the format string, because
       we're going to modify 'n' -> 'g', and format is const char*, so we
       can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
       we ever need this to be.  There's an upcoming check to ensure it's
       big enough. */
    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* The format must start with '%'.  This is checked before anything
       else so that an empty format string is rejected here instead of
       being indexed at format[format_len - 1] with format_len == 0. */
    if (format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject format strings that, after the leading '%', contain a
       single quote, a lowercase l, or another percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'n' || format_char == 'Z'))
        return NULL;

    /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'n' || format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot.  Do not do this if using the
       'n' (number) format code, since we want to keep the localized
       decimal point in that case. */
    if (format_char != 'n')
        change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point). */
    if (format_char == 'Z')
        ensure_decimal_point(buffer, buf_size);

    /* If format_char is 'n', add the thousands grouping. */
    if (format_char == 'n')
        if (!add_thousands_grouping(buffer, buf_size))
            return NULL;

    return buffer;
}
485
486	double
487	PyOS_ascii_atof(const char *nptr)
488	{
489	return PyOS_ascii_strtod(nptr, NULL);
490	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Python/pystrtod.c@ 6

Download in other formats: