Context Navigation

stropmodule.c

Visit:

Last change on this file was 3225, checked in by bird, 18 years ago
Python 2.5
File size: 27.8 KB

Line
1	/* strop module */
2
3	#define PY_SSIZE_T_CLEAN
4	#include "Python.h"
5	#include <ctype.h>
6
7	PyDoc_STRVAR(strop_module__doc__,
8	"Common string manipulations, optimized for speed.\n"
9	"\n"
10	"Always use \"import string\" rather than referencing\n"
11	"this module directly.");
12
13	/* XXX This file assumes that the <ctype.h> is*() functions
14	XXX are defined for all 8-bit characters! */
15
16	#define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
17	"strop functions are obsolete; use string methods")) \
18	return NULL
19
20	/* The lstrip(), rstrip() and strip() functions are implemented
21	in do_strip(), which uses an additional parameter to indicate what
22	type of strip should occur. */
23
24	#define LEFTSTRIP 0
25	#define RIGHTSTRIP 1
26	#define BOTHSTRIP 2
27
28
29	static PyObject *
30	split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
31	{
32	Py_ssize_t i = 0, j;
33	int err;
34	Py_ssize_t countsplit = 0;
35	PyObject* item;
36	PyObject *list = PyList_New(0);
37
38	if (list == NULL)
39	return NULL;
40
41	while (i < len) {
42	while (i < len && isspace(Py_CHARMASK(s[i]))) {
43	i = i+1;
44	}
45	j = i;
46	while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47	i = i+1;
48	}
49	if (j < i) {
50	item = PyString_FromStringAndSize(s+j, i-j);
51	if (item == NULL)
52	goto finally;
53
54	err = PyList_Append(list, item);
55	Py_DECREF(item);
56	if (err < 0)
57	goto finally;
58
59	countsplit++;
60	while (i < len && isspace(Py_CHARMASK(s[i]))) {
61	i = i+1;
62	}
63	if (maxsplit && (countsplit >= maxsplit) && i < len) {
64	item = PyString_FromStringAndSize(
65	s+i, len - i);
66	if (item == NULL)
67	goto finally;
68
69	err = PyList_Append(list, item);
70	Py_DECREF(item);
71	if (err < 0)
72	goto finally;
73
74	i = len;
75	}
76	}
77	}
78	return list;
79	finally:
80	Py_DECREF(list);
81	return NULL;
82	}
83
84
85	PyDoc_STRVAR(splitfields__doc__,
86	"split(s [,sep [,maxsplit]]) -> list of strings\n"
87	"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88	"\n"
89	"Return a list of the words in the string s, using sep as the\n"
90	"delimiter string. If maxsplit is nonzero, splits into at most\n"
91	"maxsplit words. If sep is not specified, any whitespace string\n"
92	"is a separator. Maxsplit defaults to 0.\n"
93	"\n"
94	"(split and splitfields are synonymous)");
95
96	static PyObject *
97	strop_splitfields(PyObject self, PyObject args)
98	{
99	Py_ssize_t len, n, i, j, err;
100	Py_ssize_t splitcount, maxsplit;
101	char s, sub;
102	PyObject list, item;
103
104	WARN;
105	sub = NULL;
106	n = 0;
107	splitcount = 0;
108	maxsplit = 0;
109	if (!PyArg_ParseTuple(args, "t#\|z#n:split", &s, &len, &sub, &n, &maxsplit))
110	return NULL;
111	if (sub == NULL)
112	return split_whitespace(s, len, maxsplit);
113	if (n == 0) {
114	PyErr_SetString(PyExc_ValueError, "empty separator");
115	return NULL;
116	}
117
118	list = PyList_New(0);
119	if (list == NULL)
120	return NULL;
121
122	i = j = 0;
123	while (i+n <= len) {
124	if (s[i] == sub[0] && (n == 1 \|\| memcmp(s+i, sub, n) == 0)) {
125	item = PyString_FromStringAndSize(s+j, i-j);
126	if (item == NULL)
127	goto fail;
128	err = PyList_Append(list, item);
129	Py_DECREF(item);
130	if (err < 0)
131	goto fail;
132	i = j = i + n;
133	splitcount++;
134	if (maxsplit && (splitcount >= maxsplit))
135	break;
136	}
137	else
138	i++;
139	}
140	item = PyString_FromStringAndSize(s+j, len-j);
141	if (item == NULL)
142	goto fail;
143	err = PyList_Append(list, item);
144	Py_DECREF(item);
145	if (err < 0)
146	goto fail;
147
148	return list;
149
150	fail:
151	Py_DECREF(list);
152	return NULL;
153	}
154
155
156	PyDoc_STRVAR(joinfields__doc__,
157	"join(list [,sep]) -> string\n"
158	"joinfields(list [,sep]) -> string\n"
159	"\n"
160	"Return a string composed of the words in list, with\n"
161	"intervening occurrences of sep. Sep defaults to a single\n"
162	"space.\n"
163	"\n"
164	"(join and joinfields are synonymous)");
165
166	static PyObject *
167	strop_joinfields(PyObject self, PyObject args)
168	{
169	PyObject *seq;
170	char *sep = NULL;
171	Py_ssize_t seqlen, seplen = 0;
172	Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173	PyObject *res = NULL;
174	char* p = NULL;
175	ssizeargfunc getitemfunc;
176
177	WARN;
178	if (!PyArg_ParseTuple(args, "O\|t#:join", &seq, &sep, &seplen))
179	return NULL;
180	if (sep == NULL) {
181	sep = " ";
182	seplen = 1;
183	}
184
185	seqlen = PySequence_Size(seq);
186	if (seqlen < 0 && PyErr_Occurred())
187	return NULL;
188
189	if (seqlen == 1) {
190	/* Optimization if there's only one item */
191	PyObject *item = PySequence_GetItem(seq, 0);
192	if (item && !PyString_Check(item)) {
193	PyErr_SetString(PyExc_TypeError,
194	"first argument must be sequence of strings");
195	Py_DECREF(item);
196	return NULL;
197	}
198	return item;
199	}
200
201	if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202	return NULL;
203	p = PyString_AsString(res);
204
205	/* optimize for lists, since it's the most common case. all others
206	* (tuples and arbitrary sequences) just use the sequence abstract
207	* interface.
208	*/
209	if (PyList_Check(seq)) {
210	for (i = 0; i < seqlen; i++) {
211	PyObject *item = PyList_GET_ITEM(seq, i);
212	if (!PyString_Check(item)) {
213	PyErr_SetString(PyExc_TypeError,
214	"first argument must be sequence of strings");
215	Py_DECREF(res);
216	return NULL;
217	}
218	slen = PyString_GET_SIZE(item);
219	while (reslen + slen + seplen >= sz) {
220	if (_PyString_Resize(&res, sz * 2) < 0)
221	return NULL;
222	sz *= 2;
223	p = PyString_AsString(res) + reslen;
224	}
225	if (i > 0) {
226	memcpy(p, sep, seplen);
227	p += seplen;
228	reslen += seplen;
229	}
230	memcpy(p, PyString_AS_STRING(item), slen);
231	p += slen;
232	reslen += slen;
233	}
234	_PyString_Resize(&res, reslen);
235	return res;
236	}
237
238	if (seq->ob_type->tp_as_sequence == NULL \|\|
239	(getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
240	{
241	PyErr_SetString(PyExc_TypeError,
242	"first argument must be a sequence");
243	return NULL;
244	}
245	/* This is now type safe */
246	for (i = 0; i < seqlen; i++) {
247	PyObject *item = getitemfunc(seq, i);
248	if (!item \|\| !PyString_Check(item)) {
249	PyErr_SetString(PyExc_TypeError,
250	"first argument must be sequence of strings");
251	Py_DECREF(res);
252	Py_XDECREF(item);
253	return NULL;
254	}
255	slen = PyString_GET_SIZE(item);
256	while (reslen + slen + seplen >= sz) {
257	if (_PyString_Resize(&res, sz * 2) < 0) {
258	Py_DECREF(item);
259	return NULL;
260	}
261	sz *= 2;
262	p = PyString_AsString(res) + reslen;
263	}
264	if (i > 0) {
265	memcpy(p, sep, seplen);
266	p += seplen;
267	reslen += seplen;
268	}
269	memcpy(p, PyString_AS_STRING(item), slen);
270	p += slen;
271	reslen += slen;
272	Py_DECREF(item);
273	}
274	_PyString_Resize(&res, reslen);
275	return res;
276	}
277
278
279	PyDoc_STRVAR(find__doc__,
280	"find(s, sub [,start [,end]]) -> in\n"
281	"\n"
282	"Return the lowest index in s where substring sub is found,\n"
283	"such that sub is contained within s[start,end]. Optional\n"
284	"arguments start and end are interpreted as in slice notation.\n"
285	"\n"
286	"Return -1 on failure.");
287
288	static PyObject *
289	strop_find(PyObject self, PyObject args)
290	{
291	char s, sub;
292	Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
293
294	WARN;
295	if (!PyArg_ParseTuple(args, "t#t#\|nn:find", &s, &len, &sub, &n, &i, &last))
296	return NULL;
297
298	if (last > len)
299	last = len;
300	if (last < 0)
301	last += len;
302	if (last < 0)
303	last = 0;
304	if (i < 0)
305	i += len;
306	if (i < 0)
307	i = 0;
308
309	if (n == 0 && i <= last)
310	return PyInt_FromLong((long)i);
311
312	last -= n;
313	for (; i <= last; ++i)
314	if (s[i] == sub[0] &&
315	(n == 1 \|\| memcmp(&s[i+1], &sub[1], n-1) == 0))
316	return PyInt_FromLong((long)i);
317
318	return PyInt_FromLong(-1L);
319	}
320
321
322	PyDoc_STRVAR(rfind__doc__,
323	"rfind(s, sub [,start [,end]]) -> int\n"
324	"\n"
325	"Return the highest index in s where substring sub is found,\n"
326	"such that sub is contained within s[start,end]. Optional\n"
327	"arguments start and end are interpreted as in slice notation.\n"
328	"\n"
329	"Return -1 on failure.");
330
331	static PyObject *
332	strop_rfind(PyObject self, PyObject args)
333	{
334	char s, sub;
335	Py_ssize_t len, n, j;
336	Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
337
338	WARN;
339	if (!PyArg_ParseTuple(args, "t#t#\|nn:rfind", &s, &len, &sub, &n, &i, &last))
340	return NULL;
341
342	if (last > len)
343	last = len;
344	if (last < 0)
345	last += len;
346	if (last < 0)
347	last = 0;
348	if (i < 0)
349	i += len;
350	if (i < 0)
351	i = 0;
352
353	if (n == 0 && i <= last)
354	return PyInt_FromLong((long)last);
355
356	for (j = last-n; j >= i; --j)
357	if (s[j] == sub[0] &&
358	(n == 1 \|\| memcmp(&s[j+1], &sub[1], n-1) == 0))
359	return PyInt_FromLong((long)j);
360
361	return PyInt_FromLong(-1L);
362	}
363
364
365	static PyObject *
366	do_strip(PyObject *args, int striptype)
367	{
368	char *s;
369	Py_ssize_t len, i, j;
370
371
372	if (PyString_AsStringAndSize(args, &s, &len))
373	return NULL;
374
375	i = 0;
376	if (striptype != RIGHTSTRIP) {
377	while (i < len && isspace(Py_CHARMASK(s[i]))) {
378	i++;
379	}
380	}
381
382	j = len;
383	if (striptype != LEFTSTRIP) {
384	do {
385	j--;
386	} while (j >= i && isspace(Py_CHARMASK(s[j])));
387	j++;
388	}
389
390	if (i == 0 && j == len) {
391	Py_INCREF(args);
392	return args;
393	}
394	else
395	return PyString_FromStringAndSize(s+i, j-i);
396	}
397
398
399	PyDoc_STRVAR(strip__doc__,
400	"strip(s) -> string\n"
401	"\n"
402	"Return a copy of the string s with leading and trailing\n"
403	"whitespace removed.");
404
405	static PyObject *
406	strop_strip(PyObject self, PyObject args)
407	{
408	WARN;
409	return do_strip(args, BOTHSTRIP);
410	}
411
412
413	PyDoc_STRVAR(lstrip__doc__,
414	"lstrip(s) -> string\n"
415	"\n"
416	"Return a copy of the string s with leading whitespace removed.");
417
418	static PyObject *
419	strop_lstrip(PyObject self, PyObject args)
420	{
421	WARN;
422	return do_strip(args, LEFTSTRIP);
423	}
424
425
426	PyDoc_STRVAR(rstrip__doc__,
427	"rstrip(s) -> string\n"
428	"\n"
429	"Return a copy of the string s with trailing whitespace removed.");
430
431	static PyObject *
432	strop_rstrip(PyObject self, PyObject args)
433	{
434	WARN;
435	return do_strip(args, RIGHTSTRIP);
436	}
437
438
439	PyDoc_STRVAR(lower__doc__,
440	"lower(s) -> string\n"
441	"\n"
442	"Return a copy of the string s converted to lowercase.");
443
444	static PyObject *
445	strop_lower(PyObject self, PyObject args)
446	{
447	char s, s_new;
448	Py_ssize_t i, n;
449	PyObject *newstr;
450	int changed;
451
452	WARN;
453	if (PyString_AsStringAndSize(args, &s, &n))
454	return NULL;
455	newstr = PyString_FromStringAndSize(NULL, n);
456	if (newstr == NULL)
457	return NULL;
458	s_new = PyString_AsString(newstr);
459	changed = 0;
460	for (i = 0; i < n; i++) {
461	int c = Py_CHARMASK(*s++);
462	if (isupper(c)) {
463	changed = 1;
464	*s_new = tolower(c);
465	} else
466	*s_new = c;
467	s_new++;
468	}
469	if (!changed) {
470	Py_DECREF(newstr);
471	Py_INCREF(args);
472	return args;
473	}
474	return newstr;
475	}
476
477
478	PyDoc_STRVAR(upper__doc__,
479	"upper(s) -> string\n"
480	"\n"
481	"Return a copy of the string s converted to uppercase.");
482
483	static PyObject *
484	strop_upper(PyObject self, PyObject args)
485	{
486	char s, s_new;
487	Py_ssize_t i, n;
488	PyObject *newstr;
489	int changed;
490
491	WARN;
492	if (PyString_AsStringAndSize(args, &s, &n))
493	return NULL;
494	newstr = PyString_FromStringAndSize(NULL, n);
495	if (newstr == NULL)
496	return NULL;
497	s_new = PyString_AsString(newstr);
498	changed = 0;
499	for (i = 0; i < n; i++) {
500	int c = Py_CHARMASK(*s++);
501	if (islower(c)) {
502	changed = 1;
503	*s_new = toupper(c);
504	} else
505	*s_new = c;
506	s_new++;
507	}
508	if (!changed) {
509	Py_DECREF(newstr);
510	Py_INCREF(args);
511	return args;
512	}
513	return newstr;
514	}
515
516
517	PyDoc_STRVAR(capitalize__doc__,
518	"capitalize(s) -> string\n"
519	"\n"
520	"Return a copy of the string s with only its first character\n"
521	"capitalized.");
522
523	static PyObject *
524	strop_capitalize(PyObject self, PyObject args)
525	{
526	char s, s_new;
527	Py_ssize_t i, n;
528	PyObject *newstr;
529	int changed;
530
531	WARN;
532	if (PyString_AsStringAndSize(args, &s, &n))
533	return NULL;
534	newstr = PyString_FromStringAndSize(NULL, n);
535	if (newstr == NULL)
536	return NULL;
537	s_new = PyString_AsString(newstr);
538	changed = 0;
539	if (0 < n) {
540	int c = Py_CHARMASK(*s++);
541	if (islower(c)) {
542	changed = 1;
543	*s_new = toupper(c);
544	} else
545	*s_new = c;
546	s_new++;
547	}
548	for (i = 1; i < n; i++) {
549	int c = Py_CHARMASK(*s++);
550	if (isupper(c)) {
551	changed = 1;
552	*s_new = tolower(c);
553	} else
554	*s_new = c;
555	s_new++;
556	}
557	if (!changed) {
558	Py_DECREF(newstr);
559	Py_INCREF(args);
560	return args;
561	}
562	return newstr;
563	}
564
565
566	PyDoc_STRVAR(expandtabs__doc__,
567	"expandtabs(string, [tabsize]) -> string\n"
568	"\n"
569	"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
570	"depending on the current column and the given tab size (default 8).\n"
571	"The column number is reset to zero after each newline occurring in the\n"
572	"string. This doesn't understand other non-printing characters.");
573
574	static PyObject *
575	strop_expandtabs(PyObject self, PyObject args)
576	{
577	/* Original by Fredrik Lundh */
578	char* e;
579	char* p;
580	char* q;
581	Py_ssize_t i, j;
582	PyObject* out;
583	char* string;
584	Py_ssize_t stringlen;
585	int tabsize = 8;
586
587	WARN;
588	/* Get arguments */
589	if (!PyArg_ParseTuple(args, "s#\|i:expandtabs", &string, &stringlen, &tabsize))
590	return NULL;
591	if (tabsize < 1) {
592	PyErr_SetString(PyExc_ValueError,
593	"tabsize must be at least 1");
594	return NULL;
595	}
596
597	/* First pass: determine size of output string */
598	i = j = 0; /* j: current column; i: total of previous lines */
599	e = string + stringlen;
600	for (p = string; p < e; p++) {
601	if (*p == '\t')
602	j += tabsize - (j%tabsize);
603	else {
604	j++;
605	if (*p == '\n') {
606	i += j;
607	j = 0;
608	}
609	}
610	}
611
612	/* Second pass: create output string and fill it */
613	out = PyString_FromStringAndSize(NULL, i+j);
614	if (out == NULL)
615	return NULL;
616
617	i = 0;
618	q = PyString_AS_STRING(out);
619
620	for (p = string; p < e; p++) {
621	if (*p == '\t') {
622	j = tabsize - (i%tabsize);
623	i += j;
624	while (j-- > 0)
625	*q++ = ' ';
626	} else {
627	q++ = p;
628	i++;
629	if (*p == '\n')
630	i = 0;
631	}
632	}
633
634	return out;
635	}
636
637
638	PyDoc_STRVAR(count__doc__,
639	"count(s, sub[, start[, end]]) -> int\n"
640	"\n"
641	"Return the number of occurrences of substring sub in string\n"
642	"s[start:end]. Optional arguments start and end are\n"
643	"interpreted as in slice notation.");
644
645	static PyObject *
646	strop_count(PyObject self, PyObject args)
647	{
648	char s, sub;
649	Py_ssize_t len, n;
650	Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
651	Py_ssize_t m, r;
652
653	WARN;
654	if (!PyArg_ParseTuple(args, "t#t#\|nn:count", &s, &len, &sub, &n, &i, &last))
655	return NULL;
656	if (last > len)
657	last = len;
658	if (last < 0)
659	last += len;
660	if (last < 0)
661	last = 0;
662	if (i < 0)
663	i += len;
664	if (i < 0)
665	i = 0;
666	m = last + 1 - n;
667	if (n == 0)
668	return PyInt_FromLong((long) (m-i));
669
670	r = 0;
671	while (i < m) {
672	if (!memcmp(s+i, sub, n)) {
673	r++;
674	i += n;
675	} else {
676	i++;
677	}
678	}
679	return PyInt_FromLong((long) r);
680	}
681
682
683	PyDoc_STRVAR(swapcase__doc__,
684	"swapcase(s) -> string\n"
685	"\n"
686	"Return a copy of the string s with upper case characters\n"
687	"converted to lowercase and vice versa.");
688
689	static PyObject *
690	strop_swapcase(PyObject self, PyObject args)
691	{
692	char s, s_new;
693	Py_ssize_t i, n;
694	PyObject *newstr;
695	int changed;
696
697	WARN;
698	if (PyString_AsStringAndSize(args, &s, &n))
699	return NULL;
700	newstr = PyString_FromStringAndSize(NULL, n);
701	if (newstr == NULL)
702	return NULL;
703	s_new = PyString_AsString(newstr);
704	changed = 0;
705	for (i = 0; i < n; i++) {
706	int c = Py_CHARMASK(*s++);
707	if (islower(c)) {
708	changed = 1;
709	*s_new = toupper(c);
710	}
711	else if (isupper(c)) {
712	changed = 1;
713	*s_new = tolower(c);
714	}
715	else
716	*s_new = c;
717	s_new++;
718	}
719	if (!changed) {
720	Py_DECREF(newstr);
721	Py_INCREF(args);
722	return args;
723	}
724	return newstr;
725	}
726
727
728	PyDoc_STRVAR(atoi__doc__,
729	"atoi(s [,base]) -> int\n"
730	"\n"
731	"Return the integer represented by the string s in the given\n"
732	"base, which defaults to 10. The string s must consist of one\n"
733	"or more digits, possibly preceded by a sign. If base is 0, it\n"
734	"is chosen from the leading characters of s, 0 for octal, 0x or\n"
735	"0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
736	"accepted.");
737
738	static PyObject *
739	strop_atoi(PyObject self, PyObject args)
740	{
741	char s, end;
742	int base = 10;
743	long x;
744	char buffer[256]; /* For errors */
745
746	WARN;
747	if (!PyArg_ParseTuple(args, "s\|i:atoi", &s, &base))
748	return NULL;
749
750	if ((base != 0 && base < 2) \|\| base > 36) {
751	PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
752	return NULL;
753	}
754
755	while (s && isspace(Py_CHARMASK(s)))
756	s++;
757	errno = 0;
758	if (base == 0 && s[0] == '0')
759	x = (long) PyOS_strtoul(s, &end, base);
760	else
761	x = PyOS_strtol(s, &end, base);
762	if (end == s \|\| !isalnum(Py_CHARMASK(end[-1])))
763	goto bad;
764	while (end && isspace(Py_CHARMASK(end)))
765	end++;
766	if (*end != '\0') {
767	bad:
768	PyOS_snprintf(buffer, sizeof(buffer),
769	"invalid literal for atoi(): %.200s", s);
770	PyErr_SetString(PyExc_ValueError, buffer);
771	return NULL;
772	}
773	else if (errno != 0) {
774	PyOS_snprintf(buffer, sizeof(buffer),
775	"atoi() literal too large: %.200s", s);
776	PyErr_SetString(PyExc_ValueError, buffer);
777	return NULL;
778	}
779	return PyInt_FromLong(x);
780	}
781
782
783	PyDoc_STRVAR(atol__doc__,
784	"atol(s [,base]) -> long\n"
785	"\n"
786	"Return the long integer represented by the string s in the\n"
787	"given base, which defaults to 10. The string s must consist\n"
788	"of one or more digits, possibly preceded by a sign. If base\n"
789	"is 0, it is chosen from the leading characters of s, 0 for\n"
790	"octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
791	"0x or 0X is accepted. A trailing L or l is not accepted,\n"
792	"unless base is 0.");
793
794	static PyObject *
795	strop_atol(PyObject self, PyObject args)
796	{
797	char s, end;
798	int base = 10;
799	PyObject *x;
800	char buffer[256]; /* For errors */
801
802	WARN;
803	if (!PyArg_ParseTuple(args, "s\|i:atol", &s, &base))
804	return NULL;
805
806	if ((base != 0 && base < 2) \|\| base > 36) {
807	PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
808	return NULL;
809	}
810
811	while (s && isspace(Py_CHARMASK(s)))
812	s++;
813	if (s[0] == '\0') {
814	PyErr_SetString(PyExc_ValueError, "empty string for atol()");
815	return NULL;
816	}
817	x = PyLong_FromString(s, &end, base);
818	if (x == NULL)
819	return NULL;
820	if (base == 0 && (end == 'l' \|\| end == 'L'))
821	end++;
822	while (end && isspace(Py_CHARMASK(end)))
823	end++;
824	if (*end != '\0') {
825	PyOS_snprintf(buffer, sizeof(buffer),
826	"invalid literal for atol(): %.200s", s);
827	PyErr_SetString(PyExc_ValueError, buffer);
828	Py_DECREF(x);
829	return NULL;
830	}
831	return x;
832	}
833
834
835	PyDoc_STRVAR(atof__doc__,
836	"atof(s) -> float\n"
837	"\n"
838	"Return the floating point number represented by the string s.");
839
840	static PyObject *
841	strop_atof(PyObject self, PyObject args)
842	{
843	char s, end;
844	double x;
845	char buffer[256]; /* For errors */
846
847	WARN;
848	if (!PyArg_ParseTuple(args, "s:atof", &s))
849	return NULL;
850	while (s && isspace(Py_CHARMASK(s)))
851	s++;
852	if (s[0] == '\0') {
853	PyErr_SetString(PyExc_ValueError, "empty string for atof()");
854	return NULL;
855	}
856	errno = 0;
857	PyFPE_START_PROTECT("strop_atof", return 0)
858	x = PyOS_ascii_strtod(s, &end);
859	PyFPE_END_PROTECT(x)
860	while (end && isspace(Py_CHARMASK(end)))
861	end++;
862	if (*end != '\0') {
863	PyOS_snprintf(buffer, sizeof(buffer),
864	"invalid literal for atof(): %.200s", s);
865	PyErr_SetString(PyExc_ValueError, buffer);
866	return NULL;
867	}
868	else if (errno != 0) {
869	PyOS_snprintf(buffer, sizeof(buffer),
870	"atof() literal too large: %.200s", s);
871	PyErr_SetString(PyExc_ValueError, buffer);
872	return NULL;
873	}
874	return PyFloat_FromDouble(x);
875	}
876
877
878	PyDoc_STRVAR(maketrans__doc__,
879	"maketrans(frm, to) -> string\n"
880	"\n"
881	"Return a translation table (a string of 256 bytes long)\n"
882	"suitable for use in string.translate. The strings frm and to\n"
883	"must be of the same length.");
884
885	static PyObject *
886	strop_maketrans(PyObject self, PyObject args)
887	{
888	unsigned char c, from=NULL, *to=NULL;
889	Py_ssize_t i, fromlen=0, tolen=0;
890	PyObject *result;
891
892	if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
893	return NULL;
894
895	if (fromlen != tolen) {
896	PyErr_SetString(PyExc_ValueError,
897	"maketrans arguments must have same length");
898	return NULL;
899	}
900
901	result = PyString_FromStringAndSize((char *)NULL, 256);
902	if (result == NULL)
903	return NULL;
904	c = (unsigned char ) PyString_AS_STRING((PyStringObject )result);
905	for (i = 0; i < 256; i++)
906	c[i]=(unsigned char)i;
907	for (i = 0; i < fromlen; i++)
908	c[from[i]]=to[i];
909
910	return result;
911	}
912
913
914	PyDoc_STRVAR(translate__doc__,
915	"translate(s,table [,deletechars]) -> string\n"
916	"\n"
917	"Return a copy of the string s, where all characters occurring\n"
918	"in the optional argument deletechars are removed, and the\n"
919	"remaining characters have been mapped through the given\n"
920	"translation table, which must be a string of length 256.");
921
922	static PyObject *
923	strop_translate(PyObject self, PyObject args)
924	{
925	register char input, table, *output;
926	Py_ssize_t i;
927	int c, changed = 0;
928	PyObject *input_obj;
929	char table1, output_start, *del_table=NULL;
930	Py_ssize_t inlen, tablen, dellen = 0;
931	PyObject *result;
932	int trans_table[256];
933
934	WARN;
935	if (!PyArg_ParseTuple(args, "St#\|t#:translate", &input_obj,
936	&table1, &tablen, &del_table, &dellen))
937	return NULL;
938	if (tablen != 256) {
939	PyErr_SetString(PyExc_ValueError,
940	"translation table must be 256 characters long");
941	return NULL;
942	}
943
944	table = table1;
945	inlen = PyString_GET_SIZE(input_obj);
946	result = PyString_FromStringAndSize((char *)NULL, inlen);
947	if (result == NULL)
948	return NULL;
949	output_start = output = PyString_AsString(result);
950	input = PyString_AsString(input_obj);
951
952	if (dellen == 0) {
953	/* If no deletions are required, use faster code */
954	for (i = inlen; --i >= 0; ) {
955	c = Py_CHARMASK(*input++);
956	if (Py_CHARMASK((*output++ = table[c])) != c)
957	changed = 1;
958	}
959	if (changed)
960	return result;
961	Py_DECREF(result);
962	Py_INCREF(input_obj);
963	return input_obj;
964	}
965
966	for (i = 0; i < 256; i++)
967	trans_table[i] = Py_CHARMASK(table[i]);
968
969	for (i = 0; i < dellen; i++)
970	trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
971
972	for (i = inlen; --i >= 0; ) {
973	c = Py_CHARMASK(*input++);
974	if (trans_table[c] != -1)
975	if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
976	continue;
977	changed = 1;
978	}
979	if (!changed) {
980	Py_DECREF(result);
981	Py_INCREF(input_obj);
982	return input_obj;
983	}
984	/* Fix the size of the resulting string */
985	if (inlen > 0)
986	_PyString_Resize(&result, output - output_start);
987	return result;
988	}
989
990
991	/* What follows is used for implementing replace(). Perry Stoll. */
992
993	/*
994	mymemfind
995
996	strstr replacement for arbitrary blocks of memory.
997
998	Locates the first occurrence in the memory pointed to by MEM of the
999	contents of memory pointed to by PAT. Returns the index into MEM if
1000	found, or -1 if not found. If len of PAT is greater than length of
1001	MEM, the function returns -1.
1002	*/
1003	static Py_ssize_t
1004	mymemfind(const char mem, Py_ssize_t len, const char pat, Py_ssize_t pat_len)
1005	{
1006	register Py_ssize_t ii;
1007
1008	/* pattern can not occur in the last pat_len-1 chars */
1009	len -= pat_len;
1010
1011	for (ii = 0; ii <= len; ii++) {
1012	if (mem[ii] == pat[0] &&
1013	(pat_len == 1 \|\|
1014	memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1015	return ii;
1016	}
1017	}
1018	return -1;
1019	}
1020
1021	/*
1022	mymemcnt
1023
1024	Return the number of distinct times PAT is found in MEM.
1025	meaning mem=1111 and pat==11 returns 2.
1026	mem=11111 and pat==11 also return 2.
1027	*/
1028	static Py_ssize_t
1029	mymemcnt(const char mem, Py_ssize_t len, const char pat, Py_ssize_t pat_len)
1030	{
1031	register Py_ssize_t offset = 0;
1032	Py_ssize_t nfound = 0;
1033
1034	while (len >= 0) {
1035	offset = mymemfind(mem, len, pat, pat_len);
1036	if (offset == -1)
1037	break;
1038	mem += offset + pat_len;
1039	len -= offset + pat_len;
1040	nfound++;
1041	}
1042	return nfound;
1043	}
1044
1045	/*
1046	mymemreplace
1047
1048	Return a string in which all occurrences of PAT in memory STR are
1049	replaced with SUB.
1050
1051	If length of PAT is less than length of STR or there are no occurrences
1052	of PAT in STR, then the original string is returned. Otherwise, a new
1053	string is allocated here and returned.
1054
1055	on return, out_len is:
1056	the length of output string, or
1057	-1 if the input string is returned, or
1058	unchanged if an error occurs (no memory).
1059
1060	return value is:
1061	the new string allocated locally, or
1062	NULL if an error occurred.
1063	*/
1064	static char *
1065	mymemreplace(const char str, Py_ssize_t len, / input string */
1066	const char pat, Py_ssize_t pat_len, / pattern string to find */
1067	const char sub, Py_ssize_t sub_len, / substitution string */
1068	Py_ssize_t count, /* number of replacements */
1069	Py_ssize_t *out_len)
1070	{
1071	char *out_s;
1072	char *new_s;
1073	Py_ssize_t nfound, offset, new_len;
1074
1075	if (len == 0 \|\| pat_len > len)
1076	goto return_same;
1077
1078	/* find length of output string */
1079	nfound = mymemcnt(str, len, pat, pat_len);
1080	if (count < 0)
1081	count = PY_SSIZE_T_MAX;
1082	else if (nfound > count)
1083	nfound = count;
1084	if (nfound == 0)
1085	goto return_same;
1086
1087	new_len = len + nfound*(sub_len - pat_len);
1088	if (new_len == 0) {
1089	/* Have to allocate something for the caller to free(). */
1090	out_s = (char *)PyMem_MALLOC(1);
1091	if (out_s == NULL)
1092	return NULL;
1093	out_s[0] = '\0';
1094	}
1095	else {
1096	assert(new_len > 0);
1097	new_s = (char *)PyMem_MALLOC(new_len);
1098	if (new_s == NULL)
1099	return NULL;
1100	out_s = new_s;
1101
1102	for (; count > 0 && len > 0; --count) {
1103	/* find index of next instance of pattern */
1104	offset = mymemfind(str, len, pat, pat_len);
1105	if (offset == -1)
1106	break;
1107
1108	/* copy non matching part of input string */
1109	memcpy(new_s, str, offset);
1110	str += offset + pat_len;
1111	len -= offset + pat_len;
1112
1113	/* copy substitute into the output string */
1114	new_s += offset;
1115	memcpy(new_s, sub, sub_len);
1116	new_s += sub_len;
1117	}
1118	/* copy any remaining values into output string */
1119	if (len > 0)
1120	memcpy(new_s, str, len);
1121	}
1122	*out_len = new_len;
1123	return out_s;
1124
1125	return_same:
1126	*out_len = -1;
1127	return (char )str; / cast away const */
1128	}
1129
1130
1131	PyDoc_STRVAR(replace__doc__,
1132	"replace (str, old, new[, maxsplit]) -> string\n"
1133	"\n"
1134	"Return a copy of string str with all occurrences of substring\n"
1135	"old replaced by new. If the optional argument maxsplit is\n"
1136	"given, only the first maxsplit occurrences are replaced.");
1137
1138	static PyObject *
1139	strop_replace(PyObject self, PyObject args)
1140	{
1141	char str, pat,sub,new_s;
1142	Py_ssize_t len,pat_len,sub_len,out_len;
1143	Py_ssize_t count = -1;
1144	PyObject *newstr;
1145
1146	WARN;
1147	if (!PyArg_ParseTuple(args, "t#t#t#\|n:replace",
1148	&str, &len, &pat, &pat_len, &sub, &sub_len,
1149	&count))
1150	return NULL;
1151	if (pat_len <= 0) {
1152	PyErr_SetString(PyExc_ValueError, "empty pattern string");
1153	return NULL;
1154	}
1155	/* CAUTION: strop treats a replace count of 0 as infinity, unlke
1156	* current (2.1) string.py and string methods. Preserve this for
1157	* ... well, hard to say for what <wink>.
1158	*/
1159	if (count == 0)
1160	count = -1;
1161	new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1162	if (new_s == NULL) {
1163	PyErr_NoMemory();
1164	return NULL;
1165	}
1166	if (out_len == -1) {
1167	/* we're returning another reference to the input string */
1168	newstr = PyTuple_GetItem(args, 0);
1169	Py_XINCREF(newstr);
1170	}
1171	else {
1172	newstr = PyString_FromStringAndSize(new_s, out_len);
1173	PyMem_FREE(new_s);
1174	}
1175	return newstr;
1176	}
1177
1178
1179	/* List of functions defined in the module */
1180
1181	static PyMethodDef
1182	strop_methods[] = {
1183	{"atof", strop_atof, METH_VARARGS, atof__doc__},
1184	{"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1185	{"atol", strop_atol, METH_VARARGS, atol__doc__},
1186	{"capitalize", strop_capitalize, METH_O, capitalize__doc__},
1187	{"count", strop_count, METH_VARARGS, count__doc__},
1188	{"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1189	{"find", strop_find, METH_VARARGS, find__doc__},
1190	{"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1191	{"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1192	{"lstrip", strop_lstrip, METH_O, lstrip__doc__},
1193	{"lower", strop_lower, METH_O, lower__doc__},
1194	{"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1195	{"replace", strop_replace, METH_VARARGS, replace__doc__},
1196	{"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1197	{"rstrip", strop_rstrip, METH_O, rstrip__doc__},
1198	{"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1199	{"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1200	{"strip", strop_strip, METH_O, strip__doc__},
1201	{"swapcase", strop_swapcase, METH_O, swapcase__doc__},
1202	{"translate", strop_translate, METH_VARARGS, translate__doc__},
1203	{"upper", strop_upper, METH_O, upper__doc__},
1204	{NULL, NULL} /* sentinel */
1205	};
1206
1207
1208	PyMODINIT_FUNC
1209	initstrop(void)
1210	{
1211	PyObject m, s;
1212	char buf[256];
1213	int c, n;
1214	m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1215	(PyObject*)NULL, PYTHON_API_VERSION);
1216	if (m == NULL)
1217	return;
1218
1219	/* Create 'whitespace' object */
1220	n = 0;
1221	for (c = 0; c < 256; c++) {
1222	if (isspace(c))
1223	buf[n++] = c;
1224	}
1225	s = PyString_FromStringAndSize(buf, n);
1226	if (s)
1227	PyModule_AddObject(m, "whitespace", s);
1228
1229	/* Create 'lowercase' object */
1230	n = 0;
1231	for (c = 0; c < 256; c++) {
1232	if (islower(c))
1233	buf[n++] = c;
1234	}
1235	s = PyString_FromStringAndSize(buf, n);
1236	if (s)
1237	PyModule_AddObject(m, "lowercase", s);
1238
1239	/* Create 'uppercase' object */
1240	n = 0;
1241	for (c = 0; c < 256; c++) {
1242	if (isupper(c))
1243	buf[n++] = c;
1244	}
1245	s = PyString_FromStringAndSize(buf, n);
1246	if (s)
1247	PyModule_AddObject(m, "uppercase", s);
1248	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/python/2.5/Modules/stropmodule.c

Download in other formats: