| 1 | /* doop.c
|
|---|
| 2 | *
|
|---|
| 3 | * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
|
|---|
| 4 | * 2000, 2001, 2002, 2004, 2005, 2006, by Larry Wall and others
|
|---|
| 5 | *
|
|---|
| 6 | * You may distribute under the terms of either the GNU General Public
|
|---|
| 7 | * License or the Artistic License, as specified in the README file.
|
|---|
| 8 | *
|
|---|
| 9 | */
|
|---|
| 10 |
|
|---|
| 11 | /*
|
|---|
| 12 | * "'So that was the job I felt I had to do when I started,' thought Sam."
|
|---|
| 13 | */
|
|---|
| 14 |
|
|---|
| 15 | /* This file contains some common functions needed to carry out certain
|
|---|
| 16 | * ops. For example both pp_schomp() and pp_chomp() - scalar and array
|
|---|
| 17 | * chomp operations - call the function do_chomp() found in this file.
|
|---|
| 18 | */
|
|---|
| 19 |
|
|---|
| 20 | #include "EXTERN.h"
|
|---|
| 21 | #define PERL_IN_DOOP_C
|
|---|
| 22 | #include "perl.h"
|
|---|
| 23 |
|
|---|
| 24 | #ifndef PERL_MICRO
|
|---|
| 25 | #include <signal.h>
|
|---|
| 26 | #endif
|
|---|
| 27 |
|
|---|
| 28 | STATIC I32
|
|---|
| 29 | S_do_trans_simple(pTHX_ SV *sv)
|
|---|
| 30 | {
|
|---|
| 31 | U8 *s;
|
|---|
| 32 | U8 *d;
|
|---|
| 33 | const U8 *send;
|
|---|
| 34 | U8 *dstart;
|
|---|
| 35 | I32 matches = 0;
|
|---|
| 36 | const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
|
|---|
| 37 | STRLEN len;
|
|---|
| 38 |
|
|---|
| 39 | const short *tbl = (short*)cPVOP->op_pv;
|
|---|
| 40 | if (!tbl)
|
|---|
| 41 | Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__);
|
|---|
| 42 |
|
|---|
| 43 | s = (U8*)SvPV(sv, len);
|
|---|
| 44 | send = s + len;
|
|---|
| 45 |
|
|---|
| 46 | /* First, take care of non-UTF-8 input strings, because they're easy */
|
|---|
| 47 | if (!SvUTF8(sv)) {
|
|---|
| 48 | while (s < send) {
|
|---|
| 49 | const I32 ch = tbl[*s];
|
|---|
| 50 | if (ch >= 0) {
|
|---|
| 51 | matches++;
|
|---|
| 52 | *s++ = (U8)ch;
|
|---|
| 53 | }
|
|---|
| 54 | else
|
|---|
| 55 | s++;
|
|---|
| 56 | }
|
|---|
| 57 | SvSETMAGIC(sv);
|
|---|
| 58 | return matches;
|
|---|
| 59 | }
|
|---|
| 60 |
|
|---|
| 61 | /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
|
|---|
| 62 | if (grows)
|
|---|
| 63 | Newx(d, len*2+1, U8);
|
|---|
| 64 | else
|
|---|
| 65 | d = s;
|
|---|
| 66 | dstart = d;
|
|---|
| 67 | while (s < send) {
|
|---|
| 68 | STRLEN ulen;
|
|---|
| 69 | I32 ch;
|
|---|
| 70 |
|
|---|
| 71 | /* Need to check this, otherwise 128..255 won't match */
|
|---|
| 72 | const UV c = utf8n_to_uvchr(s, send - s, &ulen, 0);
|
|---|
| 73 | if (c < 0x100 && (ch = tbl[c]) >= 0) {
|
|---|
| 74 | matches++;
|
|---|
| 75 | d = uvchr_to_utf8(d, ch);
|
|---|
| 76 | s += ulen;
|
|---|
| 77 | }
|
|---|
| 78 | else { /* No match -> copy */
|
|---|
| 79 | Move(s, d, ulen, U8);
|
|---|
| 80 | d += ulen;
|
|---|
| 81 | s += ulen;
|
|---|
| 82 | }
|
|---|
| 83 | }
|
|---|
| 84 | if (grows) {
|
|---|
| 85 | sv_setpvn(sv, (char*)dstart, d - dstart);
|
|---|
| 86 | Safefree(dstart);
|
|---|
| 87 | }
|
|---|
| 88 | else {
|
|---|
| 89 | *d = '\0';
|
|---|
| 90 | SvCUR_set(sv, d - dstart);
|
|---|
| 91 | }
|
|---|
| 92 | SvUTF8_on(sv);
|
|---|
| 93 | SvSETMAGIC(sv);
|
|---|
| 94 | return matches;
|
|---|
| 95 | }
|
|---|
| 96 |
|
|---|
| 97 | STATIC I32
|
|---|
| 98 | S_do_trans_count(pTHX_ SV *sv)
|
|---|
| 99 | {
|
|---|
| 100 | const U8 *s;
|
|---|
| 101 | const U8 *send;
|
|---|
| 102 | I32 matches = 0;
|
|---|
| 103 | STRLEN len;
|
|---|
| 104 | const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
|
|---|
| 105 |
|
|---|
| 106 | const short * const tbl = (short*)cPVOP->op_pv;
|
|---|
| 107 | if (!tbl)
|
|---|
| 108 | Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__);
|
|---|
| 109 |
|
|---|
| 110 | s = (const U8*)SvPV_const(sv, len);
|
|---|
| 111 | send = s + len;
|
|---|
| 112 |
|
|---|
| 113 | if (!SvUTF8(sv))
|
|---|
| 114 | while (s < send) {
|
|---|
| 115 | if (tbl[*s++] >= 0)
|
|---|
| 116 | matches++;
|
|---|
| 117 | }
|
|---|
| 118 | else
|
|---|
| 119 | while (s < send) {
|
|---|
| 120 | STRLEN ulen;
|
|---|
| 121 | const UV c = utf8n_to_uvchr((U8 *)s, send - s, &ulen, 0);
|
|---|
| 122 | if (c < 0x100) {
|
|---|
| 123 | if (tbl[c] >= 0)
|
|---|
| 124 | matches++;
|
|---|
| 125 | } else if (complement)
|
|---|
| 126 | matches++;
|
|---|
| 127 | s += ulen;
|
|---|
| 128 | }
|
|---|
| 129 |
|
|---|
| 130 | return matches;
|
|---|
| 131 | }
|
|---|
| 132 |
|
|---|
| 133 | STATIC I32
|
|---|
| 134 | S_do_trans_complex(pTHX_ SV *sv)
|
|---|
| 135 | {
|
|---|
| 136 | U8 *s;
|
|---|
| 137 | U8 *send;
|
|---|
| 138 | U8 *d;
|
|---|
| 139 | U8 *dstart;
|
|---|
| 140 | I32 isutf8;
|
|---|
| 141 | I32 matches = 0;
|
|---|
| 142 | const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
|
|---|
| 143 | const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
|
|---|
| 144 | const I32 del = PL_op->op_private & OPpTRANS_DELETE;
|
|---|
| 145 | STRLEN len, rlen = 0;
|
|---|
| 146 |
|
|---|
| 147 | const short * const tbl = (short*)cPVOP->op_pv;
|
|---|
| 148 | if (!tbl)
|
|---|
| 149 | Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
|
|---|
| 150 |
|
|---|
| 151 | s = (U8*)SvPV(sv, len);
|
|---|
| 152 | isutf8 = SvUTF8(sv);
|
|---|
| 153 | send = s + len;
|
|---|
| 154 |
|
|---|
| 155 | if (!isutf8) {
|
|---|
| 156 | dstart = d = s;
|
|---|
| 157 | if (PL_op->op_private & OPpTRANS_SQUASH) {
|
|---|
| 158 | const U8* p = send;
|
|---|
| 159 | while (s < send) {
|
|---|
| 160 | const I32 ch = tbl[*s];
|
|---|
| 161 | if (ch >= 0) {
|
|---|
| 162 | *d = (U8)ch;
|
|---|
| 163 | matches++;
|
|---|
| 164 | if (p != d - 1 || *p != *d)
|
|---|
| 165 | p = d++;
|
|---|
| 166 | }
|
|---|
| 167 | else if (ch == -1) /* -1 is unmapped character */
|
|---|
| 168 | *d++ = *s;
|
|---|
| 169 | else if (ch == -2) /* -2 is delete character */
|
|---|
| 170 | matches++;
|
|---|
| 171 | s++;
|
|---|
| 172 | }
|
|---|
| 173 | }
|
|---|
| 174 | else {
|
|---|
| 175 | while (s < send) {
|
|---|
| 176 | const I32 ch = tbl[*s];
|
|---|
| 177 | if (ch >= 0) {
|
|---|
| 178 | matches++;
|
|---|
| 179 | *d++ = (U8)ch;
|
|---|
| 180 | }
|
|---|
| 181 | else if (ch == -1) /* -1 is unmapped character */
|
|---|
| 182 | *d++ = *s;
|
|---|
| 183 | else if (ch == -2) /* -2 is delete character */
|
|---|
| 184 | matches++;
|
|---|
| 185 | s++;
|
|---|
| 186 | }
|
|---|
| 187 | }
|
|---|
| 188 | *d = '\0';
|
|---|
| 189 | SvCUR_set(sv, d - dstart);
|
|---|
| 190 | }
|
|---|
| 191 | else { /* isutf8 */
|
|---|
| 192 | if (grows)
|
|---|
| 193 | Newx(d, len*2+1, U8);
|
|---|
| 194 | else
|
|---|
| 195 | d = s;
|
|---|
| 196 | dstart = d;
|
|---|
| 197 | if (complement && !del)
|
|---|
| 198 | rlen = tbl[0x100];
|
|---|
| 199 |
|
|---|
| 200 | #ifdef MACOS_TRADITIONAL
|
|---|
| 201 | #define comp CoMP /* "comp" is a keyword in some compilers ... */
|
|---|
| 202 | #endif
|
|---|
| 203 |
|
|---|
| 204 | if (PL_op->op_private & OPpTRANS_SQUASH) {
|
|---|
| 205 | UV pch = 0xfeedface;
|
|---|
| 206 | while (s < send) {
|
|---|
| 207 | STRLEN len;
|
|---|
| 208 | const UV comp = utf8_to_uvchr(s, &len);
|
|---|
| 209 | I32 ch;
|
|---|
| 210 |
|
|---|
| 211 | if (comp > 0xff) {
|
|---|
| 212 | if (!complement) {
|
|---|
| 213 | Copy(s, d, len, U8);
|
|---|
| 214 | d += len;
|
|---|
| 215 | }
|
|---|
| 216 | else {
|
|---|
| 217 | matches++;
|
|---|
| 218 | if (!del) {
|
|---|
| 219 | ch = (rlen == 0) ? comp :
|
|---|
| 220 | (comp - 0x100 < rlen) ?
|
|---|
| 221 | tbl[comp+1] : tbl[0x100+rlen];
|
|---|
| 222 | if ((UV)ch != pch) {
|
|---|
| 223 | d = uvchr_to_utf8(d, ch);
|
|---|
| 224 | pch = (UV)ch;
|
|---|
| 225 | }
|
|---|
| 226 | s += len;
|
|---|
| 227 | continue;
|
|---|
| 228 | }
|
|---|
| 229 | }
|
|---|
| 230 | }
|
|---|
| 231 | else if ((ch = tbl[comp]) >= 0) {
|
|---|
| 232 | matches++;
|
|---|
| 233 | if ((UV)ch != pch) {
|
|---|
| 234 | d = uvchr_to_utf8(d, ch);
|
|---|
| 235 | pch = (UV)ch;
|
|---|
| 236 | }
|
|---|
| 237 | s += len;
|
|---|
| 238 | continue;
|
|---|
| 239 | }
|
|---|
| 240 | else if (ch == -1) { /* -1 is unmapped character */
|
|---|
| 241 | Copy(s, d, len, U8);
|
|---|
| 242 | d += len;
|
|---|
| 243 | }
|
|---|
| 244 | else if (ch == -2) /* -2 is delete character */
|
|---|
| 245 | matches++;
|
|---|
| 246 | s += len;
|
|---|
| 247 | pch = 0xfeedface;
|
|---|
| 248 | }
|
|---|
| 249 | }
|
|---|
| 250 | else {
|
|---|
| 251 | while (s < send) {
|
|---|
| 252 | STRLEN len;
|
|---|
| 253 | const UV comp = utf8_to_uvchr(s, &len);
|
|---|
| 254 | I32 ch;
|
|---|
| 255 | if (comp > 0xff) {
|
|---|
| 256 | if (!complement) {
|
|---|
| 257 | Move(s, d, len, U8);
|
|---|
| 258 | d += len;
|
|---|
| 259 | }
|
|---|
| 260 | else {
|
|---|
| 261 | matches++;
|
|---|
| 262 | if (!del) {
|
|---|
| 263 | if (comp - 0x100 < rlen)
|
|---|
| 264 | d = uvchr_to_utf8(d, tbl[comp+1]);
|
|---|
| 265 | else
|
|---|
| 266 | d = uvchr_to_utf8(d, tbl[0x100+rlen]);
|
|---|
| 267 | }
|
|---|
| 268 | }
|
|---|
| 269 | }
|
|---|
| 270 | else if ((ch = tbl[comp]) >= 0) {
|
|---|
| 271 | d = uvchr_to_utf8(d, ch);
|
|---|
| 272 | matches++;
|
|---|
| 273 | }
|
|---|
| 274 | else if (ch == -1) { /* -1 is unmapped character */
|
|---|
| 275 | Copy(s, d, len, U8);
|
|---|
| 276 | d += len;
|
|---|
| 277 | }
|
|---|
| 278 | else if (ch == -2) /* -2 is delete character */
|
|---|
| 279 | matches++;
|
|---|
| 280 | s += len;
|
|---|
| 281 | }
|
|---|
| 282 | }
|
|---|
| 283 | if (grows) {
|
|---|
| 284 | sv_setpvn(sv, (char*)dstart, d - dstart);
|
|---|
| 285 | Safefree(dstart);
|
|---|
| 286 | }
|
|---|
| 287 | else {
|
|---|
| 288 | *d = '\0';
|
|---|
| 289 | SvCUR_set(sv, d - dstart);
|
|---|
| 290 | }
|
|---|
| 291 | SvUTF8_on(sv);
|
|---|
| 292 | }
|
|---|
| 293 | SvSETMAGIC(sv);
|
|---|
| 294 | return matches;
|
|---|
| 295 | }
|
|---|
| 296 |
|
|---|
| 297 | STATIC I32
|
|---|
| 298 | S_do_trans_simple_utf8(pTHX_ SV *sv)
|
|---|
| 299 | {
|
|---|
| 300 | U8 *s;
|
|---|
| 301 | U8 *send;
|
|---|
| 302 | U8 *d;
|
|---|
| 303 | U8 *start;
|
|---|
| 304 | U8 *dstart, *dend;
|
|---|
| 305 | I32 matches = 0;
|
|---|
| 306 | const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
|
|---|
| 307 | STRLEN len;
|
|---|
| 308 |
|
|---|
| 309 | SV* const rv = (SV*)cSVOP->op_sv;
|
|---|
| 310 | HV* const hv = (HV*)SvRV(rv);
|
|---|
| 311 | SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
|
|---|
| 312 | const UV none = svp ? SvUV(*svp) : 0x7fffffff;
|
|---|
| 313 | const UV extra = none + 1;
|
|---|
| 314 | UV final = 0;
|
|---|
| 315 | UV uv;
|
|---|
| 316 | I32 isutf8;
|
|---|
| 317 | U8 hibit = 0;
|
|---|
| 318 |
|
|---|
| 319 | s = (U8*)SvPV(sv, len);
|
|---|
| 320 | isutf8 = SvUTF8(sv);
|
|---|
| 321 | if (!isutf8) {
|
|---|
| 322 | const U8 *t = s, *e = s + len;
|
|---|
| 323 | while (t < e) {
|
|---|
| 324 | const U8 ch = *t++;
|
|---|
| 325 | if ((hibit = !NATIVE_IS_INVARIANT(ch)))
|
|---|
| 326 | break;
|
|---|
| 327 | }
|
|---|
| 328 | if (hibit)
|
|---|
| 329 | s = bytes_to_utf8(s, &len);
|
|---|
| 330 | }
|
|---|
| 331 | send = s + len;
|
|---|
| 332 | start = s;
|
|---|
| 333 |
|
|---|
| 334 | svp = hv_fetch(hv, "FINAL", 5, FALSE);
|
|---|
| 335 | if (svp)
|
|---|
| 336 | final = SvUV(*svp);
|
|---|
| 337 |
|
|---|
| 338 | if (grows) {
|
|---|
| 339 | /* d needs to be bigger than s, in case e.g. upgrading is required */
|
|---|
| 340 | Newx(d, len * 3 + UTF8_MAXBYTES, U8);
|
|---|
| 341 | dend = d + len * 3;
|
|---|
| 342 | dstart = d;
|
|---|
| 343 | }
|
|---|
| 344 | else {
|
|---|
| 345 | dstart = d = s;
|
|---|
| 346 | dend = d + len;
|
|---|
| 347 | }
|
|---|
| 348 |
|
|---|
| 349 | while (s < send) {
|
|---|
| 350 | if ((uv = swash_fetch(rv, s, TRUE)) < none) {
|
|---|
| 351 | s += UTF8SKIP(s);
|
|---|
| 352 | matches++;
|
|---|
| 353 | d = uvuni_to_utf8(d, uv);
|
|---|
| 354 | }
|
|---|
| 355 | else if (uv == none) {
|
|---|
| 356 | const int i = UTF8SKIP(s);
|
|---|
| 357 | Move(s, d, i, U8);
|
|---|
| 358 | d += i;
|
|---|
| 359 | s += i;
|
|---|
| 360 | }
|
|---|
| 361 | else if (uv == extra) {
|
|---|
| 362 | s += UTF8SKIP(s);
|
|---|
| 363 | matches++;
|
|---|
| 364 | d = uvuni_to_utf8(d, final);
|
|---|
| 365 | }
|
|---|
| 366 | else
|
|---|
| 367 | s += UTF8SKIP(s);
|
|---|
| 368 |
|
|---|
| 369 | if (d > dend) {
|
|---|
| 370 | const STRLEN clen = d - dstart;
|
|---|
| 371 | const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
|
|---|
| 372 | if (!grows)
|
|---|
| 373 | Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__);
|
|---|
| 374 | Renew(dstart, nlen + UTF8_MAXBYTES, U8);
|
|---|
| 375 | d = dstart + clen;
|
|---|
| 376 | dend = dstart + nlen;
|
|---|
| 377 | }
|
|---|
| 378 | }
|
|---|
| 379 | if (grows || hibit) {
|
|---|
| 380 | sv_setpvn(sv, (char*)dstart, d - dstart);
|
|---|
| 381 | Safefree(dstart);
|
|---|
| 382 | if (grows && hibit)
|
|---|
| 383 | Safefree(start);
|
|---|
| 384 | }
|
|---|
| 385 | else {
|
|---|
| 386 | *d = '\0';
|
|---|
| 387 | SvCUR_set(sv, d - dstart);
|
|---|
| 388 | }
|
|---|
| 389 | SvSETMAGIC(sv);
|
|---|
| 390 | SvUTF8_on(sv);
|
|---|
| 391 |
|
|---|
| 392 | return matches;
|
|---|
| 393 | }
|
|---|
| 394 |
|
|---|
| 395 | STATIC I32
|
|---|
| 396 | S_do_trans_count_utf8(pTHX_ SV *sv)
|
|---|
| 397 | {
|
|---|
| 398 | const U8 *s;
|
|---|
| 399 | const U8 *start = 0, *send;
|
|---|
| 400 | I32 matches = 0;
|
|---|
| 401 | STRLEN len;
|
|---|
| 402 |
|
|---|
| 403 | SV* const rv = (SV*)cSVOP->op_sv;
|
|---|
| 404 | HV* const hv = (HV*)SvRV(rv);
|
|---|
| 405 | SV** const svp = hv_fetch(hv, "NONE", 4, FALSE);
|
|---|
| 406 | const UV none = svp ? SvUV(*svp) : 0x7fffffff;
|
|---|
| 407 | const UV extra = none + 1;
|
|---|
| 408 | U8 hibit = 0;
|
|---|
| 409 |
|
|---|
| 410 | s = (const U8*)SvPV_const(sv, len);
|
|---|
| 411 | if (!SvUTF8(sv)) {
|
|---|
| 412 | const U8 *t = s;
|
|---|
| 413 | const U8 *e = s + len;
|
|---|
| 414 | while (t < e) {
|
|---|
| 415 | const U8 ch = *t++;
|
|---|
| 416 | if ((hibit = !NATIVE_IS_INVARIANT(ch)))
|
|---|
| 417 | break;
|
|---|
| 418 | }
|
|---|
| 419 | if (hibit)
|
|---|
| 420 | start = s = bytes_to_utf8((U8 *)s, &len);
|
|---|
| 421 | }
|
|---|
| 422 | send = s + len;
|
|---|
| 423 |
|
|---|
| 424 | while (s < send) {
|
|---|
| 425 | UV uv;
|
|---|
| 426 | if ((uv = swash_fetch(rv, (U8 *)s, TRUE)) < none || uv == extra)
|
|---|
| 427 | matches++;
|
|---|
| 428 | s += UTF8SKIP(s);
|
|---|
| 429 | }
|
|---|
| 430 | if (hibit)
|
|---|
| 431 | Safefree(start);
|
|---|
| 432 |
|
|---|
| 433 | return matches;
|
|---|
| 434 | }
|
|---|
| 435 |
|
|---|
| 436 | STATIC I32
|
|---|
| 437 | S_do_trans_complex_utf8(pTHX_ SV *sv)
|
|---|
| 438 | {
|
|---|
| 439 | U8 *start, *send;
|
|---|
| 440 | U8 *d;
|
|---|
| 441 | I32 matches = 0;
|
|---|
| 442 | const I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
|
|---|
| 443 | const I32 del = PL_op->op_private & OPpTRANS_DELETE;
|
|---|
| 444 | const I32 grows = PL_op->op_private & OPpTRANS_GROWS;
|
|---|
| 445 | SV * const rv = (SV*)cSVOP->op_sv;
|
|---|
| 446 | HV * const hv = (HV*)SvRV(rv);
|
|---|
| 447 | SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
|
|---|
| 448 | const UV none = svp ? SvUV(*svp) : 0x7fffffff;
|
|---|
| 449 | const UV extra = none + 1;
|
|---|
| 450 | UV final = 0;
|
|---|
| 451 | bool havefinal = FALSE;
|
|---|
| 452 | STRLEN len;
|
|---|
| 453 | U8 *dstart, *dend;
|
|---|
| 454 | U8 hibit = 0;
|
|---|
| 455 |
|
|---|
| 456 | U8 *s = (U8*)SvPV(sv, len);
|
|---|
| 457 | const I32 isutf8 = SvUTF8(sv);
|
|---|
| 458 | if (!isutf8) {
|
|---|
| 459 | const U8 *t = s;
|
|---|
| 460 | const U8 * const e = s + len;
|
|---|
| 461 | while (t < e) {
|
|---|
| 462 | const U8 ch = *t++;
|
|---|
| 463 | if ((hibit = !NATIVE_IS_INVARIANT(ch)))
|
|---|
| 464 | break;
|
|---|
| 465 | }
|
|---|
| 466 | if (hibit)
|
|---|
| 467 | s = bytes_to_utf8(s, &len);
|
|---|
| 468 | }
|
|---|
| 469 | send = s + len;
|
|---|
| 470 | start = s;
|
|---|
| 471 |
|
|---|
| 472 | svp = hv_fetch(hv, "FINAL", 5, FALSE);
|
|---|
| 473 | if (svp) {
|
|---|
| 474 | final = SvUV(*svp);
|
|---|
| 475 | havefinal = TRUE;
|
|---|
| 476 | }
|
|---|
| 477 |
|
|---|
| 478 | if (grows) {
|
|---|
| 479 | /* d needs to be bigger than s, in case e.g. upgrading is required */
|
|---|
| 480 | Newx(d, len * 3 + UTF8_MAXBYTES, U8);
|
|---|
| 481 | dend = d + len * 3;
|
|---|
| 482 | dstart = d;
|
|---|
| 483 | }
|
|---|
| 484 | else {
|
|---|
| 485 | dstart = d = s;
|
|---|
| 486 | dend = d + len;
|
|---|
| 487 | }
|
|---|
| 488 |
|
|---|
| 489 | if (squash) {
|
|---|
| 490 | UV puv = 0xfeedface;
|
|---|
| 491 | while (s < send) {
|
|---|
| 492 | UV uv = swash_fetch(rv, s, TRUE);
|
|---|
| 493 |
|
|---|
| 494 | if (d > dend) {
|
|---|
| 495 | const STRLEN clen = d - dstart;
|
|---|
| 496 | const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
|
|---|
| 497 | if (!grows)
|
|---|
| 498 | Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
|
|---|
| 499 | Renew(dstart, nlen + UTF8_MAXBYTES, U8);
|
|---|
| 500 | d = dstart + clen;
|
|---|
| 501 | dend = dstart + nlen;
|
|---|
| 502 | }
|
|---|
| 503 | if (uv < none) {
|
|---|
| 504 | matches++;
|
|---|
| 505 | s += UTF8SKIP(s);
|
|---|
| 506 | if (uv != puv) {
|
|---|
| 507 | d = uvuni_to_utf8(d, uv);
|
|---|
| 508 | puv = uv;
|
|---|
| 509 | }
|
|---|
| 510 | continue;
|
|---|
| 511 | }
|
|---|
| 512 | else if (uv == none) { /* "none" is unmapped character */
|
|---|
| 513 | const int i = UTF8SKIP(s);
|
|---|
| 514 | Move(s, d, i, U8);
|
|---|
| 515 | d += i;
|
|---|
| 516 | s += i;
|
|---|
| 517 | puv = 0xfeedface;
|
|---|
| 518 | continue;
|
|---|
| 519 | }
|
|---|
| 520 | else if (uv == extra && !del) {
|
|---|
| 521 | matches++;
|
|---|
| 522 | if (havefinal) {
|
|---|
| 523 | s += UTF8SKIP(s);
|
|---|
| 524 | if (puv != final) {
|
|---|
| 525 | d = uvuni_to_utf8(d, final);
|
|---|
| 526 | puv = final;
|
|---|
| 527 | }
|
|---|
| 528 | }
|
|---|
| 529 | else {
|
|---|
| 530 | STRLEN len;
|
|---|
| 531 | uv = utf8_to_uvuni(s, &len);
|
|---|
| 532 | if (uv != puv) {
|
|---|
| 533 | Move(s, d, len, U8);
|
|---|
| 534 | d += len;
|
|---|
| 535 | puv = uv;
|
|---|
| 536 | }
|
|---|
| 537 | s += len;
|
|---|
| 538 | }
|
|---|
| 539 | continue;
|
|---|
| 540 | }
|
|---|
| 541 | matches++; /* "none+1" is delete character */
|
|---|
| 542 | s += UTF8SKIP(s);
|
|---|
| 543 | }
|
|---|
| 544 | }
|
|---|
| 545 | else {
|
|---|
| 546 | while (s < send) {
|
|---|
| 547 | const UV uv = swash_fetch(rv, s, TRUE);
|
|---|
| 548 | if (d > dend) {
|
|---|
| 549 | const STRLEN clen = d - dstart;
|
|---|
| 550 | const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES;
|
|---|
| 551 | if (!grows)
|
|---|
| 552 | Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__);
|
|---|
| 553 | Renew(dstart, nlen + UTF8_MAXBYTES, U8);
|
|---|
| 554 | d = dstart + clen;
|
|---|
| 555 | dend = dstart + nlen;
|
|---|
| 556 | }
|
|---|
| 557 | if (uv < none) {
|
|---|
| 558 | matches++;
|
|---|
| 559 | s += UTF8SKIP(s);
|
|---|
| 560 | d = uvuni_to_utf8(d, uv);
|
|---|
| 561 | continue;
|
|---|
| 562 | }
|
|---|
| 563 | else if (uv == none) { /* "none" is unmapped character */
|
|---|
| 564 | const int i = UTF8SKIP(s);
|
|---|
| 565 | Move(s, d, i, U8);
|
|---|
| 566 | d += i;
|
|---|
| 567 | s += i;
|
|---|
| 568 | continue;
|
|---|
| 569 | }
|
|---|
| 570 | else if (uv == extra && !del) {
|
|---|
| 571 | matches++;
|
|---|
| 572 | s += UTF8SKIP(s);
|
|---|
| 573 | d = uvuni_to_utf8(d, final);
|
|---|
| 574 | continue;
|
|---|
| 575 | }
|
|---|
| 576 | matches++; /* "none+1" is delete character */
|
|---|
| 577 | s += UTF8SKIP(s);
|
|---|
| 578 | }
|
|---|
| 579 | }
|
|---|
| 580 | if (grows || hibit) {
|
|---|
| 581 | sv_setpvn(sv, (char*)dstart, d - dstart);
|
|---|
| 582 | Safefree(dstart);
|
|---|
| 583 | if (grows && hibit)
|
|---|
| 584 | Safefree(start);
|
|---|
| 585 | }
|
|---|
| 586 | else {
|
|---|
| 587 | *d = '\0';
|
|---|
| 588 | SvCUR_set(sv, d - dstart);
|
|---|
| 589 | }
|
|---|
| 590 | SvUTF8_on(sv);
|
|---|
| 591 | SvSETMAGIC(sv);
|
|---|
| 592 |
|
|---|
| 593 | return matches;
|
|---|
| 594 | }
|
|---|
| 595 |
|
|---|
| 596 | I32
|
|---|
| 597 | Perl_do_trans(pTHX_ SV *sv)
|
|---|
| 598 | {
|
|---|
| 599 | STRLEN len;
|
|---|
| 600 | const I32 hasutf = (PL_op->op_private &
|
|---|
| 601 | (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
|
|---|
| 602 |
|
|---|
| 603 | if (SvREADONLY(sv)) {
|
|---|
| 604 | if (SvFAKE(sv))
|
|---|
| 605 | sv_force_normal(sv);
|
|---|
| 606 | if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
|
|---|
| 607 | Perl_croak(aTHX_ PL_no_modify);
|
|---|
| 608 | }
|
|---|
| 609 | (void)SvPV_const(sv, len);
|
|---|
| 610 | if (!len)
|
|---|
| 611 | return 0;
|
|---|
| 612 | if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) {
|
|---|
| 613 | if (!SvPOKp(sv))
|
|---|
| 614 | (void)SvPV_force(sv, len);
|
|---|
| 615 | (void)SvPOK_only_UTF8(sv);
|
|---|
| 616 | }
|
|---|
| 617 |
|
|---|
| 618 | DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
|
|---|
| 619 |
|
|---|
| 620 | switch (PL_op->op_private & ~hasutf & (
|
|---|
| 621 | OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL|
|
|---|
| 622 | OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) {
|
|---|
| 623 | case 0:
|
|---|
| 624 | if (hasutf)
|
|---|
| 625 | return do_trans_simple_utf8(sv);
|
|---|
| 626 | else
|
|---|
| 627 | return do_trans_simple(sv);
|
|---|
| 628 |
|
|---|
| 629 | case OPpTRANS_IDENTICAL:
|
|---|
| 630 | case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT:
|
|---|
| 631 | if (hasutf)
|
|---|
| 632 | return do_trans_count_utf8(sv);
|
|---|
| 633 | else
|
|---|
| 634 | return do_trans_count(sv);
|
|---|
| 635 |
|
|---|
| 636 | default:
|
|---|
| 637 | if (hasutf)
|
|---|
| 638 | return do_trans_complex_utf8(sv);
|
|---|
| 639 | else
|
|---|
| 640 | return do_trans_complex(sv);
|
|---|
| 641 | }
|
|---|
| 642 | }
|
|---|
| 643 |
|
|---|
| 644 | void
|
|---|
| 645 | Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **sp)
|
|---|
| 646 | {
|
|---|
| 647 | SV ** const oldmark = mark;
|
|---|
| 648 | register I32 items = sp - mark;
|
|---|
| 649 | register STRLEN len;
|
|---|
| 650 | STRLEN delimlen;
|
|---|
| 651 |
|
|---|
| 652 | (void) SvPV_const(del, delimlen); /* stringify and get the delimlen */
|
|---|
| 653 | /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */
|
|---|
| 654 |
|
|---|
| 655 | mark++;
|
|---|
| 656 | len = (items > 0 ? (delimlen * (items - 1) ) : 0);
|
|---|
| 657 | (void)SvUPGRADE(sv, SVt_PV);
|
|---|
| 658 | if (SvLEN(sv) < len + items) { /* current length is way too short */
|
|---|
| 659 | while (items-- > 0) {
|
|---|
| 660 | if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) {
|
|---|
| 661 | STRLEN tmplen;
|
|---|
| 662 | SvPV_const(*mark, tmplen);
|
|---|
| 663 | len += tmplen;
|
|---|
| 664 | }
|
|---|
| 665 | mark++;
|
|---|
| 666 | }
|
|---|
| 667 | SvGROW(sv, len + 1); /* so try to pre-extend */
|
|---|
| 668 |
|
|---|
| 669 | mark = oldmark;
|
|---|
| 670 | items = sp - mark;
|
|---|
| 671 | ++mark;
|
|---|
| 672 | }
|
|---|
| 673 |
|
|---|
| 674 | sv_setpvn(sv, "", 0);
|
|---|
| 675 | /* sv_setpv retains old UTF8ness [perl #24846] */
|
|---|
| 676 | SvUTF8_off(sv);
|
|---|
| 677 |
|
|---|
| 678 | if (PL_tainting && SvMAGICAL(sv))
|
|---|
| 679 | SvTAINTED_off(sv);
|
|---|
| 680 |
|
|---|
| 681 | if (items-- > 0) {
|
|---|
| 682 | if (*mark)
|
|---|
| 683 | sv_catsv(sv, *mark);
|
|---|
| 684 | mark++;
|
|---|
| 685 | }
|
|---|
| 686 |
|
|---|
| 687 | if (delimlen) {
|
|---|
| 688 | for (; items > 0; items--,mark++) {
|
|---|
| 689 | sv_catsv(sv,del);
|
|---|
| 690 | sv_catsv(sv,*mark);
|
|---|
| 691 | }
|
|---|
| 692 | }
|
|---|
| 693 | else {
|
|---|
| 694 | for (; items > 0; items--,mark++)
|
|---|
| 695 | sv_catsv(sv,*mark);
|
|---|
| 696 | }
|
|---|
| 697 | SvSETMAGIC(sv);
|
|---|
| 698 | }
|
|---|
| 699 |
|
|---|
| 700 | void
|
|---|
| 701 | Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
|
|---|
| 702 | {
|
|---|
| 703 | STRLEN patlen;
|
|---|
| 704 | const char * const pat = SvPV_const(*sarg, patlen);
|
|---|
| 705 | bool do_taint = FALSE;
|
|---|
| 706 |
|
|---|
| 707 | SvUTF8_off(sv);
|
|---|
| 708 | if (DO_UTF8(*sarg))
|
|---|
| 709 | SvUTF8_on(sv);
|
|---|
| 710 | sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint);
|
|---|
| 711 | SvSETMAGIC(sv);
|
|---|
| 712 | if (do_taint)
|
|---|
| 713 | SvTAINTED_on(sv);
|
|---|
| 714 | }
|
|---|
| 715 |
|
|---|
| 716 | /* currently converts input to bytes if possible, but doesn't sweat failure */
|
|---|
| 717 | UV
|
|---|
| 718 | Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
|
|---|
| 719 | {
|
|---|
| 720 | STRLEN srclen, len;
|
|---|
| 721 | const unsigned char *s = (const unsigned char *) SvPV_const(sv, srclen);
|
|---|
| 722 | UV retnum = 0;
|
|---|
| 723 |
|
|---|
| 724 | if (offset < 0)
|
|---|
| 725 | return retnum;
|
|---|
| 726 | if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
|
|---|
| 727 | Perl_croak(aTHX_ "Illegal number of bits in vec");
|
|---|
| 728 |
|
|---|
| 729 | if (SvUTF8(sv))
|
|---|
| 730 | (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
|
|---|
| 731 |
|
|---|
| 732 | offset *= size; /* turn into bit offset */
|
|---|
| 733 | len = (offset + size + 7) / 8; /* required number of bytes */
|
|---|
| 734 | if (len > srclen) {
|
|---|
| 735 | if (size <= 8)
|
|---|
| 736 | retnum = 0;
|
|---|
| 737 | else {
|
|---|
| 738 | offset >>= 3; /* turn into byte offset */
|
|---|
| 739 | if (size == 16) {
|
|---|
| 740 | if ((STRLEN)offset >= srclen)
|
|---|
| 741 | retnum = 0;
|
|---|
| 742 | else
|
|---|
| 743 | retnum = (UV) s[offset] << 8;
|
|---|
| 744 | }
|
|---|
| 745 | else if (size == 32) {
|
|---|
| 746 | if ((STRLEN)offset >= srclen)
|
|---|
| 747 | retnum = 0;
|
|---|
| 748 | else if ((STRLEN)(offset + 1) >= srclen)
|
|---|
| 749 | retnum =
|
|---|
| 750 | ((UV) s[offset ] << 24);
|
|---|
| 751 | else if ((STRLEN)(offset + 2) >= srclen)
|
|---|
| 752 | retnum =
|
|---|
| 753 | ((UV) s[offset ] << 24) +
|
|---|
| 754 | ((UV) s[offset + 1] << 16);
|
|---|
| 755 | else
|
|---|
| 756 | retnum =
|
|---|
| 757 | ((UV) s[offset ] << 24) +
|
|---|
| 758 | ((UV) s[offset + 1] << 16) +
|
|---|
| 759 | ( s[offset + 2] << 8);
|
|---|
| 760 | }
|
|---|
| 761 | #ifdef UV_IS_QUAD
|
|---|
| 762 | else if (size == 64) {
|
|---|
| 763 | if (ckWARN(WARN_PORTABLE))
|
|---|
| 764 | Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
|
|---|
| 765 | "Bit vector size > 32 non-portable");
|
|---|
| 766 | if (offset >= srclen)
|
|---|
| 767 | retnum = 0;
|
|---|
| 768 | else if (offset + 1 >= srclen)
|
|---|
| 769 | retnum =
|
|---|
| 770 | (UV) s[offset ] << 56;
|
|---|
| 771 | else if (offset + 2 >= srclen)
|
|---|
| 772 | retnum =
|
|---|
| 773 | ((UV) s[offset ] << 56) +
|
|---|
| 774 | ((UV) s[offset + 1] << 48);
|
|---|
| 775 | else if (offset + 3 >= srclen)
|
|---|
| 776 | retnum =
|
|---|
| 777 | ((UV) s[offset ] << 56) +
|
|---|
| 778 | ((UV) s[offset + 1] << 48) +
|
|---|
| 779 | ((UV) s[offset + 2] << 40);
|
|---|
| 780 | else if (offset + 4 >= srclen)
|
|---|
| 781 | retnum =
|
|---|
| 782 | ((UV) s[offset ] << 56) +
|
|---|
| 783 | ((UV) s[offset + 1] << 48) +
|
|---|
| 784 | ((UV) s[offset + 2] << 40) +
|
|---|
| 785 | ((UV) s[offset + 3] << 32);
|
|---|
| 786 | else if (offset + 5 >= srclen)
|
|---|
| 787 | retnum =
|
|---|
| 788 | ((UV) s[offset ] << 56) +
|
|---|
| 789 | ((UV) s[offset + 1] << 48) +
|
|---|
| 790 | ((UV) s[offset + 2] << 40) +
|
|---|
| 791 | ((UV) s[offset + 3] << 32) +
|
|---|
| 792 | ( s[offset + 4] << 24);
|
|---|
| 793 | else if (offset + 6 >= srclen)
|
|---|
| 794 | retnum =
|
|---|
| 795 | ((UV) s[offset ] << 56) +
|
|---|
| 796 | ((UV) s[offset + 1] << 48) +
|
|---|
| 797 | ((UV) s[offset + 2] << 40) +
|
|---|
| 798 | ((UV) s[offset + 3] << 32) +
|
|---|
| 799 | ((UV) s[offset + 4] << 24) +
|
|---|
| 800 | ((UV) s[offset + 5] << 16);
|
|---|
| 801 | else
|
|---|
| 802 | retnum =
|
|---|
| 803 | ((UV) s[offset ] << 56) +
|
|---|
| 804 | ((UV) s[offset + 1] << 48) +
|
|---|
| 805 | ((UV) s[offset + 2] << 40) +
|
|---|
| 806 | ((UV) s[offset + 3] << 32) +
|
|---|
| 807 | ((UV) s[offset + 4] << 24) +
|
|---|
| 808 | ((UV) s[offset + 5] << 16) +
|
|---|
| 809 | ( s[offset + 6] << 8);
|
|---|
| 810 | }
|
|---|
| 811 | #endif
|
|---|
| 812 | }
|
|---|
| 813 | }
|
|---|
| 814 | else if (size < 8)
|
|---|
| 815 | retnum = (s[offset >> 3] >> (offset & 7)) & ((1 << size) - 1);
|
|---|
| 816 | else {
|
|---|
| 817 | offset >>= 3; /* turn into byte offset */
|
|---|
| 818 | if (size == 8)
|
|---|
| 819 | retnum = s[offset];
|
|---|
| 820 | else if (size == 16)
|
|---|
| 821 | retnum =
|
|---|
| 822 | ((UV) s[offset] << 8) +
|
|---|
| 823 | s[offset + 1];
|
|---|
| 824 | else if (size == 32)
|
|---|
| 825 | retnum =
|
|---|
| 826 | ((UV) s[offset ] << 24) +
|
|---|
| 827 | ((UV) s[offset + 1] << 16) +
|
|---|
| 828 | ( s[offset + 2] << 8) +
|
|---|
| 829 | s[offset + 3];
|
|---|
| 830 | #ifdef UV_IS_QUAD
|
|---|
| 831 | else if (size == 64) {
|
|---|
| 832 | if (ckWARN(WARN_PORTABLE))
|
|---|
| 833 | Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
|
|---|
| 834 | "Bit vector size > 32 non-portable");
|
|---|
| 835 | retnum =
|
|---|
| 836 | ((UV) s[offset ] << 56) +
|
|---|
| 837 | ((UV) s[offset + 1] << 48) +
|
|---|
| 838 | ((UV) s[offset + 2] << 40) +
|
|---|
| 839 | ((UV) s[offset + 3] << 32) +
|
|---|
| 840 | ((UV) s[offset + 4] << 24) +
|
|---|
| 841 | ((UV) s[offset + 5] << 16) +
|
|---|
| 842 | ( s[offset + 6] << 8) +
|
|---|
| 843 | s[offset + 7];
|
|---|
| 844 | }
|
|---|
| 845 | #endif
|
|---|
| 846 | }
|
|---|
| 847 |
|
|---|
| 848 | return retnum;
|
|---|
| 849 | }
|
|---|
| 850 |
|
|---|
| 851 | /* currently converts input to bytes if possible but doesn't sweat failures,
|
|---|
| 852 | * although it does ensure that the string it clobbers is not marked as
|
|---|
| 853 | * utf8-valid any more
|
|---|
| 854 | */
|
|---|
| 855 | void
|
|---|
| 856 | Perl_do_vecset(pTHX_ SV *sv)
|
|---|
| 857 | {
|
|---|
| 858 | SV *targ = LvTARG(sv);
|
|---|
| 859 | register I32 offset;
|
|---|
| 860 | register I32 size;
|
|---|
| 861 | register unsigned char *s;
|
|---|
| 862 | register UV lval;
|
|---|
| 863 | I32 mask;
|
|---|
| 864 | STRLEN targlen;
|
|---|
| 865 | STRLEN len;
|
|---|
| 866 |
|
|---|
| 867 | if (!targ)
|
|---|
| 868 | return;
|
|---|
| 869 | s = (unsigned char*)SvPV_force(targ, targlen);
|
|---|
| 870 | if (SvUTF8(targ)) {
|
|---|
| 871 | /* This is handled by the SvPOK_only below...
|
|---|
| 872 | if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
|
|---|
| 873 | SvUTF8_off(targ);
|
|---|
| 874 | */
|
|---|
| 875 | (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
|
|---|
| 876 | }
|
|---|
| 877 |
|
|---|
| 878 | (void)SvPOK_only(targ);
|
|---|
| 879 | lval = SvUV(sv);
|
|---|
| 880 | offset = LvTARGOFF(sv);
|
|---|
| 881 | if (offset < 0)
|
|---|
| 882 | Perl_croak(aTHX_ "Negative offset to vec in lvalue context");
|
|---|
| 883 | size = LvTARGLEN(sv);
|
|---|
| 884 | if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
|
|---|
| 885 | Perl_croak(aTHX_ "Illegal number of bits in vec");
|
|---|
| 886 |
|
|---|
| 887 | offset *= size; /* turn into bit offset */
|
|---|
| 888 | len = (offset + size + 7) / 8; /* required number of bytes */
|
|---|
| 889 | if (len > targlen) {
|
|---|
| 890 | s = (unsigned char*)SvGROW(targ, len + 1);
|
|---|
| 891 | (void)memzero((char *)(s + targlen), len - targlen + 1);
|
|---|
| 892 | SvCUR_set(targ, len);
|
|---|
| 893 | }
|
|---|
| 894 |
|
|---|
| 895 | if (size < 8) {
|
|---|
| 896 | mask = (1 << size) - 1;
|
|---|
| 897 | size = offset & 7;
|
|---|
| 898 | lval &= mask;
|
|---|
| 899 | offset >>= 3; /* turn into byte offset */
|
|---|
| 900 | s[offset] &= ~(mask << size);
|
|---|
| 901 | s[offset] |= lval << size;
|
|---|
| 902 | }
|
|---|
| 903 | else {
|
|---|
| 904 | offset >>= 3; /* turn into byte offset */
|
|---|
| 905 | if (size == 8)
|
|---|
| 906 | s[offset ] = (U8)( lval & 0xff);
|
|---|
| 907 | else if (size == 16) {
|
|---|
| 908 | s[offset ] = (U8)((lval >> 8) & 0xff);
|
|---|
| 909 | s[offset+1] = (U8)( lval & 0xff);
|
|---|
| 910 | }
|
|---|
| 911 | else if (size == 32) {
|
|---|
| 912 | s[offset ] = (U8)((lval >> 24) & 0xff);
|
|---|
| 913 | s[offset+1] = (U8)((lval >> 16) & 0xff);
|
|---|
| 914 | s[offset+2] = (U8)((lval >> 8) & 0xff);
|
|---|
| 915 | s[offset+3] = (U8)( lval & 0xff);
|
|---|
| 916 | }
|
|---|
| 917 | #ifdef UV_IS_QUAD
|
|---|
| 918 | else if (size == 64) {
|
|---|
| 919 | if (ckWARN(WARN_PORTABLE))
|
|---|
| 920 | Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
|
|---|
| 921 | "Bit vector size > 32 non-portable");
|
|---|
| 922 | s[offset ] = (U8)((lval >> 56) & 0xff);
|
|---|
| 923 | s[offset+1] = (U8)((lval >> 48) & 0xff);
|
|---|
| 924 | s[offset+2] = (U8)((lval >> 40) & 0xff);
|
|---|
| 925 | s[offset+3] = (U8)((lval >> 32) & 0xff);
|
|---|
| 926 | s[offset+4] = (U8)((lval >> 24) & 0xff);
|
|---|
| 927 | s[offset+5] = (U8)((lval >> 16) & 0xff);
|
|---|
| 928 | s[offset+6] = (U8)((lval >> 8) & 0xff);
|
|---|
| 929 | s[offset+7] = (U8)( lval & 0xff);
|
|---|
| 930 | }
|
|---|
| 931 | #endif
|
|---|
| 932 | }
|
|---|
| 933 | SvSETMAGIC(targ);
|
|---|
| 934 | }
|
|---|
| 935 |
|
|---|
| 936 | void
|
|---|
| 937 | Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
|
|---|
| 938 | {
|
|---|
| 939 | STRLEN len;
|
|---|
| 940 | char *s;
|
|---|
| 941 |
|
|---|
| 942 | if (SvTYPE(sv) == SVt_PVAV) {
|
|---|
| 943 | register I32 i;
|
|---|
| 944 | AV* av = (AV*)sv;
|
|---|
| 945 | const I32 max = AvFILL(av);
|
|---|
| 946 |
|
|---|
| 947 | for (i = 0; i <= max; i++) {
|
|---|
| 948 | sv = (SV*)av_fetch(av, i, FALSE);
|
|---|
| 949 | if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
|
|---|
| 950 | do_chop(astr, sv);
|
|---|
| 951 | }
|
|---|
| 952 | return;
|
|---|
| 953 | }
|
|---|
| 954 | else if (SvTYPE(sv) == SVt_PVHV) {
|
|---|
| 955 | HV* hv = (HV*)sv;
|
|---|
| 956 | HE* entry;
|
|---|
| 957 | (void)hv_iterinit(hv);
|
|---|
| 958 | while ((entry = hv_iternext(hv)))
|
|---|
| 959 | do_chop(astr,hv_iterval(hv,entry));
|
|---|
| 960 | return;
|
|---|
| 961 | }
|
|---|
| 962 | else if (SvREADONLY(sv)) {
|
|---|
| 963 | if (SvFAKE(sv)) {
|
|---|
| 964 | /* SV is copy-on-write */
|
|---|
| 965 | sv_force_normal_flags(sv, 0);
|
|---|
| 966 | }
|
|---|
| 967 | if (SvREADONLY(sv))
|
|---|
| 968 | Perl_croak(aTHX_ PL_no_modify);
|
|---|
| 969 | }
|
|---|
| 970 |
|
|---|
| 971 | if (PL_encoding && !SvUTF8(sv)) {
|
|---|
| 972 | /* like in do_chomp(), utf8-ize the sv as a side-effect
|
|---|
| 973 | * if we're using encoding. */
|
|---|
| 974 | sv_recode_to_utf8(sv, PL_encoding);
|
|---|
| 975 | }
|
|---|
| 976 |
|
|---|
| 977 | s = SvPV(sv, len);
|
|---|
| 978 | if (len && !SvPOK(sv))
|
|---|
| 979 | s = SvPV_force(sv, len);
|
|---|
| 980 | if (DO_UTF8(sv)) {
|
|---|
| 981 | if (s && len) {
|
|---|
| 982 | char *send = s + len;
|
|---|
| 983 | char *start = s;
|
|---|
| 984 | s = send - 1;
|
|---|
| 985 | while (s > start && UTF8_IS_CONTINUATION(*s))
|
|---|
| 986 | s--;
|
|---|
| 987 | if (is_utf8_string((U8*)s, send - s)) {
|
|---|
| 988 | sv_setpvn(astr, s, send - s);
|
|---|
| 989 | *s = '\0';
|
|---|
| 990 | SvCUR_set(sv, s - start);
|
|---|
| 991 | SvNIOK_off(sv);
|
|---|
| 992 | SvUTF8_on(astr);
|
|---|
| 993 | }
|
|---|
| 994 | }
|
|---|
| 995 | else
|
|---|
| 996 | sv_setpvn(astr, "", 0);
|
|---|
| 997 | }
|
|---|
| 998 | else if (s && len) {
|
|---|
| 999 | s += --len;
|
|---|
| 1000 | sv_setpvn(astr, s, 1);
|
|---|
| 1001 | *s = '\0';
|
|---|
| 1002 | SvCUR_set(sv, len);
|
|---|
| 1003 | SvUTF8_off(sv);
|
|---|
| 1004 | SvNIOK_off(sv);
|
|---|
| 1005 | }
|
|---|
| 1006 | else
|
|---|
| 1007 | sv_setpvn(astr, "", 0);
|
|---|
| 1008 | SvSETMAGIC(sv);
|
|---|
| 1009 | }
|
|---|
| 1010 |
|
|---|
| 1011 | I32
|
|---|
| 1012 | Perl_do_chomp(pTHX_ register SV *sv)
|
|---|
| 1013 | {
|
|---|
| 1014 | register I32 count;
|
|---|
| 1015 | STRLEN len;
|
|---|
| 1016 | char *s;
|
|---|
| 1017 | char *temp_buffer = NULL;
|
|---|
| 1018 | SV* svrecode = Nullsv;
|
|---|
| 1019 |
|
|---|
| 1020 | if (RsSNARF(PL_rs))
|
|---|
| 1021 | return 0;
|
|---|
| 1022 | if (RsRECORD(PL_rs))
|
|---|
| 1023 | return 0;
|
|---|
| 1024 | count = 0;
|
|---|
| 1025 | if (SvTYPE(sv) == SVt_PVAV) {
|
|---|
| 1026 | register I32 i;
|
|---|
| 1027 | AV* const av = (AV*)sv;
|
|---|
| 1028 | const I32 max = AvFILL(av);
|
|---|
| 1029 |
|
|---|
| 1030 | for (i = 0; i <= max; i++) {
|
|---|
| 1031 | sv = (SV*)av_fetch(av, i, FALSE);
|
|---|
| 1032 | if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
|
|---|
| 1033 | count += do_chomp(sv);
|
|---|
| 1034 | }
|
|---|
| 1035 | return count;
|
|---|
| 1036 | }
|
|---|
| 1037 | else if (SvTYPE(sv) == SVt_PVHV) {
|
|---|
| 1038 | HV* const hv = (HV*)sv;
|
|---|
| 1039 | HE* entry;
|
|---|
| 1040 | (void)hv_iterinit(hv);
|
|---|
| 1041 | while ((entry = hv_iternext(hv)))
|
|---|
| 1042 | count += do_chomp(hv_iterval(hv,entry));
|
|---|
| 1043 | return count;
|
|---|
| 1044 | }
|
|---|
| 1045 | else if (SvREADONLY(sv)) {
|
|---|
| 1046 | if (SvFAKE(sv)) {
|
|---|
| 1047 | /* SV is copy-on-write */
|
|---|
| 1048 | sv_force_normal_flags(sv, 0);
|
|---|
| 1049 | }
|
|---|
| 1050 | if (SvREADONLY(sv))
|
|---|
| 1051 | Perl_croak(aTHX_ PL_no_modify);
|
|---|
| 1052 | }
|
|---|
| 1053 |
|
|---|
| 1054 | if (PL_encoding) {
|
|---|
| 1055 | if (!SvUTF8(sv)) {
|
|---|
| 1056 | /* XXX, here sv is utf8-ized as a side-effect!
|
|---|
| 1057 | If encoding.pm is used properly, almost string-generating
|
|---|
| 1058 | operations, including literal strings, chr(), input data, etc.
|
|---|
| 1059 | should have been utf8-ized already, right?
|
|---|
| 1060 | */
|
|---|
| 1061 | sv_recode_to_utf8(sv, PL_encoding);
|
|---|
| 1062 | }
|
|---|
| 1063 | }
|
|---|
| 1064 |
|
|---|
| 1065 | s = SvPV(sv, len);
|
|---|
| 1066 | if (s && len) {
|
|---|
| 1067 | s += --len;
|
|---|
| 1068 | if (RsPARA(PL_rs)) {
|
|---|
| 1069 | if (*s != '\n')
|
|---|
| 1070 | goto nope;
|
|---|
| 1071 | ++count;
|
|---|
| 1072 | while (len && s[-1] == '\n') {
|
|---|
| 1073 | --len;
|
|---|
| 1074 | --s;
|
|---|
| 1075 | ++count;
|
|---|
| 1076 | }
|
|---|
| 1077 | }
|
|---|
| 1078 | else {
|
|---|
| 1079 | STRLEN rslen, rs_charlen;
|
|---|
| 1080 | const char *rsptr = SvPV_const(PL_rs, rslen);
|
|---|
| 1081 |
|
|---|
| 1082 | rs_charlen = SvUTF8(PL_rs)
|
|---|
| 1083 | ? sv_len_utf8(PL_rs)
|
|---|
| 1084 | : rslen;
|
|---|
| 1085 |
|
|---|
| 1086 | if (SvUTF8(PL_rs) != SvUTF8(sv)) {
|
|---|
| 1087 | /* Assumption is that rs is shorter than the scalar. */
|
|---|
| 1088 | if (SvUTF8(PL_rs)) {
|
|---|
| 1089 | /* RS is utf8, scalar is 8 bit. */
|
|---|
| 1090 | bool is_utf8 = TRUE;
|
|---|
| 1091 | temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
|
|---|
| 1092 | &rslen, &is_utf8);
|
|---|
| 1093 | if (is_utf8) {
|
|---|
| 1094 | /* Cannot downgrade, therefore cannot possibly match
|
|---|
| 1095 | */
|
|---|
| 1096 | assert (temp_buffer == rsptr);
|
|---|
| 1097 | temp_buffer = NULL;
|
|---|
| 1098 | goto nope;
|
|---|
| 1099 | }
|
|---|
| 1100 | rsptr = temp_buffer;
|
|---|
| 1101 | }
|
|---|
| 1102 | else if (PL_encoding) {
|
|---|
| 1103 | /* RS is 8 bit, encoding.pm is used.
|
|---|
| 1104 | * Do not recode PL_rs as a side-effect. */
|
|---|
| 1105 | svrecode = newSVpvn(rsptr, rslen);
|
|---|
| 1106 | sv_recode_to_utf8(svrecode, PL_encoding);
|
|---|
| 1107 | rsptr = SvPV_const(svrecode, rslen);
|
|---|
| 1108 | rs_charlen = sv_len_utf8(svrecode);
|
|---|
| 1109 | }
|
|---|
| 1110 | else {
|
|---|
| 1111 | /* RS is 8 bit, scalar is utf8. */
|
|---|
| 1112 | temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
|
|---|
| 1113 | rsptr = temp_buffer;
|
|---|
| 1114 | }
|
|---|
| 1115 | }
|
|---|
| 1116 | if (rslen == 1) {
|
|---|
| 1117 | if (*s != *rsptr)
|
|---|
| 1118 | goto nope;
|
|---|
| 1119 | ++count;
|
|---|
| 1120 | }
|
|---|
| 1121 | else {
|
|---|
| 1122 | if (len < rslen - 1)
|
|---|
| 1123 | goto nope;
|
|---|
| 1124 | len -= rslen - 1;
|
|---|
| 1125 | s -= rslen - 1;
|
|---|
| 1126 | if (memNE(s, rsptr, rslen))
|
|---|
| 1127 | goto nope;
|
|---|
| 1128 | count += rs_charlen;
|
|---|
| 1129 | }
|
|---|
| 1130 | }
|
|---|
| 1131 | s = SvPV_force_nolen(sv);
|
|---|
| 1132 | SvCUR_set(sv, len);
|
|---|
| 1133 | *SvEND(sv) = '\0';
|
|---|
| 1134 | SvNIOK_off(sv);
|
|---|
| 1135 | SvSETMAGIC(sv);
|
|---|
| 1136 | }
|
|---|
| 1137 | nope:
|
|---|
| 1138 |
|
|---|
| 1139 | if (svrecode)
|
|---|
| 1140 | SvREFCNT_dec(svrecode);
|
|---|
| 1141 |
|
|---|
| 1142 | Safefree(temp_buffer);
|
|---|
| 1143 | return count;
|
|---|
| 1144 | }
|
|---|
| 1145 |
|
|---|
| 1146 | void
|
|---|
| 1147 | Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
|
|---|
| 1148 | {
|
|---|
| 1149 | #ifdef LIBERAL
|
|---|
| 1150 | register long *dl;
|
|---|
| 1151 | register long *ll;
|
|---|
| 1152 | register long *rl;
|
|---|
| 1153 | #endif
|
|---|
| 1154 | register char *dc;
|
|---|
| 1155 | STRLEN leftlen;
|
|---|
| 1156 | STRLEN rightlen;
|
|---|
| 1157 | register const char *lc;
|
|---|
| 1158 | register const char *rc;
|
|---|
| 1159 | register I32 len;
|
|---|
| 1160 | I32 lensave;
|
|---|
| 1161 | const char *lsave;
|
|---|
| 1162 | const char *rsave;
|
|---|
| 1163 | const bool left_utf = DO_UTF8(left);
|
|---|
| 1164 | const bool right_utf = DO_UTF8(right);
|
|---|
| 1165 | I32 needlen = 0;
|
|---|
| 1166 |
|
|---|
| 1167 | if (left_utf && !right_utf)
|
|---|
| 1168 | sv_utf8_upgrade(right);
|
|---|
| 1169 | else if (!left_utf && right_utf)
|
|---|
| 1170 | sv_utf8_upgrade(left);
|
|---|
| 1171 |
|
|---|
| 1172 | if (sv != left || (optype != OP_BIT_AND && !SvOK(sv) && !SvGMAGICAL(sv)))
|
|---|
| 1173 | sv_setpvn(sv, "", 0); /* avoid undef warning on |= and ^= */
|
|---|
| 1174 | lsave = lc = SvPV_const(left, leftlen);
|
|---|
| 1175 | rsave = rc = SvPV_const(right, rightlen);
|
|---|
| 1176 | len = leftlen < rightlen ? leftlen : rightlen;
|
|---|
| 1177 | lensave = len;
|
|---|
| 1178 | if ((left_utf || right_utf) && (sv == left || sv == right)) {
|
|---|
| 1179 | needlen = optype == OP_BIT_AND ? len : leftlen + rightlen;
|
|---|
| 1180 | Newxz(dc, needlen + 1, char);
|
|---|
| 1181 | }
|
|---|
| 1182 | else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) {
|
|---|
| 1183 | /* Fix this to nong when change 22613 is integrated.
|
|---|
| 1184 | (Which in turn awaits merging sv_2iv and sv_2uv) */
|
|---|
| 1185 | dc = SvPV_force_nolen(sv);
|
|---|
| 1186 | if (SvLEN(sv) < (STRLEN)(len + 1)) {
|
|---|
| 1187 | dc = SvGROW(sv, (STRLEN)(len + 1));
|
|---|
| 1188 | (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1);
|
|---|
| 1189 | }
|
|---|
| 1190 | if (optype != OP_BIT_AND && (left_utf || right_utf))
|
|---|
| 1191 | dc = SvGROW(sv, leftlen + rightlen + 1);
|
|---|
| 1192 | }
|
|---|
| 1193 | else {
|
|---|
| 1194 | needlen = ((optype == OP_BIT_AND)
|
|---|
| 1195 | ? len : (leftlen > rightlen ? leftlen : rightlen));
|
|---|
| 1196 | Newxz(dc, needlen + 1, char);
|
|---|
| 1197 | (void)sv_usepvn(sv, dc, needlen);
|
|---|
| 1198 | dc = SvPVX(sv); /* sv_usepvn() calls Renew() */
|
|---|
| 1199 | }
|
|---|
| 1200 | SvCUR_set(sv, len);
|
|---|
| 1201 | (void)SvPOK_only(sv);
|
|---|
| 1202 | if (left_utf || right_utf) {
|
|---|
| 1203 | UV duc, luc, ruc;
|
|---|
| 1204 | char *dcsave = dc;
|
|---|
| 1205 | STRLEN lulen = leftlen;
|
|---|
| 1206 | STRLEN rulen = rightlen;
|
|---|
| 1207 | STRLEN ulen;
|
|---|
| 1208 |
|
|---|
| 1209 | switch (optype) {
|
|---|
| 1210 | case OP_BIT_AND:
|
|---|
| 1211 | while (lulen && rulen) {
|
|---|
| 1212 | luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
|
|---|
| 1213 | lc += ulen;
|
|---|
| 1214 | lulen -= ulen;
|
|---|
| 1215 | ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
|
|---|
| 1216 | rc += ulen;
|
|---|
| 1217 | rulen -= ulen;
|
|---|
| 1218 | duc = luc & ruc;
|
|---|
| 1219 | dc = (char*)uvchr_to_utf8((U8*)dc, duc);
|
|---|
| 1220 | }
|
|---|
| 1221 | if (sv == left || sv == right)
|
|---|
| 1222 | (void)sv_usepvn(sv, dcsave, needlen);
|
|---|
| 1223 | SvCUR_set(sv, dc - dcsave);
|
|---|
| 1224 | break;
|
|---|
| 1225 | case OP_BIT_XOR:
|
|---|
| 1226 | while (lulen && rulen) {
|
|---|
| 1227 | luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
|
|---|
| 1228 | lc += ulen;
|
|---|
| 1229 | lulen -= ulen;
|
|---|
| 1230 | ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
|
|---|
| 1231 | rc += ulen;
|
|---|
| 1232 | rulen -= ulen;
|
|---|
| 1233 | duc = luc ^ ruc;
|
|---|
| 1234 | dc = (char*)uvchr_to_utf8((U8*)dc, duc);
|
|---|
| 1235 | }
|
|---|
| 1236 | goto mop_up_utf;
|
|---|
| 1237 | case OP_BIT_OR:
|
|---|
| 1238 | while (lulen && rulen) {
|
|---|
| 1239 | luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV);
|
|---|
| 1240 | lc += ulen;
|
|---|
| 1241 | lulen -= ulen;
|
|---|
| 1242 | ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV);
|
|---|
| 1243 | rc += ulen;
|
|---|
| 1244 | rulen -= ulen;
|
|---|
| 1245 | duc = luc | ruc;
|
|---|
| 1246 | dc = (char*)uvchr_to_utf8((U8*)dc, duc);
|
|---|
| 1247 | }
|
|---|
| 1248 | mop_up_utf:
|
|---|
| 1249 | if (sv == left || sv == right)
|
|---|
| 1250 | (void)sv_usepvn(sv, dcsave, needlen);
|
|---|
| 1251 | SvCUR_set(sv, dc - dcsave);
|
|---|
| 1252 | if (rulen)
|
|---|
| 1253 | sv_catpvn(sv, rc, rulen);
|
|---|
| 1254 | else if (lulen)
|
|---|
| 1255 | sv_catpvn(sv, lc, lulen);
|
|---|
| 1256 | else
|
|---|
| 1257 | *SvEND(sv) = '\0';
|
|---|
| 1258 | break;
|
|---|
| 1259 | }
|
|---|
| 1260 | SvUTF8_on(sv);
|
|---|
| 1261 | goto finish;
|
|---|
| 1262 | }
|
|---|
| 1263 | else
|
|---|
| 1264 | #ifdef LIBERAL
|
|---|
| 1265 | if (len >= sizeof(long)*4 &&
|
|---|
| 1266 | !((long)dc % sizeof(long)) &&
|
|---|
| 1267 | !((long)lc % sizeof(long)) &&
|
|---|
| 1268 | !((long)rc % sizeof(long))) /* It's almost always aligned... */
|
|---|
| 1269 | {
|
|---|
| 1270 | const I32 remainder = len % (sizeof(long)*4);
|
|---|
| 1271 | len /= (sizeof(long)*4);
|
|---|
| 1272 |
|
|---|
| 1273 | dl = (long*)dc;
|
|---|
| 1274 | ll = (long*)lc;
|
|---|
| 1275 | rl = (long*)rc;
|
|---|
| 1276 |
|
|---|
| 1277 | switch (optype) {
|
|---|
| 1278 | case OP_BIT_AND:
|
|---|
| 1279 | while (len--) {
|
|---|
| 1280 | *dl++ = *ll++ & *rl++;
|
|---|
| 1281 | *dl++ = *ll++ & *rl++;
|
|---|
| 1282 | *dl++ = *ll++ & *rl++;
|
|---|
| 1283 | *dl++ = *ll++ & *rl++;
|
|---|
| 1284 | }
|
|---|
| 1285 | break;
|
|---|
| 1286 | case OP_BIT_XOR:
|
|---|
| 1287 | while (len--) {
|
|---|
| 1288 | *dl++ = *ll++ ^ *rl++;
|
|---|
| 1289 | *dl++ = *ll++ ^ *rl++;
|
|---|
| 1290 | *dl++ = *ll++ ^ *rl++;
|
|---|
| 1291 | *dl++ = *ll++ ^ *rl++;
|
|---|
| 1292 | }
|
|---|
| 1293 | break;
|
|---|
| 1294 | case OP_BIT_OR:
|
|---|
| 1295 | while (len--) {
|
|---|
| 1296 | *dl++ = *ll++ | *rl++;
|
|---|
| 1297 | *dl++ = *ll++ | *rl++;
|
|---|
| 1298 | *dl++ = *ll++ | *rl++;
|
|---|
| 1299 | *dl++ = *ll++ | *rl++;
|
|---|
| 1300 | }
|
|---|
| 1301 | }
|
|---|
| 1302 |
|
|---|
| 1303 | dc = (char*)dl;
|
|---|
| 1304 | lc = (char*)ll;
|
|---|
| 1305 | rc = (char*)rl;
|
|---|
| 1306 |
|
|---|
| 1307 | len = remainder;
|
|---|
| 1308 | }
|
|---|
| 1309 | #endif
|
|---|
| 1310 | {
|
|---|
| 1311 | switch (optype) {
|
|---|
| 1312 | case OP_BIT_AND:
|
|---|
| 1313 | while (len--)
|
|---|
| 1314 | *dc++ = *lc++ & *rc++;
|
|---|
| 1315 | *dc = '\0';
|
|---|
| 1316 | break;
|
|---|
| 1317 | case OP_BIT_XOR:
|
|---|
| 1318 | while (len--)
|
|---|
| 1319 | *dc++ = *lc++ ^ *rc++;
|
|---|
| 1320 | goto mop_up;
|
|---|
| 1321 | case OP_BIT_OR:
|
|---|
| 1322 | while (len--)
|
|---|
| 1323 | *dc++ = *lc++ | *rc++;
|
|---|
| 1324 | mop_up:
|
|---|
| 1325 | len = lensave;
|
|---|
| 1326 | if (rightlen > (STRLEN)len)
|
|---|
| 1327 | sv_catpvn(sv, rsave + len, rightlen - len);
|
|---|
| 1328 | else if (leftlen > (STRLEN)len)
|
|---|
| 1329 | sv_catpvn(sv, lsave + len, leftlen - len);
|
|---|
| 1330 | else
|
|---|
| 1331 | *SvEND(sv) = '\0';
|
|---|
| 1332 | break;
|
|---|
| 1333 | }
|
|---|
| 1334 | }
|
|---|
| 1335 | finish:
|
|---|
| 1336 | SvTAINT(sv);
|
|---|
| 1337 | }
|
|---|
| 1338 |
|
|---|
| 1339 | OP *
|
|---|
| 1340 | Perl_do_kv(pTHX)
|
|---|
| 1341 | {
|
|---|
| 1342 | dSP;
|
|---|
| 1343 | HV *hv = (HV*)POPs;
|
|---|
| 1344 | HV *keys;
|
|---|
| 1345 | register HE *entry;
|
|---|
| 1346 | const I32 gimme = GIMME_V;
|
|---|
| 1347 | const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV);
|
|---|
| 1348 | const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS);
|
|---|
| 1349 | const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES);
|
|---|
| 1350 | I32 realhv = (SvTYPE(hv) == SVt_PVHV);
|
|---|
| 1351 |
|
|---|
| 1352 | if (!hv) {
|
|---|
| 1353 | if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
|
|---|
| 1354 | dTARGET; /* make sure to clear its target here */
|
|---|
| 1355 | if (SvTYPE(TARG) == SVt_PVLV)
|
|---|
| 1356 | LvTARG(TARG) = Nullsv;
|
|---|
| 1357 | PUSHs(TARG);
|
|---|
| 1358 | }
|
|---|
| 1359 | RETURN;
|
|---|
| 1360 | }
|
|---|
| 1361 |
|
|---|
| 1362 | keys = realhv ? hv : avhv_keys((AV*)hv);
|
|---|
| 1363 | (void)hv_iterinit(keys); /* always reset iterator regardless */
|
|---|
| 1364 |
|
|---|
| 1365 | if (gimme == G_VOID)
|
|---|
| 1366 | RETURN;
|
|---|
| 1367 |
|
|---|
| 1368 | if (gimme == G_SCALAR) {
|
|---|
| 1369 | IV i;
|
|---|
| 1370 | dTARGET;
|
|---|
| 1371 |
|
|---|
| 1372 | if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */
|
|---|
| 1373 | if (SvTYPE(TARG) < SVt_PVLV) {
|
|---|
| 1374 | sv_upgrade(TARG, SVt_PVLV);
|
|---|
| 1375 | sv_magic(TARG, Nullsv, PERL_MAGIC_nkeys, Nullch, 0);
|
|---|
| 1376 | }
|
|---|
| 1377 | LvTYPE(TARG) = 'k';
|
|---|
| 1378 | if (LvTARG(TARG) != (SV*)keys) {
|
|---|
| 1379 | if (LvTARG(TARG))
|
|---|
| 1380 | SvREFCNT_dec(LvTARG(TARG));
|
|---|
| 1381 | LvTARG(TARG) = SvREFCNT_inc(keys);
|
|---|
| 1382 | }
|
|---|
| 1383 | PUSHs(TARG);
|
|---|
| 1384 | RETURN;
|
|---|
| 1385 | }
|
|---|
| 1386 |
|
|---|
| 1387 | if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied))
|
|---|
| 1388 | i = HvKEYS(keys);
|
|---|
| 1389 | else {
|
|---|
| 1390 | i = 0;
|
|---|
| 1391 | while (hv_iternext(keys)) i++;
|
|---|
| 1392 | }
|
|---|
| 1393 | PUSHi( i );
|
|---|
| 1394 | RETURN;
|
|---|
| 1395 | }
|
|---|
| 1396 |
|
|---|
| 1397 | EXTEND(SP, HvKEYS(keys) * (dokeys + dovalues));
|
|---|
| 1398 |
|
|---|
| 1399 | PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */
|
|---|
| 1400 | while ((entry = hv_iternext(keys))) {
|
|---|
| 1401 | SPAGAIN;
|
|---|
| 1402 | if (dokeys) {
|
|---|
| 1403 | SV* const sv = hv_iterkeysv(entry);
|
|---|
| 1404 | XPUSHs(sv); /* won't clobber stack_sp */
|
|---|
| 1405 | }
|
|---|
| 1406 | if (dovalues) {
|
|---|
| 1407 | SV *tmpstr;
|
|---|
| 1408 | PUTBACK;
|
|---|
| 1409 | tmpstr = realhv ?
|
|---|
| 1410 | hv_iterval(hv,entry) : avhv_iterval((AV*)hv,entry);
|
|---|
| 1411 | DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu",
|
|---|
| 1412 | (unsigned long)HeHASH(entry),
|
|---|
| 1413 | (int)HvMAX(keys)+1,
|
|---|
| 1414 | (unsigned long)(HeHASH(entry) & HvMAX(keys))));
|
|---|
| 1415 | SPAGAIN;
|
|---|
| 1416 | XPUSHs(tmpstr);
|
|---|
| 1417 | }
|
|---|
| 1418 | PUTBACK;
|
|---|
| 1419 | }
|
|---|
| 1420 | return NORMAL;
|
|---|
| 1421 | }
|
|---|
| 1422 |
|
|---|
| 1423 | /*
|
|---|
| 1424 | * Local variables:
|
|---|
| 1425 | * c-indentation-style: bsd
|
|---|
| 1426 | * c-basic-offset: 4
|
|---|
| 1427 | * indent-tabs-mode: t
|
|---|
| 1428 | * End:
|
|---|
| 1429 | *
|
|---|
| 1430 | * ex: set ts=8 sts=4 sw=4 noet:
|
|---|
| 1431 | */
|
|---|