source: trunk/src/gui/painting/qdrawhelper_mmx_p.h

Last change on this file was 846, checked in by Dmitry A. Kuminov, 14 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 28.6 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the QtGui module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#ifndef QDRAWHELPER_MMX_P_H
43#define QDRAWHELPER_MMX_P_H
44
45//
46// W A R N I N G
47// -------------
48//
49// This file is not part of the Qt API. It exists purely as an
50// implementation detail. This header file may change from version to
51// version without notice, or even be removed.
52//
53// We mean it.
54//
55
56#include <private/qdrawhelper_p.h>
57#include <private/qdrawhelper_x86_p.h>
58#include <private/qpaintengine_raster_p.h>
59
60#ifdef QT_HAVE_MMX
61#include <mmintrin.h>
62#endif
63
// Constant declarations meant to be expanded at the top of a function body.
// Each one declares a local m64 whose name (mmx_0x00ff, mmx_0x0080,
// mmx_0x0000) is picked up implicitly by the helper macros defined after
// QMMXCommonIntrinsics (negate, byte_mul, load, ...), so the matching C_xx
// must be in scope wherever those helpers are used.
64#define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0xff)
65#define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x80)
66#define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()
67
// The functions in this header issue EMMS through MM::end() rather than
// directly, so MSVC's "no EMMS at end of function" warning is silenced.
68#ifdef Q_CC_MSVC
69# pragma warning(disable: 4799) // No EMMS at end of function
70#endif
71
// Short alias for the 64-bit MMX vector type used throughout this header.
72typedef __m64 m64;
73
74QT_BEGIN_NAMESPACE
75
76struct QMMXCommonIntrinsics
77{
78 static inline m64 alpha(m64 x) {
79 x = _mm_unpackhi_pi16(x, x);
80 x = _mm_unpackhi_pi16(x, x);
81 return x;
82 }
83
84 static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff) {
85 return _mm_xor_si64(x, mmx_0x00ff);
86 }
87
88 static inline m64 add(const m64 &a, const m64 &b) {
89 return _mm_adds_pu16 (a, b);
90 }
91
92 static inline m64 _byte_mul(const m64 &a, const m64 &b,
93 const m64 &mmx_0x0080)
94 {
95 m64 res = _mm_mullo_pi16(a, b);
96 res = _mm_adds_pu16(res, mmx_0x0080);
97 res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
98 return _mm_srli_pi16(res, 8);
99 }
100
101 static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a,
102 const m64 &y, const m64 &b)
103 {
104 m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
105 return _mm_srli_pi16(res, 8);
106 }
107
108 static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a,
109 const m64 &y, const m64 &b,
110 const m64 &mmx_0x0080)
111 {
112 m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
113 res = _mm_adds_pu16(res, mmx_0x0080);
114 res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
115 return _mm_srli_pi16(res, 8);
116 }
117
118 static inline m64 _premul(m64 x, const m64 &mmx_0x0080) {
119 m64 a = alpha(x);
120 return _byte_mul(x, a, mmx_0x0080);
121 }
122
123 static inline m64 _load(uint x, const m64 &mmx_0x0000) {
124 return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
125 }
126
127 static inline m64 _load_alpha(uint x, const m64 &) {
128 x |= (x << 16);
129 return _mm_set1_pi32(x);
130 }
131
132 static inline uint _store(const m64 &x, const m64 &mmx_0x0000) {
133 return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000));
134 }
135};
136
// Convenience wrappers used as MM::negate(x), MM::byte_mul(a, b), etc.
// Each expands to the underscore-prefixed QMMXCommonIntrinsics helper and
// appends the constant it needs, so the caller must have expanded the
// corresponding C_FF / C_80 / C_00 declaration in the enclosing scope.
137#define negate(x) _negate(x, mmx_0x00ff)
138#define byte_mul(a, b) _byte_mul(a, b, mmx_0x0080)
139#define interpolate_pixel_255(x, a, y, b) _interpolate_pixel_255(x, a, y, b, mmx_0x0080)
140#define premul(x) _premul(x, mmx_0x0080)
141#define load(x) _load(x, mmx_0x0000)
142#define load_alpha(x) _load_alpha(x, mmx_0x0000)
143#define store(x) _store(x, mmx_0x0000)
144
/*
  Clear:
  result = 0
  dest = d * cia

  Shared body of comp_func_solid_Clear and comp_func_Clear. Must be expanded
  inside a template that has an MM parameter. With full constant alpha the
  span is zero-filled; otherwise every destination pixel is scaled by the
  inverted constant alpha.
*/
#define comp_func_Clear_impl(dest, length, const_alpha)\
{\
    if (const_alpha != 255) {\
        C_FF; C_80; C_00;\
        const m64 inverseCoverage = MM::negate(MM::load_alpha(const_alpha));\
        int px = 0;\
        while (px < length) {\
            dest[px] = MM::store(MM::byte_mul(MM::load(dest[px]), inverseCoverage));\
            ++px;\
        }\
        MM::end();\
    } else {\
        qt_memfill(static_cast<quint32*>(dest), quint32(0), length);\
    }\
}
162
// Clear for a solid colour: delegates to comp_func_Clear_impl. The colour
// argument (third parameter) is unnamed and unused.
163template <class MM>
164static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
165{
166 comp_func_Clear_impl(dest, length, const_alpha);
167}
168
// Clear for a source buffer: delegates to comp_func_Clear_impl. The source
// pointer (second parameter) is unnamed and unused.
169template <class MM>
170static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
171{
172 comp_func_Clear_impl(dest, length, const_alpha);
173}
174
175/*
176 result = s
177 dest = s * ca + d * cia
178*/
179template <class MM>
180static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha)
181{
182 if (const_alpha == 255) {
183 qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
184 } else {
185 C_FF; C_80; C_00;
186 const m64 a = MM::load_alpha(const_alpha);
187 const m64 ia = MM::negate(a);
188 const m64 s = MM::byte_mul(MM::load(src), a);
189 for (int i = 0; i < length; ++i) {
190 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
191 }
192 MM::end();
193 }
194}
195
196template <class MM>
197static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha)
198{
199 if (const_alpha == 255) {
200 ::memcpy(dest, src, length * sizeof(uint));
201 } else {
202 C_FF; C_80; C_00;
203 const m64 a = MM::load_alpha(const_alpha);
204 const m64 ia = MM::negate(a);
205 for (int i = 0; i < length; ++i)
206 dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), a,
207 MM::load(dest[i]), ia));
208 }
209 MM::end();
210}
211
212/*
213 result = s + d * sia
214 dest = (s + d * sia) * ca + d * cia
215 = s * ca + d * (sia * ca + cia)
216 = s * ca + d * (1 - sa*ca)
217*/
218template <class MM>
219static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha)
220{
221 if ((const_alpha & qAlpha(src)) == 255) {
222 qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
223 } else {
224 C_FF; C_80; C_00;
225 m64 s = MM::load(src);
226 if (const_alpha != 255) {
227 m64 ca = MM::load_alpha(const_alpha);
228 s = MM::byte_mul(s, ca);
229 }
230 m64 a = MM::negate(MM::alpha(s));
231 for (int i = 0; i < length; ++i)
232 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), a)));
233 MM::end();
234 }
235}
236
237template <class MM>
238static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha)
239{
240 C_FF; C_80; C_00;
241 if (const_alpha == 255) {
242 for (int i = 0; i < length; ++i) {
243 const uint alphaMaskedSource = 0xff000000 & src[i];
244 if (alphaMaskedSource == 0)
245 continue;
246 if (alphaMaskedSource == 0xff000000) {
247 dest[i] = src[i];
248 } else {
249 m64 s = MM::load(src[i]);
250 m64 ia = MM::negate(MM::alpha(s));
251 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
252 }
253 }
254 } else {
255 m64 ca = MM::load_alpha(const_alpha);
256 for (int i = 0; i < length; ++i) {
257 if ((0xff000000 & src[i]) == 0)
258 continue;
259 m64 s = MM::byte_mul(MM::load(src[i]), ca);
260 m64 ia = MM::negate(MM::alpha(s));
261 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
262 }
263 }
264 MM::end();
265}
266
267/*
268 result = d + s * dia
269 dest = (d + s * dia) * ca + d * cia
270 = d + s * dia * ca
271*/
272template <class MM>
273static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha)
274{
275 C_FF; C_80; C_00;
276 m64 s = MM::load(src);
277 if (const_alpha != 255)
278 s = MM::byte_mul(s, MM::load_alpha(const_alpha));
279
280 for (int i = 0; i < length; ++i) {
281 m64 d = MM::load(dest[i]);
282 m64 dia = MM::negate(MM::alpha(d));
283 dest[i] = MM::store(MM::add(d, MM::byte_mul(s, dia)));
284 }
285 MM::end();
286}
287
288template <class MM>
289static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
290{
291 C_FF; C_80; C_00;
292 if (const_alpha == 255) {
293 for (int i = 0; i < length; ++i) {
294 m64 d = MM::load(dest[i]);
295 m64 ia = MM::negate(MM::alpha(d));
296 dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), ia)));
297 }
298 } else {
299 m64 ca = MM::load_alpha(const_alpha);
300 for (int i = 0; i < length; ++i) {
301 m64 d = MM::load(dest[i]);
302 m64 dia = MM::negate(MM::alpha(d));
303 dia = MM::byte_mul(dia, ca);
304 dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), dia)));
305 }
306 }
307 MM::end();
308}
309
310/*
311 result = s * da
312 dest = s * da * ca + d * cia
313*/
314template <class MM>
315static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha)
316{
317 C_80; C_00;
318 if (const_alpha == 255) {
319 m64 s = MM::load(src);
320 for (int i = 0; i < length; ++i) {
321 m64 da = MM::alpha(MM::load(dest[i]));
322 dest[i] = MM::store(MM::byte_mul(s, da));
323 }
324 } else {
325 C_FF;
326 m64 s = MM::load(src);
327 m64 ca = MM::load_alpha(const_alpha);
328 s = MM::byte_mul(s, ca);
329 m64 cia = MM::negate(ca);
330 for (int i = 0; i < length; ++i) {
331 m64 d = MM::load(dest[i]);
332 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, cia));
333 }
334 }
335 MM::end();
336}
337
338template <class MM>
339static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
340{
341 C_FF; C_80; C_00;
342 if (const_alpha == 255) {
343 for (int i = 0; i < length; ++i) {
344 m64 a = MM::alpha(MM::load(dest[i]));
345 dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), a));
346 }
347 } else {
348 m64 ca = MM::load_alpha(const_alpha);
349 m64 cia = MM::negate(ca);
350 for (int i = 0; i < length; ++i) {
351 m64 d = MM::load(dest[i]);
352 m64 da = MM::byte_mul(MM::alpha(d), ca);
353 dest[i] = MM::store(MM::interpolate_pixel_255(
354 MM::load(src[i]), da, d, cia));
355 }
356 }
357 MM::end();
358}
359
360/*
361 result = d * sa
362 dest = d * sa * ca + d * cia
363 = d * (sa * ca + cia)
364*/
365template <class MM>
366static void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint src, uint const_alpha)
367{
368 C_80; C_00;
369 m64 a = MM::alpha(MM::load(src));
370 if (const_alpha != 255) {
371 C_FF;
372 m64 ca = MM::load_alpha(const_alpha);
373 m64 cia = MM::negate(ca);
374 a = MM::byte_mul(a, ca);
375 a = MM::add(a, cia);
376 }
377 for (int i = 0; i < length; ++i)
378 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
379 MM::end();
380}
381
382template <class MM>
383static void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha)
384{
385 C_FF; C_80; C_00;
386 if (const_alpha == 255) {
387 for (int i = 0; i < length; ++i) {
388 m64 a = MM::alpha(MM::load(src[i]));
389 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
390 }
391 } else {
392 m64 ca = MM::load_alpha(const_alpha);
393 m64 cia = MM::negate(ca);
394 for (int i = 0; i < length; ++i) {
395 m64 d = MM::load(dest[i]);
396 m64 a = MM::alpha(MM::load(src[i]));
397 a = MM::byte_mul(a, ca);
398 a = MM::add(a, cia);
399 dest[i] = MM::store(MM::byte_mul(d, a));
400 }
401 }
402 MM::end();
403}
404
405/*
406 result = s * dia
407 dest = s * dia * ca + d * cia
408*/
409template <class MM>
410static void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint src, uint const_alpha)
411{
412 C_FF; C_80; C_00;
413 m64 s = MM::load(src);
414 if (const_alpha == 255) {
415 for (int i = 0; i < length; ++i) {
416 m64 dia = MM::negate(MM::alpha(MM::load(dest[i])));
417 dest[i] = MM::store(MM::byte_mul(s, dia));
418 }
419 } else {
420 m64 ca = MM::load_alpha(const_alpha);
421 m64 cia = MM::negate(ca);
422 s = MM::byte_mul(s, ca);
423 for (int i = 0; i < length; ++i) {
424 m64 d = MM::load(dest[i]);
425 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, cia));
426 }
427 }
428 MM::end();
429}
430
431template <class MM>
432static void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha)
433{
434 C_FF; C_80; C_00;
435 if (const_alpha == 255) {
436 for (int i = 0; i < length; ++i) {
437 m64 ia = MM::negate(MM::alpha(MM::load(dest[i])));
438 dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), ia));
439 }
440 } else {
441 m64 ca = MM::load_alpha(const_alpha);
442 m64 cia = MM::negate(ca);
443 for (int i = 0; i < length; ++i) {
444 m64 d = MM::load(dest[i]);
445 m64 dia = MM::byte_mul(MM::negate(MM::alpha(d)), ca);
446 dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), dia, d, cia));
447 }
448 }
449 MM::end();
450}
451
452/*
453 result = d * sia
454 dest = d * sia * ca + d * cia
455 = d * (sia * ca + cia)
456*/
457template <class MM>
458static void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint src, uint const_alpha)
459{
460 C_FF; C_80; C_00;
461 m64 a = MM::negate(MM::alpha(MM::load(src)));
462 if (const_alpha != 255) {
463 m64 ca = MM::load_alpha(const_alpha);
464 a = MM::byte_mul(a, ca);
465 a = MM::add(a, MM::negate(ca));
466 }
467 for (int i = 0; i < length; ++i)
468 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
469 MM::end();
470}
471
472template <class MM>
473static void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha)
474{
475 C_FF; C_80; C_00;
476 if (const_alpha == 255) {
477 for (int i = 0; i < length; ++i) {
478 m64 a = MM::negate(MM::alpha(MM::load(src[i])));
479 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
480 }
481 } else {
482 m64 ca = MM::load_alpha(const_alpha);
483 m64 cia = MM::negate(ca);
484 for (int i = 0; i < length; ++i) {
485 m64 d = MM::load(dest[i]);
486 m64 a = MM::negate(MM::alpha(MM::load(src[i])));
487 a = MM::byte_mul(a, ca);
488 a = MM::add(a, cia);
489 dest[i] = MM::store(MM::byte_mul(d, a));
490 }
491 }
492 MM::end();
493}
494
495/*
496 result = s*da + d*sia
497 dest = s*da*ca + d*sia*ca + d *cia
498 = s*ca * da + d * (sia*ca + cia)
499 = s*ca * da + d * (1 - sa*ca)
500*/
501template <class MM>
502static void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint src, uint const_alpha)
503{
504 C_FF; C_80; C_00;
505 m64 s = MM::load(src);
506 if (const_alpha != 255) {
507 m64 ca = MM::load_alpha(const_alpha);
508 s = MM::byte_mul(s, ca);
509 }
510 m64 a = MM::negate(MM::alpha(s));
511 for (int i = 0; i < length; ++i) {
512 m64 d = MM::load(dest[i]);
513 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, a));
514 }
515 MM::end();
516}
517
518template <class MM>
519static void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha)
520{
521 C_FF; C_80; C_00;
522 if (const_alpha == 255) {
523 for (int i = 0; i < length; ++i) {
524 m64 s = MM::load(src[i]);
525 m64 d = MM::load(dest[i]);
526 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
527 MM::negate(MM::alpha(s))));
528 }
529 } else {
530 m64 ca = MM::load_alpha(const_alpha);
531 for (int i = 0; i < length; ++i) {
532 m64 s = MM::load(src[i]);
533 s = MM::byte_mul(s, ca);
534 m64 d = MM::load(dest[i]);
535 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
536 MM::negate(MM::alpha(s))));
537 }
538 }
539 MM::end();
540}
541
542/*
543 result = d*sa + s*dia
544 dest = d*sa*ca + s*dia*ca + d *cia
545 = s*ca * dia + d * (sa*ca + cia)
546*/
547template <class MM>
548static void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint src, uint const_alpha)
549{
550 C_FF; C_80; C_00;
551 m64 s = MM::load(src);
552 m64 a = MM::alpha(s);
553 if (const_alpha != 255) {
554 m64 ca = MM::load_alpha(const_alpha);
555 s = MM::byte_mul(s, ca);
556 a = MM::alpha(s);
557 a = MM::add(a, MM::negate(ca));
558 }
559 for (int i = 0; i < length; ++i) {
560 m64 d = MM::load(dest[i]);
561 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, a));
562 }
563 MM::end();
564}
565
566template <class MM>
567static void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha)
568{
569 C_FF; C_80; C_00;
570 if (const_alpha == 255) {
571 for (int i = 0; i < length; ++i) {
572 m64 s = MM::load(src[i]);
573 m64 d = MM::load(dest[i]);
574 dest[i] = MM::store(MM::interpolate_pixel_255(d, MM::alpha(s), s,
575 MM::negate(MM::alpha(d))));
576 }
577 } else {
578 m64 ca = MM::load_alpha(const_alpha);
579 for (int i = 0; i < length; ++i) {
580 m64 s = MM::load(src[i]);
581 s = MM::byte_mul(s, ca);
582 m64 d = MM::load(dest[i]);
583 m64 a = MM::alpha(s);
584 a = MM::add(a, MM::negate(ca));
585 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
586 d, a));
587 }
588 }
589 MM::end();
590}
591
592/*
593 result = d*sia + s*dia
594 dest = d*sia*ca + s*dia*ca + d *cia
595 = s*ca * dia + d * (sia*ca + cia)
596 = s*ca * dia + d * (1 - sa*ca)
597*/
598template <class MM>
599static void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint src, uint const_alpha)
600{
601 C_FF; C_80; C_00;
602 m64 s = MM::load(src);
603 if (const_alpha != 255) {
604 m64 ca = MM::load_alpha(const_alpha);
605 s = MM::byte_mul(s, ca);
606 }
607 m64 a = MM::negate(MM::alpha(s));
608 for (int i = 0; i < length; ++i) {
609 m64 d = MM::load(dest[i]);
610 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
611 d, a));
612 }
613 MM::end();
614}
615
616template <class MM>
617static void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha)
618{
619 C_FF; C_80; C_00;
620 if (const_alpha == 255) {
621 for (int i = 0; i < length; ++i) {
622 m64 s = MM::load(src[i]);
623 m64 d = MM::load(dest[i]);
624 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
625 d, MM::negate(MM::alpha(s))));
626 }
627 } else {
628 m64 ca = MM::load_alpha(const_alpha);
629 for (int i = 0; i < length; ++i) {
630 m64 s = MM::load(src[i]);
631 s = MM::byte_mul(s, ca);
632 m64 d = MM::load(dest[i]);
633 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
634 d, MM::negate(MM::alpha(s))));
635 }
636 }
637 MM::end();
638}
639
640template <class MM>
641static void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest,
642 int length,
643 uint color,
644 uint const_alpha)
645{
646 Q_UNUSED(const_alpha);
647
648 if ((quintptr)(dest) & 0x7) {
649 *dest++ |= color;
650 --length;
651 }
652
653 const int length64 = length / 2;
654 if (length64) {
655 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
656 const __m64 color64 = _mm_set_pi32(color, color);
657
658 int n = (length64 + 3) / 4;
659 switch (length64 & 0x3) {
660 case 0: do { *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
661 case 3: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
662 case 2: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
663 case 1: *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
664 } while (--n > 0);
665 }
666 }
667
668 if (length & 0x1) {
669 dest[length - 1] |= color;
670 }
671
672 MM::end();
673}
674
675template <class MM>
676static void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest,
677 int length,
678 uint color,
679 uint const_alpha)
680{
681 Q_UNUSED(const_alpha);
682
683 color |= 0xff000000;
684
685 if ((quintptr)(dest) & 0x7) { // align
686 *dest++ &= color;
687 --length;
688 }
689
690 const int length64 = length / 2;
691 if (length64) {
692 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
693 const __m64 color64 = _mm_set_pi32(color, color);
694
695 int n = (length64 + 3) / 4;
696 switch (length64 & 0x3) {
697 case 0: do { *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
698 case 3: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
699 case 2: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
700 case 1: *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
701 } while (--n > 0);
702 }
703 }
704
705 if (length & 0x1) {
706 dest[length - 1] &= color;
707 }
708
709 MM::end();
710}
711
712template <class MM>
713static void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest,
714 int length,
715 uint color,
716 uint const_alpha)
717{
718 Q_UNUSED(const_alpha);
719
720 color &= 0x00ffffff;
721
722 if ((quintptr)(dest) & 0x7) {
723 *dest++ ^= color;
724 --length;
725 }
726
727 const int length64 = length / 2;
728 if (length64) {
729 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
730 const __m64 color64 = _mm_set_pi32(color, color);
731
732 int n = (length64 + 3) / 4;
733 switch (length64 & 0x3) {
734 case 0: do { *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
735 case 3: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
736 case 2: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
737 case 1: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
738 } while (--n > 0);
739 }
740 }
741
742 if (length & 0x1) {
743 dest[length - 1] ^= color;
744 }
745
746 MM::end();
747}
748
749template <class MM>
750static void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest,
751 int length,
752 uint color,
753 uint const_alpha)
754{
755
756 Q_UNUSED(const_alpha);
757
758 if ((quintptr)(dest) & 0x7) {
759 *dest = (color & ~(*dest)) | 0xff000000;
760 ++dest;
761 --length;
762 }
763
764 const int length64 = length / 2;
765 if (length64) {
766 __m64 *dst64 = reinterpret_cast<__m64*>(dest);
767 const __m64 color64 = _mm_set_pi32(color, color);
768 const m64 mmx_0xff000000 = _mm_set1_pi32(0xff000000);
769 __m64 tmp1, tmp2, tmp3, tmp4;
770
771 int n = (length64 + 3) / 4;
772 switch (length64 & 0x3) {
773 case 0: do { tmp1 = _mm_andnot_si64(*dst64, color64);
774 *dst64++ = _mm_or_si64(tmp1, mmx_0xff000000);
775 case 3: tmp2 = _mm_andnot_si64(*dst64, color64);
776 *dst64++ = _mm_or_si64(tmp2, mmx_0xff000000);
777 case 2: tmp3 = _mm_andnot_si64(*dst64, color64);
778 *dst64++ = _mm_or_si64(tmp3, mmx_0xff000000);
779 case 1: tmp4 = _mm_andnot_si64(*dst64, color64);
780 *dst64++ = _mm_or_si64(tmp4, mmx_0xff000000);
781 } while (--n > 0);
782 }
783 }
784
785 if (length & 0x1) {
786 dest[length - 1] = (color & ~(dest[length - 1])) | 0xff000000;
787 }
788
789 MM::end();
790}
791
792template <class MM>
793static void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest,
794 int length,
795 uint color,
796 uint const_alpha)
797{
798 rasterop_solid_SourceAndNotDestination<MM>(dest, length,
799 ~color, const_alpha);
800}
801
802template <class MM>
803static void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest,
804 int length,
805 uint color,
806 uint const_alpha)
807{
808 Q_UNUSED(const_alpha);
809 color = ~color | 0xff000000;
810 while (length--) {
811 *dest = color | ~(*dest);
812 ++dest;
813 }
814}
815
816template <class MM>
817static void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest,
818 int length,
819 uint color,
820 uint const_alpha)
821{
822 rasterop_solid_SourceXorDestination<MM>(dest, length, ~color, const_alpha);
823}
824
825template <class MM>
826static void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length,
827 uint color, uint const_alpha)
828{
829 Q_UNUSED(const_alpha);
830 qt_memfill((quint32*)dest, ~color | 0xff000000, length);
831}
832
833template <class MM>
834static void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest,
835 int length,
836 uint color,
837 uint const_alpha)
838{
839 rasterop_solid_SourceAndDestination<MM>(dest, length,
840 ~color, const_alpha);
841}
842
843template <class MM>
844static inline void qt_blend_color_argb_x86(int count, const QSpan *spans,
845 void *userData,
846 CompositionFunctionSolid *solidFunc)
847{
848 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
849 if (data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source
850 || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver
851 && qAlpha(data->solid.color) == 255)) {
852 // inline for performance
853 C_FF; C_80; C_00;
854 while (count--) {
855 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
856 if (spans->coverage == 255) {
857 qt_memfill(static_cast<quint32*>(target), quint32(data->solid.color), spans->len);
858 } else {
859 // dest = s * ca + d * (1 - sa*ca) --> dest = s * ca + d * (1-ca)
860 m64 ca = MM::load_alpha(spans->coverage);
861 m64 s = MM::byte_mul(MM::load(data->solid.color), ca);
862 m64 ica = MM::negate(ca);
863 for (int i = 0; i < spans->len; ++i)
864 target[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(target[i]), ica)));
865 }
866 ++spans;
867 }
868 MM::end();
869 return;
870 }
871 CompositionFunctionSolid func = solidFunc[data->rasterBuffer->compositionMode];
872 while (count--) {
873 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
874 func(target, spans->len, data->solid.color, spans->coverage);
875 ++spans;
876 }
877}
878
#ifdef QT_HAVE_MMX
/*
  Plain MMX flavour of the intrinsics set: inherits every helper from
  QMMXCommonIntrinsics and adds end(), which issues _mm_empty() except on
  Windows CE builds where _X86_ is not defined.
*/
struct QMMXIntrinsics : public QMMXCommonIntrinsics
{
    static inline void end()
    {
#if defined(Q_OS_WINCE) && !defined(_X86_)
        // Nothing to do in this configuration.
#else
        _mm_empty();
#endif
    }
};
#endif // QT_HAVE_MMX
889
890QT_END_NAMESPACE
891
892#endif // QDRAWHELPER_MMX_P_H
Note: See TracBrowser for help on using the repository browser.