source: trunk/src/gui/painting/qdrawhelper_ssse3.cpp

Last change on this file was 846, checked in by Dmitry A. Kuminov, 14 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 8.4 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the QtGui module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include <private/qdrawhelper_x86_p.h>
43
44#ifdef QT_HAVE_SSSE3
45
46#include <private/qdrawingprimitive_sse2_p.h>
47
48QT_BEGIN_NAMESPACE
49
50inline static void blend_pixel(quint32 &dst, const quint32 src)
51{
52 if (src >= 0xff000000)
53 dst = src;
54 else if (src != 0)
55 dst = src + BYTE_MUL(dst, qAlpha(~src));
56}
57
58
/* The instruction palignr uses direct arguments, so we have to generate the code for the different
   shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow.
 */
/*
 * BLENDING_LOOP(palignrOffset, length)
 *
 * Vector loop for a source that is NOT 16-byte aligned while dst is. Four pixels are blended
 * per iteration. The source pixels src[x..x+3] are rebuilt from two aligned 16-byte loads
 * (the previous block and the next one) with _mm_alignr_epi8.
 *
 * palignrOffset  byte shift handed to _mm_alignr_epi8; must be a literal constant.
 * length         number of pixels in the row.
 *
 * The macro is not self-contained. It uses the following identifiers from the scope in which
 * it is expanded, by these exact names: x, src, dst, minusOffsetToAlignSrcOn16Bytes,
 * srcVectorPrevLoaded (updated on every iteration), alphaShuffleMask, alphaMask, nullVector,
 * one, colorMask and half.
 *
 * The loop only runs while the complete lookahead block, whose last pixel is
 * src[x - minusOffsetToAlignSrcOn16Bytes + 7], lies inside the row, so no load extends past
 * src[length - 1]. Pixels that are left over must be blended by the caller after the loop.
 */
#define BLENDING_LOOP(palignrOffset, length)\
    for (; x - minusOffsetToAlignSrcOn16Bytes < (length) - 7; x += 4) { \
        /* Aligned load of the 16-byte block following the one held in srcVectorPrevLoaded. */\
        const __m128i srcVectorLastLoaded = _mm_load_si128((const __m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
        /* Concatenate both blocks and shift right by palignrOffset bytes: yields src[x..x+3]. */\
        const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            /* The masked alpha of all four pixels equals alphaMask: plain copy. */\
            _mm_store_si128((__m128i *)&dst[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
            /* At least one pixel has a non-zero alpha: result = src + dst * (one - alpha). */\
            __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
            const __m128i dstVector = _mm_load_si128((const __m128i *)&dst[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
            _mm_store_si128((__m128i *)&dst[x], result); \
        } \
        /* The block just loaded becomes the "previous" block of the next iteration. */\
        srcVectorPrevLoaded = srcVectorLastLoaded;\
    }
80
81
// Blend src over dst (this macro does not apply any constant alpha).
// nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture, and should be defined as:
//const __m128i nullVector = _mm_set1_epi32(0);
//const __m128i half = _mm_set1_epi16(0x80);
//const __m128i one = _mm_set1_epi16(0xff);
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
//
// The computation being done is:
// result = s + d * (1-alpha)
// with shortcuts if fully opaque or fully transparent.
/*
 * BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask)
 *
 * Blends one row of `length` 32-bit pixels from src over dst (result = s + d * (1 - alpha)).
 *
 * Steps:
 *  1. scalar prologue (blend_pixel) until ALIGNMENT_PROLOGUE_16BYTES stops;
 *  2. a vector loop, four pixels at a time:
 *       - src 16-byte aligned at that point: aligned loads straight from src;
 *       - otherwise, when at least 8 pixels remain: BLENDING_LOOP, which rebuilds the
 *         source pixels from two aligned loads;
 *  3. scalar epilogue (blend_pixel) for whatever is left.
 *
 * The macro expands to a braced block, so a trailing semicolon at the call site is optional.
 *
 * NOTE: BLENDING_LOOP refers to src, dst, alphaMask, nullVector, one, colorMask and half by
 * those literal names rather than through this macro's parameters. The unaligned path therefore
 * only compiles as intended when the arguments passed here are variables with exactly these names.
 *
 * NOTE(review): on the unaligned path the first aligned load starts at
 * src[x - minusOffsetToAlignSrcOn16Bytes], i.e. up to 3 pixels before src[x]. When x is still 0
 * this is before the first pixel of the row, although inside the same aligned 16-byte block.
 */
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
\
    /* First, get dst aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        blend_pixel((dst)[x], (src)[x]); \
    } \
\
    /* Index (0..3) of &src[x] inside its 16-byte block, counted in 4-byte units. */ \
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&((src)[x])) >> 2) & 0x3;\
\
    /* Shuffle control: copies the top byte of every pixel into the low byte of both 16-bit \
       halves of that pixel and zeroes the high bytes (0xff selects zero). */ \
    const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\
\
    if (!minusOffsetToAlignSrcOn16Bytes) {\
        /* src is aligned, usual algorithm but with aligned operations.\
           See the SSE2 version for more documentation on the algorithm itself. */\
        for (; x < (length) - 3; x += 4) { \
            const __m128i srcVector = _mm_load_si128((const __m128i *)&(src)[x]); \
            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
                /* The masked alpha of all four pixels equals alphaMask: plain copy. */ \
                _mm_store_si128((__m128i *)&(dst)[x], srcVector); \
            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
                /* At least one pixel has a non-zero alpha: result = src + dst * (one - alpha). */ \
                __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
                alphaChannel = _mm_sub_epi16(one, alphaChannel); \
                const __m128i dstVector = _mm_load_si128((const __m128i *)&(dst)[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
                _mm_store_si128((__m128i *)&(dst)[x], result); \
            } \
        } /* end for() */\
    } else if (((length) - x) >= 8) {\
        /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((const __m128i *)&(src)[x - minusOffsetToAlignSrcOn16Bytes]);\
        const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\
\
        /* The shift has to be a literal, hence one expansion of the loop per possible value. */\
        switch (palignrOffset) {\
        case 4:\
            BLENDING_LOOP(4, length)\
            break;\
        case 8:\
            BLENDING_LOOP(8, length)\
            break;\
        case 12:\
            BLENDING_LOOP(12, length)\
            break;\
        default:\
            /* Not reachable: the offset is 1, 2 or 3 in this branch. */\
            break;\
        }\
    }\
    /* Scalar epilogue for the pixels the vector loops did not cover. */\
    for (; x < (length); ++x) \
        blend_pixel((dst)[x], (src)[x]); \
}
143
144void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
145 const uchar *srcPixels, int sbpl,
146 int w, int h,
147 int const_alpha)
148{
149 const quint32 *src = (const quint32 *) srcPixels;
150 quint32 *dst = (quint32 *) destPixels;
151 if (const_alpha == 256) {
152 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
153 const __m128i nullVector = _mm_setzero_si128();
154 const __m128i half = _mm_set1_epi16(0x80);
155 const __m128i one = _mm_set1_epi16(0xff);
156 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
157
158 for (int y = 0; y < h; ++y) {
159 BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
160 dst = (quint32 *)(((uchar *) dst) + dbpl);
161 src = (const quint32 *)(((const uchar *) src) + sbpl);
162 }
163 } else if (const_alpha != 0) {
164 // dest = (s + d * sia) * ca + d * cia
165 // = s * ca + d * (sia * ca + cia)
166 // = s * ca + d * (1 - sa*ca)
167 const_alpha = (const_alpha * 255) >> 8;
168 const __m128i nullVector = _mm_setzero_si128();
169 const __m128i half = _mm_set1_epi16(0x80);
170 const __m128i one = _mm_set1_epi16(0xff);
171 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
172 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
173 for (int y = 0; y < h; ++y) {
174 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
175 dst = (quint32 *)(((uchar *) dst) + dbpl);
176 src = (const quint32 *)(((const uchar *) src) + sbpl);
177 }
178 }
179}
180
181QT_END_NAMESPACE
182
183#endif // QT_HAVE_SSSE3
Note: See TracBrowser for help on using the repository browser.