Context Navigation

qdrawhelper_ssse3.cpp

Last change on this file was 846, checked in by Dmitry A. Kuminov, 14 years ago
trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.
File size: 8.4 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4	** All rights reserved.
5	** Contact: Nokia Corporation (qt-info@nokia.com)
6	**
7	** This file is part of the QtGui module of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:LGPL$
10	** Commercial Usage
11	** Licensees holding valid Qt Commercial licenses may use this file in
12	** accordance with the Qt Commercial License Agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and Nokia.
15	**
16	** GNU Lesser General Public License Usage
17	** Alternatively, this file may be used under the terms of the GNU Lesser
18	** General Public License version 2.1 as published by the Free Software
19	** Foundation and appearing in the file LICENSE.LGPL included in the
20	** packaging of this file. Please review the following information to
21	** ensure the GNU Lesser General Public License version 2.1 requirements
22	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23	**
24	** In addition, as a special exception, Nokia gives you certain additional
25	** rights. These rights are described in the Nokia Qt LGPL Exception
26	** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you have questions regarding the use of this file, please contact
37	** Nokia at qt-info@nokia.com.
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include <private/qdrawhelper_x86_p.h>
43
44	#ifdef QT_HAVE_SSSE3
45
46	#include <private/qdrawingprimitive_sse2_p.h>
47
48	QT_BEGIN_NAMESPACE
49
50	inline static void blend_pixel(quint32 &dst, const quint32 src)
51	{
52	if (src >= 0xff000000)
53	dst = src;
54	else if (src != 0)
55	dst = src + BYTE_MUL(dst, qAlpha(~src));
56	}
57
58
59	/* The palignr instruction takes its shift as an immediate (compile-time constant) argument, so we have to generate the code for the different
60	shifts (4, 8, 12). Checking the alignment inside the loop is unfortunately way too slow.
61	*/
/* Blends four pixels per iteration when src is not 16-byte aligned.
   Expects x, src, dst, minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded,
   alphaShuffleMask, alphaMask, nullVector, one, colorMask and half to be
   in scope at the point of expansion. */
#define BLENDING_LOOP(palignrOffset, length)\
    while (x < length-3) { \
        /* Aligned load of the 16-byte chunk that follows the previously loaded one. */ \
        const __m128i srcChunkNext = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
        /* Splice the previous and next chunks to obtain the four pixels starting at src[x]. */ \
        const __m128i srcQuad = _mm_alignr_epi8(srcChunkNext, srcVectorPrevLoaded, palignrOffset); \
        const __m128i srcQuadAlpha = _mm_and_si128(srcQuad, alphaMask); \
        const bool allOpaque = _mm_movemask_epi8(_mm_cmpeq_epi32(srcQuadAlpha, alphaMask)) == 0xffff; \
        if (allOpaque) { \
            /* Every alpha byte is 0xff: the source replaces the destination. */ \
            _mm_store_si128((__m128i *)&dst[x], srcQuad); \
        } else { \
            const bool allTransparent = _mm_movemask_epi8(_mm_cmpeq_epi32(srcQuadAlpha, nullVector)) == 0xffff; \
            if (!allTransparent) { \
                /* result = s + d * (1-alpha) */ \
                __m128i alphaChannel = _mm_sub_epi16(one, _mm_shuffle_epi8(srcQuad, alphaShuffleMask)); \
                const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                _mm_store_si128((__m128i *)&dst[x], _mm_add_epi8(srcQuad, destMultipliedByOneMinusAlpha)); \
            } \
        } \
        /* The chunk just loaded provides the leading pixels of the next iteration. */ \
        srcVectorPrevLoaded = srcChunkNext;\
        x += 4; \
    }
80
81
82	// Blend src over dst (no constant alpha is applied by this macro).
83	// nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture, and should be defined as:
84	//const __m128i nullVector = _mm_set1_epi32(0);
85	//const __m128i half = _mm_set1_epi16(0x80);
86	//const __m128i one = _mm_set1_epi16(0xff);
87	//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
88	//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
89	//
90	// The computation being done is:
91	// result = s + d * (1-alpha)
92	// with shortcuts if fully opaque or fully transparent.
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
    \
    /* Scalar prologue: blend single pixels until dst is aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
        blend_pixel(dst[x], src[x]); \
    } \
    \
    /* Number of pixels (0..3) by which &src[x] lies past a 16-byte boundary. */ \
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\
    /* Shuffle control selecting bytes 3, 7, 11 and 15 (one per pixel) into the low byte \
       of each 16-bit lane of that pixel; the 0xff entries zero the high bytes. */ \
    const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\
    \
    if (minusOffsetToAlignSrcOn16Bytes == 0) {\
        /* src is aligned as well: usual algorithm with aligned loads and stores. \
           See the SSE2 version for more documentation on the algorithm itself. */\
        while (x < length-3) { \
            const __m128i srcQuad = _mm_load_si128((__m128i *)&src[x]); \
            const __m128i srcQuadAlpha = _mm_and_si128(srcQuad, alphaMask); \
            const bool allOpaque = _mm_movemask_epi8(_mm_cmpeq_epi32(srcQuadAlpha, alphaMask)) == 0xffff; \
            if (allOpaque) { \
                _mm_store_si128((__m128i *)&dst[x], srcQuad); \
            } else { \
                const bool allTransparent = _mm_movemask_epi8(_mm_cmpeq_epi32(srcQuadAlpha, nullVector)) == 0xffff; \
                if (!allTransparent) { \
                    /* result = s + d * (1-alpha) */ \
                    __m128i alphaChannel = _mm_sub_epi16(one, _mm_shuffle_epi8(srcQuad, alphaShuffleMask)); \
                    const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
                    __m128i destMultipliedByOneMinusAlpha; \
                    BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                    _mm_store_si128((__m128i *)&dst[x], _mm_add_epi8(srcQuad, destMultipliedByOneMinusAlpha)); \
                } \
            } \
            x += 4; \
        } \
    } else if ((length - x) >= 8) {\
        /* src is misaligned: keep two aligned chunks around, prevLoaded for the first \
           pixels and the freshly loaded one for the current pixels. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\
        \
        /* palignr needs a constant byte shift: pixel offsets 1, 2, 3 map to 4, 8, 12 bytes. */ \
        switch (minusOffsetToAlignSrcOn16Bytes) {\
        case 1:\
            BLENDING_LOOP(4, length)\
            break;\
        case 2:\
            BLENDING_LOOP(8, length)\
            break;\
        case 3:\
            BLENDING_LOOP(12, length)\
            break;\
        }\
    }\
    /* Scalar epilogue for whatever the vector loops left over. */ \
    while (x < length) { \
        blend_pixel(dst[x], src[x]); \
        ++x; \
    } \
}
143
144	void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
145	const uchar *srcPixels, int sbpl,
146	int w, int h,
147	int const_alpha)
148	{
149	const quint32 src = (const quint32 ) srcPixels;
150	quint32 dst = (quint32 ) destPixels;
151	if (const_alpha == 256) {
152	const __m128i alphaMask = _mm_set1_epi32(0xff000000);
153	const __m128i nullVector = _mm_setzero_si128();
154	const __m128i half = _mm_set1_epi16(0x80);
155	const __m128i one = _mm_set1_epi16(0xff);
156	const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
157
158	for (int y = 0; y < h; ++y) {
159	BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
160	dst = (quint32 )(((uchar ) dst) + dbpl);
161	src = (const quint32 )(((const uchar ) src) + sbpl);
162	}
163	} else if (const_alpha != 0) {
164	// dest = (s + d * sia) * ca + d * cia
165	// = s * ca + d * (sia * ca + cia)
166	// = s * ca + d * (1 - sa*ca)
167	const_alpha = (const_alpha * 255) >> 8;
168	const __m128i nullVector = _mm_setzero_si128();
169	const __m128i half = _mm_set1_epi16(0x80);
170	const __m128i one = _mm_set1_epi16(0xff);
171	const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
172	const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
173	for (int y = 0; y < h; ++y) {
174	BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
175	dst = (quint32 )(((uchar ) dst) + dbpl);
176	src = (const quint32 )(((const uchar ) src) + sbpl);
177	}
178	}
179	}
180
181	QT_END_NAMESPACE
182
183	#endif // QT_HAVE_SSSE3

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/gui/painting/qdrawhelper_ssse3.cpp

Download in other formats: