/*
 * Include guard for this header, followed by InsertFirstUint32().
 *
 * InsertFirstUint32 writes a 32-bit value into 32-bit element 0 of an SSE register.
 * With SSE_VERSION >= 4 a single _mm_insert_epi32 is used; the two _mm_insert_epi16
 * calls write the same value as two 16-bit halves (low half to word 0, high half to word 1).
 * NOTE(review): braces and the #else/#endif separating the two variants are not present
 * in this listing - presumably dropped by the extraction; confirm against the real header.
 *
 * value: the 32-bit value to insert.
 * into:  the register, modified in place.
 */
10 #ifndef BLITTER_32BPP_SSE_FUNC_HPP 11 #define BLITTER_32BPP_SSE_FUNC_HPP 15 static inline void InsertFirstUint32(
const uint32 value, __m128i &into)
17 #if (SSE_VERSION >= 4) 18 into = _mm_insert_epi32(into, value, 0);
/* Variant built from 16-bit inserts: words 0 and 1 together form 32-bit element 0. */
20 into = _mm_insert_epi16(into, value, 0);
21 into = _mm_insert_epi16(into, value >> 16, 1);
/*
 * Write a 32-bit value into 32-bit element 1 of an SSE register.
 * With SSE_VERSION >= 4 a single _mm_insert_epi32 is used; the two _mm_insert_epi16
 * calls write the same value as two 16-bit halves (low half to word 2, high half to word 3).
 * NOTE(review): braces and the #else/#endif separating the two variants are not present
 * in this listing - presumably dropped by the extraction; confirm against the real header.
 *
 * value: the 32-bit value to insert.
 * into:  the register, modified in place.
 */
25 static inline void InsertSecondUint32(
const uint32 value, __m128i &into)
27 #if (SSE_VERSION >= 4) 28 into = _mm_insert_epi32(into, value, 1);
/* Variant built from 16-bit inserts: words 2 and 3 together form 32-bit element 1. */
30 into = _mm_insert_epi16(into, value, 2);
31 into = _mm_insert_epi16(into, value >> 16, 3);
/*
 * Put a 64-bit value into the low 64 bits of an SSE register.
 * Three alternative implementations are visible:
 *  1. a direct 64-bit move with _mm_cvtsi64_si128;
 *  2. for SSE_VERSION >= 4: a 32-bit move of the low half, then InsertSecondUint32() for the high half;
 *  3. a store through the um128i view of the register (m128i_u64[0]).
 * NOTE(review): the condition that selects variant 1, and the #else/#endif lines between
 * the variants, are not visible in this listing - confirm against the real header.
 *
 * value: the 64-bit value to load.
 * into:  the register that receives the value.
 */
35 static inline void LoadUint64(
const uint64 value, __m128i &into)
38 into = _mm_cvtsi64_si128(value);
/* The 32-bit move takes the low 32 bits of value; the upper 32 bits are inserted separately. */
40 #if (SSE_VERSION >= 4) 41 into = _mm_cvtsi32_si128(value);
42 InsertSecondUint32(value >> 32, into);
/* Fallback: write the low 64-bit element directly; only element 0 is assigned here. */
44 (*(um128i*) &into).m128i_u64[0] = value;
/*
 * Pack 16-bit words back into bytes.
 *  - SSE_VERSION == 2: 'from' is ANDed with 'mask' first, then packed with the saturating
 *    _mm_packus_epi16 (the same register is used for both halves of the result).
 *  - otherwise: a single byte shuffle, with 'mask' used as the shuffle control.
 * NOTE(review): the #else/#endif between the two variants is not visible in this listing.
 *
 * from: the 16-bit words to pack.
 * mask: AND mask (SSE2 variant) or shuffle control (shuffle variant).
 * Returns the packed register.
 */
49 static inline __m128i PackUnsaturated(__m128i from,
const __m128i &mask)
51 #if (SSE_VERSION == 2) 52 from = _mm_and_si128(from, mask);
53 return _mm_packus_epi16(from, from);
55 return _mm_shuffle_epi8(from, mask);
/*
 * Spread one 16-bit word of each 64-bit half of 'from' over the other words of that half.
 *  - SSE_VERSION == 2: two word shuffles with immediate 0x3F, first on the low half and then
 *    on the high half. 0x3F selects source word 3 for result words 0..2 and source word 0
 *    for result word 3. 'mask' is not used by this variant.
 *  - otherwise: a single byte shuffle controlled by 'mask'.
 * NOTE(review): the #else/#endif between the two variants is not visible in this listing.
 *
 * from: the 16-bit words to redistribute.
 * mask: shuffle control for the byte-shuffle variant.
 * Returns the shuffled register.
 */
59 static inline __m128i DistributeAlpha(
const __m128i from,
const __m128i &mask)
61 #if (SSE_VERSION == 2) 62 __m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F);
63 return _mm_shufflehi_epi16(alphaAB, 0x3F);
65 return _mm_shuffle_epi8(from, mask);
/*
 * Blend the low 8 bytes of 'src' over the low 8 bytes of 'dst'.
 * Per 16-bit channel the result is ((src - dst) * alpha >> 8) + dst, where alpha is the
 * value produced by DistributeAlpha() from the source words (each non-zero word incremented by one).
 *
 * src:               source pixels in the low 64 bits.
 * dst:               destination pixels in the low 64 bits.
 * distribution_mask: passed to DistributeAlpha().
 * pack_mask:         passed to PackUnsaturated().
 * Returns the blended pixels, packed back to bytes by PackUnsaturated().
 */
69 static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst,
const __m128i &distribution_mask,
const __m128i &pack_mask)
/* Widen the low 8 bytes of each input to 16-bit words by interleaving with zero. */
71 __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
72 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
/* Compare gives all-ones for words > 0; shifting right by 15 turns that into 1, which is added to the source word. */
74 __m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128());
75 alphaAB = _mm_srli_epi16(alphaAB, 15);
76 alphaAB = _mm_add_epi16(alphaAB, srcAB);
77 alphaAB = DistributeAlpha(alphaAB, distribution_mask);
/* (src - dst) * alpha, shifted right by 8, plus dst. */
79 srcAB = _mm_sub_epi16(srcAB, dstAB);
80 srcAB = _mm_mullo_epi16(srcAB, alphaAB);
81 srcAB = _mm_srli_epi16(srcAB, 8);
82 srcAB = _mm_add_epi16(srcAB, dstAB);
83 return PackUnsaturated(srcAB, pack_mask);
/*
 * Darken the low 8 bytes of 'dst' according to the source words distributed by DistributeAlpha().
 * Per 16-bit channel the result is dst * (tr_nom_base - alpha / 4) >> 8, packed with unsigned saturation.
 *
 * src:               source pixels in the low 64 bits; only used to derive the alpha words.
 * dst:               destination pixels in the low 64 bits.
 * distribution_mask: passed to DistributeAlpha().
 * tr_nom_base:       value the quartered alpha is subtracted from to form the multiplier.
 * Returns the darkened pixels packed back to bytes.
 */
89 static inline __m128i DarkenTwoPixels(__m128i src, __m128i dst,
const __m128i &distribution_mask,
const __m128i &tr_nom_base)
/* Widen the low 8 bytes of each input to 16-bit words by interleaving with zero. */
91 __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
92 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
93 __m128i alphaAB = DistributeAlpha(srcAB, distribution_mask);
/* Multiplier = tr_nom_base - alpha / 4. */
94 alphaAB = _mm_srli_epi16(alphaAB, 2);
95 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
/* dst * multiplier, shifted right by 8. */
96 dstAB = _mm_mullo_epi16(dstAB, nom);
97 dstAB = _mm_srli_epi16(dstAB, 8);
98 return _mm_packus_epi16(dstAB, dstAB);
/*
 * Brightness adjustment of a single colour, wrapped in the IGNORE_UNINITIALIZED_WARNING_START/STOP macros.
 * The blue, green and red channels are placed in three 16-bit fields of one 64-bit integer,
 * scaled, and any excess above 8 bits ("ob") is used to move all channels towards
 * OVERBRIGHT_VALUE_MASK. The alpha byte of the input is carried over unchanged.
 *
 * NOTE(review): several lines are missing from this listing. 'c16_ob', 'ret' and 'c128' are
 * used without a visible declaration, and 'brightness' is not referenced by any visible line
 * (presumably c16 is multiplied by it before the division) - confirm against the real header.
 *
 * colour:     the colour to adjust.
 * brightness: the brightness value; see the note above.
 * Returns the adjusted colour with the original alpha byte.
 */
101 IGNORE_UNINITIALIZED_WARNING_START
102 static Colour ReallyAdjustBrightness(
Colour colour, uint8 brightness)
/* b in bits 0..15, g in bits 16..31, r in bits 32..47. */
104 uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
107 c16 /= Blitter_32bppBase::DEFAULT_BRIGHTNESS;
/* Keep 9 bits per 16-bit field. */
108 c16 &= 0x01FF01FF01FFULL;
/* Take bit 15 of each field, expand it to 0xFF and use it to keep the low 8 bits of c16 for those fields. */
111 c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001ULL) * 0xFF) & c16;
/* Sum of the three fields, halved. */
112 const uint ob = ((uint16) c16_ob + (uint16) (c16_ob >> 16) + (uint16) (c16_ob >> 32)) / 2;
/* Alpha is the top byte of the 32-bit colour value. */
114 const uint32 alpha32 = colour.
data & 0xFF000000;
116 LoadUint64(c16, ret);
/* Immediate 0xC0 copies word 0 into words 0..2 and keeps word 3. */
118 __m128i ob128 = _mm_cvtsi32_si128(ob);
119 ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
120 __m128i white = OVERBRIGHT_VALUE_MASK;
/* ((white - c) * ob >> 8) + c, with a saturating unsigned subtraction. */
122 ret = _mm_subs_epu16(white, c128);
123 ret = _mm_mullo_epi16(ret, ob128);
124 ret = _mm_srli_epi16(ret, 8);
125 ret = _mm_add_epi16(ret, c128);
/* Pack to bytes with unsigned saturation and combine the low 32 bits with the saved alpha. */
128 ret = _mm_packus_epi16(ret, ret);
129 return alpha32 | _mm_cvtsi128_si32(ret);
131 IGNORE_UNINITIALIZED_WARNING_STOP
/*
 * Brightness adjustment entry point with a shortcut for the default brightness.
 *
 * colour:     the colour to adjust.
 * brightness: the brightness value.
 * Returns 'colour' unchanged when brightness equals Blitter_32bppBase::DEFAULT_BRIGHTNESS,
 * otherwise the result of ReallyAdjustBrightness().
 */
136 static inline Colour AdjustBrightneSSE(
Colour colour, uint8 brightness)
/* Nothing to do for the default brightness. */
139 if (brightness == Blitter_32bppBase::DEFAULT_BRIGHTNESS)
return colour;
141 return ReallyAdjustBrightness(colour, brightness);
/*
 * Brightness adjustment of the pixels in the low 8 bytes of 'from', using SSE word arithmetic.
 * The final steps use the same ((white - c) * ob >> 8) + c formula, with OVERBRIGHT_VALUE_MASK
 * as white, that ReallyAdjustBrightness() applies to a single colour.
 *
 * NOTE(review): what happens for SSE_VERSION < 3, and the #else/#endif lines, are not visible
 * in this listing. The shuffle and horizontal-add intrinsics used here are not SSE2
 * instructions, so the visible body presumably belongs to the other branch - confirm.
 *
 * from:       the pixels to adjust, in the low 64 bits.
 * brightness: 32-bit word of which only the upper byte of each 16-bit half is kept.
 * Returns the adjusted pixels packed back to bytes.
 */
144 static inline __m128i AdjustBrightnessOfTwoPixels(__m128i from, uint32 brightness)
/* Keep the upper byte of each 16-bit half, then add DEFAULT_BRIGHTNESS to the word. */
146 #if (SSE_VERSION < 3) 153 brightness &= 0xFF00FF00;
154 brightness += Blitter_32bppBase::DEFAULT_BRIGHTNESS;
/* Widen the pixels to 16-bit words and spread the brightness bytes according to BRIGHTNESS_LOW_CONTROL_MASK. */
156 __m128i colAB = _mm_unpacklo_epi8(from, _mm_setzero_si128());
157 __m128i briAB = _mm_cvtsi32_si128(brightness);
158 briAB = _mm_shuffle_epi8(briAB, BRIGHTNESS_LOW_CONTROL_MASK);
159 colAB = _mm_mullo_epi16(colAB, briAB);
/* Bit 15 of each product goes to colAB_ob; the product shifted right by 7 is the scaled colour. */
160 __m128i colAB_ob = _mm_srli_epi16(colAB, 8 + 7);
161 colAB = _mm_srli_epi16(colAB, 7);
167 colAB = _mm_and_si128(colAB, BRIGHTNESS_DIV_CLEANER);
/* Build a per-word mask from colAB_ob and use it to select words of colAB. */
168 colAB_ob = _mm_and_si128(colAB_ob, OVERBRIGHT_PRESENCE_MASK);
169 colAB_ob = _mm_mullo_epi16(colAB_ob, OVERBRIGHT_VALUE_MASK);
170 colAB_ob = _mm_and_si128(colAB_ob, colAB);
/* Two horizontal adds sum the selected words; the sum is then halved and redistributed. */
171 __m128i obAB = _mm_hadd_epi16(_mm_hadd_epi16(colAB_ob, _mm_setzero_si128()), _mm_setzero_si128());
173 obAB = _mm_srli_epi16(obAB, 1);
174 obAB = _mm_shuffle_epi8(obAB, OVERBRIGHT_CONTROL_MASK);
/* ((white - c) * ob >> 8) + c, with a saturating unsigned subtraction. */
175 __m128i retAB = OVERBRIGHT_VALUE_MASK;
176 retAB = _mm_subs_epu16(retAB, colAB);
177 retAB = _mm_mullo_epi16(retAB, obAB);
178 retAB = _mm_srli_epi16(retAB, 8);
179 retAB = _mm_add_epi16(retAB, colAB);
181 return _mm_packus_epi16(retAB, retAB);
/*
 * Templated per-sprite drawing routine, compiled only when FULL_ANIMATION == 0.
 *
 * NOTE(review): the function header is not visible in this listing. It is presumably the
 * Draw<mode, read_mode, bt_last, translucent>(bp, zoom) that the dispatch code further down
 * instantiates. The switch/case lines, closing braces, several declarations ('dst',
 * 'dst_line', 'srcABCD', 'remapped_src' as an array) and pointer increments are missing too.
 * The comments below describe only what the visible lines do - confirm against the real header.
 *
 * Template parameters, as far as the visible code uses them:
 *  - read_mode: RM_WITH_MARGIN makes every line read two metadata values from the start of the RGBA line.
 *  - bt_last:   BT_ODD, or BT_NONE with an odd width, enables the single trailing pixel step.
 *  - mode, translucent: their uses are not visible in this listing.
 */
185 #if FULL_ANIMATION == 0 193 IGNORE_UNINITIALIZED_WARNING_START
194 template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last,
bool translucent>
195 #if (SSE_VERSION == 2) 197 #elif (SSE_VERSION == 3) 199 #elif (SSE_VERSION == 4) 203 const byte *
const remap = bp->
remap;
205 int effective_width = bp->
width;
/* Locate the sprite info for this zoom level and the first map-value / RGBA line after skipping skip_top lines. */
208 const SpriteData *
const sd = (
const SpriteData *) bp->
sprite;
209 const SpriteInfo *
const si = &sd->infos[zoom];
210 const MapValue *src_mv_line = (
const MapValue *) &sd->data[si->mv_offset] + bp->
skip_top * si->sprite_width;
211 const Colour *src_rgba_line = (
const Colour *) ((
const byte *) &sd->data[si->sprite_offset] + bp->
skip_top * si->sprite_line_size);
/* NOTE(review): the body of this condition is not visible in this listing. */
213 if (read_mode != RM_WITH_MARGIN) {
217 const MapValue *src_mv = src_mv_line;
/* Per-SSE-version constants; the macros choose which of them are passed to AlphaBlendTwoPixels() and DarkenTwoPixels(). */
220 #if (SSE_VERSION == 2) 221 const __m128i clear_hi = CLEAR_HIGH_BYTE_MASK;
222 #define ALPHA_BLEND_PARAM_1 clear_hi 223 #define ALPHA_BLEND_PARAM_2 clear_hi 224 #define DARKEN_PARAM_1 tr_nom_base 225 #define DARKEN_PARAM_2 tr_nom_base 227 const __m128i a_cm = ALPHA_CONTROL_MASK;
228 const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
229 #define ALPHA_BLEND_PARAM_1 a_cm 230 #define ALPHA_BLEND_PARAM_2 pack_low_cm 231 #define DARKEN_PARAM_1 a_cm 232 #define DARKEN_PARAM_2 tr_nom_base 234 const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
/* One iteration per line, counting bp->height down to zero. */
236 for (
int y = bp->
height; y != 0; y--) {
/* Pixel data starts META_LENGTH entries after the start of the RGBA line. */
238 const Colour *src = src_rgba_line + META_LENGTH;
/*
 * With a margin: metadata entry 0 is added to both src and dst and subtracted from the width.
 * When entry 1 exceeds the width difference between sprite and blit, the excess is subtracted too.
 * Lines with nothing left to draw are skipped.
 */
241 if (read_mode == RM_WITH_MARGIN) {
242 assert(bt_last == BT_NONE);
243 src += src_rgba_line[0].
data;
244 dst += src_rgba_line[0].
data;
246 const int width_diff = si->sprite_width - bp->
width;
247 effective_width = bp->
width - (int) src_rgba_line[0].data;
248 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
249 const int new_width = effective_width - delta_diff;
250 effective_width = delta_diff > 0 ? new_width : effective_width;
251 if (effective_width <= 0)
goto next_line;
/* Plain copy: a source pixel is written only when its alpha is non-zero. */
257 for (uint x = (uint) effective_width; x > 0; x--) {
258 if (src->
a) *dst = *src;
/* Alpha blend 64 bits of source and destination per iteration. */
265 for (uint x = (uint) effective_width / 2; x > 0; x--) {
266 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
267 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
268 _mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
/* Blend one remaining 32-bit pixel. */
273 if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
274 __m128i srcABCD = _mm_cvtsi32_si128(src->
data);
275 __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
276 dst->
data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
/* Remap path for SSE_VERSION >= 3, 64 bits per iteration; mvX2 is read as one 32-bit word from src_mv. */
281 #if (SSE_VERSION >= 3) 282 for (uint x = (uint) effective_width / 2; x > 0; x--) {
283 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
284 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
285 uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
/* Remapping is only attempted when the low byte of either 16-bit half of mvX2 is non-zero. */
288 if (mvX2 & 0x00FF00FF) {
/*
 * CMOV_REMAP, for m = low byte of m_m and r = remap[m]:
 * m == 0 yields the source pixel; otherwise r == 0 keeps m_colour_init,
 * and any other r yields the palette colour combined with the source alpha byte.
 */
289 #define CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) \ 291 Colour m_colour = m_colour_init; \ 293 const Colour srcm = (Colour) (m_src); \ 294 const uint m = (byte) (m_m); \ 295 const uint r = remap[m]; \ 296 const Colour cmap = (this->LookupColourInPalette(r).data & 0x00FFFFFF) | (srcm.data & 0xFF000000); \ 297 m_colour = r == 0 ? m_colour : cmap; \ 298 m_colour = m != 0 ? m_colour : srcm; \ 301 uint64 srcs = _mm_cvtsi128_si64(srcABCD);
/* Variant that assembles both remapped pixels in one 64-bit integer. */
302 uint64 remapped_src = 0;
303 CMOV_REMAP(c0, 0, srcs, mvX2);
304 remapped_src = c0.data;
305 CMOV_REMAP(c1, 0, srcs >> 32, mvX2 >> 16);
306 remapped_src |= (uint64) c1.data << 32;
307 srcABCD = _mm_cvtsi64_si128(remapped_src);
/* Variant that stores the remapped pixels in a two-element remapped_src; NOTE(review): the preprocessor lines choosing between the variants are not visible. */
310 CMOV_REMAP(c0, 0, _mm_cvtsi128_si32(srcABCD), mvX2);
311 remapped_src[0] = c0.
data;
312 CMOV_REMAP(c1, 0, src[1], mvX2 >> 16);
313 remapped_src[1] = c1.
data;
314 srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src);
/* Brightness adjustment is skipped when the high byte of both halves of mvX2 is 0x80. */
317 if ((mvX2 & 0xFF00FF00) != 0x80008000) srcABCD = AdjustBrightnessOfTwoPixels(srcABCD, mvX2);
321 _mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
327 if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
/* Single-pixel remap: the palette colour for remap[m] is brightness-adjusted with the map value's v. */
329 for (uint x = (uint) effective_width; x > 0; x--) {
334 const uint r = remap[src_mv->m];
336 Colour remapped_colour = AdjustBrightneSSE(this->LookupColourInPalette(r), src_mv->v);
338 *dst = remapped_colour;
/* The remapped colour takes the source alpha before being blended. */
340 remapped_colour.
a = src->
a;
341 srcABCD = _mm_cvtsi32_si128(remapped_colour.
data);
342 goto bmcr_alpha_blend_single;
346 srcABCD = _mm_cvtsi32_si128(src->
data);
/* Shared tail: blend srcABCD over the destination pixel and store the low 32 bits. */
348 bmcr_alpha_blend_single:
349 __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
350 srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2);
352 dst->
data = _mm_cvtsi128_si32(srcABCD);
/* Darkening with DarkenTwoPixels(), 64 bits per iteration over bp->width; NOTE(review): what the SSE_VERSION == 2 branch contains is not visible. */
354 #if (SSE_VERSION == 2) 364 for (uint x = (uint) bp->
width / 2; x > 0; x--) {
365 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
366 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
367 _mm_storel_epi64((__m128i *) dst, DarkenTwoPixels(srcABCD, dstABCD, DARKEN_PARAM_1, DARKEN_PARAM_2));
/* Darken one remaining 32-bit pixel. */
372 if ((bt_last == BT_NONE && bp->
width & 1) || bt_last == BT_ODD) {
373 __m128i srcABCD = _mm_cvtsi32_si128(src->
data);
374 __m128i dstABCD = _mm_cvtsi32_si128(dst->
data);
375 dst->
data = _mm_cvtsi128_si32(DarkenTwoPixels(srcABCD, dstABCD, DARKEN_PARAM_1, DARKEN_PARAM_2));
/* Per pixel: with m == 0 the MakeDark() value is used for all three channels; otherwise a non-zero remap[m] draws the adjusted palette colour with the source alpha. */
380 for (uint x = (uint) bp->
width; x > 0; x--) {
381 if (src_mv->m == 0) {
383 uint8 g = MakeDark(src->r, src->g, src->b);
384 *dst = ComposeColourRGBA(g, g, g, src->
a, *dst);
387 uint r = remap[src_mv->m];
388 if (r != 0) *dst = ComposeColourPANoCheck(this->AdjustBrightness(this->LookupColourInPalette(r), src_mv->v), src->
a, *dst);
/* NOTE(review): the body of this loop is not visible in this listing. */
397 for (uint x = (uint) bp->
width; x > 0; x--) {
/* Advance to the next line: the source by sprite_line_size bytes, the destination by bp->pitch. */
410 src_rgba_line = (
const Colour*) ((
const byte*) src_rgba_line + si->sprite_line_size);
411 dst_line += bp->
pitch;
414 IGNORE_UNINITIALIZED_WARNING_STOP
/*
 * Dispatch code that picks a Draw<> instantiation and calls it with (bp, zoom).
 *
 * NOTE(review): the function header, the switch/if lines that select between the calls and
 * the closing braces are not visible in this listing - confirm against the real header.
 */
/* bt_last is derived from the lowest bit of the width. */
423 #if (SSE_VERSION == 2) 425 #elif (SSE_VERSION == 3) 427 #elif (SSE_VERSION == 4) 435 const BlockType bt_last = (BlockType) (bp->
width & 1);
/* RM_WITH_SKIP variants: BT_ODD has its own instantiation, every other value uses BT_EVEN. */
437 default: Draw<BM_NORMAL, RM_WITH_SKIP, BT_EVEN, true>(bp, zoom);
return;
438 case BT_ODD: Draw<BM_NORMAL, RM_WITH_SKIP, BT_ODD, true>(bp, zoom);
return;
/* RM_WITH_MARGIN variants: the sprite's SF_TRANSLUCENT flag selects the 'translucent' template argument. */
441 if (((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags & SF_TRANSLUCENT) {
442 Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, true>(bp, zoom);
444 Draw<BM_NORMAL, RM_WITH_MARGIN, BT_NONE, false>(bp, zoom);
/* Sprites flagged SF_NO_REMAP are drawn through the bm_normal label instead of being remapped. */
451 if (((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags & SF_NO_REMAP)
goto bm_normal;
453 Draw<BM_COLOUR_REMAP, RM_WITH_SKIP, BT_NONE, true>(bp, zoom);
return;
455 Draw<BM_COLOUR_REMAP, RM_WITH_MARGIN, BT_NONE, true>(bp, zoom);
return;
/* The remaining modes use RM_NONE and BT_NONE. */
457 case BM_TRANSPARENT: Draw<BM_TRANSPARENT, RM_NONE, BT_NONE, true>(bp, zoom);
return;
458 case BM_CRASH_REMAP: Draw<BM_CRASH_REMAP, RM_NONE, BT_NONE, true>(bp, zoom);
return;
459 case BM_BLACK_REMAP: Draw<BM_BLACK_REMAP, RM_NONE, BT_NONE, true>(bp, zoom);
return;
int left
The left offset in the 'dst' in pixels to start drawing.
int height
The height in pixels that needs to be drawn to dst.
Perform transparency colour remapping.
int skip_top
How many pixels of the source to skip on the top (based on zoom of dst)
uint32 data
Conversion of the channel information to a 32 bit number.
int width
The width in pixels that needs to be drawn to dst.
uint8 a
colour channels in LE order
int skip_left
How many pixels of the source to skip on the left (based on zoom of dst)
Parameters related to blitting.
int pitch
The pitch of the destination buffer.
Perform a crash remapping.
Perform remapping to a completely blackened sprite.
int top
The top offset in the 'dst' in pixels to start drawing.
const byte * remap
XXX – Temporary storage for remap array.
const void * sprite
Pointer to the sprite, however the encoder stored it.
Perform a colour remapping.
void * dst
Destination buffer.
Structure to access the alpha, red, green, and blue channels from a 32 bit number.
BlitterMode
The modes of blitting we can do.
ZoomLevel
All zoom levels we know.