OpenTTD
viewport_sprite_sorter_sse4.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #ifdef WITH_SSE
11 
12 #include "stdafx.h"
13 #include "cpu.h"
14 #include "smmintrin.h"
15 #include "viewport_sprite_sorter.h"
16 
17 #include "safeguards.h"
18 
/* Select the 128-bit integer load intrinsic that the sprite sorter uses via LOAD_128. */
#ifndef _SQ64
	/* Without _SQ64 no size guarantee is checked, so use the load that accepts unaligned addresses. */
	#define LOAD_128 _mm_loadu_si128
#else
	/* The aligned load requires 16-byte aligned addresses; insist that the
	 * sprite structure size is a multiple of 16 bytes before relying on it.
	 * NOTE(review): this checks the size only; the alignment of the objects
	 * themselves is presumably guaranteed by their allocation - confirm. */
	assert_compile((sizeof(ParentSpriteToDraw) % 16) == 0);
	#define LOAD_128 _mm_load_si128
#endif
25 
27 void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
28 {
29  const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
30  auto const psdvend = psdv->end();
31  auto psd = psdv->begin();
32  while (psd != psdvend) {
33  ParentSpriteToDraw * const ps = *psd;
34 
35  if (ps->comparison_done) {
36  psd++;
37  continue;
38  }
39 
40  ps->comparison_done = true;
41 
42  for (auto psd2 = psd + 1; psd2 != psdvend; psd2++) {
43  ParentSpriteToDraw * const ps2 = *psd2;
44 
45  if (ps2->comparison_done) continue;
46 
47  /*
48  * Decide which comparator to use, based on whether the bounding boxes overlap
49  *
50  * Original code:
51  * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X?
52  * ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y?
53  * ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z?
54  *
55  * Above conditions are equivalent to:
56  * 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) )
57  * 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) )
58  * 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) )
59  * 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) )
60  * 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 -------------------------------------->
61  */
62  __m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
63  __m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
64  __m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
65  if (!_mm_testz_si128(mask_ptest, rslt1))
66  continue;
67 
68  __m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
69  __m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);
70  __m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min);
71  if (_mm_testz_si128(mask_ptest, rslt2)) {
72  /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
73  * the screen and with higher Z elevation, are drawn in front.
74  * Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
75  * i.e. X=(left+right)/2, etc.
76  * However, since we only care about order, don't actually divide / 2
77  */
78  if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <=
79  ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) {
80  continue;
81  }
82  }
83 
84  /* Move ps2 in front of ps */
85  ParentSpriteToDraw * const temp = ps2;
86  for (auto psd3 = psd2; psd3 > psd; psd3--) {
87  *psd3 = *(psd3 - 1);
88  }
89  *psd = temp;
90  }
91  }
92 }
93 
98 bool ViewportSortParentSpritesSSE41Checker()
99 {
100  return HasCPUIDFlag(1, 2, 19);
101 }
102 
103 #endif /* WITH_SSE */
int32 zmin
minimal world Z coordinate of bounding box
int32 zmax
maximal world Z coordinate of bounding box
int32 ymin
minimal world Y coordinate of bounding box
Definition of base types and functions in a cross-platform compatible way.
A number of safeguards to prevent using unsafe methods.
int32 xmin
minimal world X coordinate of bounding box
Functions related to CPU specific instructions.
Parent sprite that should be drawn.
int32 ymax
maximal world Y coordinate of bounding box
int32 xmax
maximal world X coordinate of bounding box
bool HasCPUIDFlag(uint type, uint index, uint bit)
Check whether the current CPU has the given flag.
Definition: cpu.cpp:128
Types related to sprite sorting.
bool comparison_done
Used during sprite sorting: true if sprite has been compared with all other sprites.