#include "SM_DemoScriptPCH.h"
#include "SM_Conversiones.h"

#ifdef USEDDRAW


// Converts a 32-bit-per-pixel surface (0x00RRGGBB in each 32-bit word, top
// byte ignored) into a 16-bit-per-pixel R5G5B5 surface. Plain C++ reference
// version; it produces the same bit layout as ConvertA0R8G8B8toR5G5B5MMX:
//   bits 10-14 = top 5 bits of red
//   bits  5-9  = top 5 bits of green
//   bits  0-4  = top 5 bits of blue
//
//   pDestiny       - first byte of the destination surface (16 bits/pixel)
//   uDestinyStride - distance in bytes between consecutive destination rows
//   pSource        - first byte of the source surface (32 bits/pixel)
//   uSourceStride  - distance in bytes between consecutive source rows
//   uWidth         - number of pixels converted in each row
//   uHeight        - number of rows converted
//
// Exactly uWidth pixels per row are read and written; any row padding implied
// by the strides is left untouched. Nothing happens when uWidth or uHeight is 0.
void ConvertA0R8G8B8toR5G5B5(void* pDestiny, unsigned uDestinyStride, 
                                void* pSource,  unsigned uSourceStride,
                                unsigned uWidth,
                                unsigned uHeight)
{
  // Strides are expressed in bytes, so rows are walked with byte pointers.
  const unsigned char* pSrcRow = (const unsigned char*)pSource;
  unsigned char*       pDstRow = (unsigned char*)pDestiny;
  unsigned             x, y;

  for (y = 0; y < uHeight; y++)
  {
    const unsigned* pSrc = (const unsigned*)pSrcRow;
    unsigned short* pDst = (unsigned short*)pDstRow;

    for (x = 0; x < uWidth; x++)
    {
      const unsigned uPixel = pSrc[x];

      // Move the top 5 bits of every channel to its 5-5-5 position.
      pDst[x] = (unsigned short)(((uPixel >> 9) & 0x7C00) |   // red   19-23 -> 10-14
                                 ((uPixel >> 6) & 0x03E0) |   // green 11-15 ->  5-9
                                 ((uPixel >> 3) & 0x001F));   // blue   3-7  ->  0-4
    }

    pSrcRow += uSourceStride;
    pDstRow += uDestinyStride;
  }
}

// Converts a 32-bit-per-pixel surface (0x00RRGGBB in each 32-bit word, top
// byte ignored) into a 16-bit-per-pixel R5G6B5 surface. Plain C++ reference
// version; it produces the same bit layout as ConvertA0R8G8B8toR5G6B5MMX:
//   bits 11-15 = top 5 bits of red
//   bits  5-10 = top 6 bits of green
//   bits  0-4  = top 5 bits of blue
//
//   pDestiny       - first byte of the destination surface (16 bits/pixel)
//   uDestinyStride - distance in bytes between consecutive destination rows
//   pSource        - first byte of the source surface (32 bits/pixel)
//   uSourceStride  - distance in bytes between consecutive source rows
//   uWidth         - number of pixels converted in each row
//   uHeight        - number of rows converted
//
// Exactly uWidth pixels per row are read and written; any row padding implied
// by the strides is left untouched. Nothing happens when uWidth or uHeight is 0.
void ConvertA0R8G8B8toR5G6B5(void* pDestiny, unsigned uDestinyStride, 
                                void* pSource,  unsigned uSourceStride,
                                unsigned uWidth,
                                unsigned uHeight)
{
  // Strides are expressed in bytes, so rows are walked with byte pointers.
  const unsigned char* pSrcRow = (const unsigned char*)pSource;
  unsigned char*       pDstRow = (unsigned char*)pDestiny;
  unsigned             x, y;

  for (y = 0; y < uHeight; y++)
  {
    const unsigned* pSrc = (const unsigned*)pSrcRow;
    unsigned short* pDst = (unsigned short*)pDstRow;

    for (x = 0; x < uWidth; x++)
    {
      const unsigned uPixel = pSrc[x];

      // Move the top bits of every channel to its 5-6-5 position.
      pDst[x] = (unsigned short)(((uPixel >> 8) & 0xF800) |   // red   19-23 -> 11-15
                                 ((uPixel >> 5) & 0x07E0) |   // green 10-15 ->  5-10
                                 ((uPixel >> 3) & 0x001F));   // blue   3-7  ->  0-4
    }

    pSrcRow += uSourceStride;
    pDstRow += uDestinyStride;
  }
}


// Converts a 32-bit-per-pixel surface (0x00RRGGBB in each 32-bit word, top
// byte ignored) into a 16-bit-per-pixel R5G5B5 surface using MMX through MSVC
// x86 inline assembly. Eight pixels are converted per inner-loop iteration.
//
//   pDestiny       - first byte of the destination surface (16 bits/pixel)
//   uDestinyStride - distance in bytes between consecutive destination rows
//   pSource        - first byte of the source surface (32 bits/pixel)
//   uSourceStride  - distance in bytes between consecutive source rows
//   uWidth         - number of pixels per row
//   uHeight        - number of rows
//
// Returns immediately when uWidth or uHeight is 0.
//
// Per-pixel method: the red and blue bytes are masked to their top 5 bits and
// a single pmaddwd shifts both into place (blue * 8, red * 0x2000); green is
// masked in place and ORed in, and a final right shift by 6 leaves
// R in bits 10-14, G in bits 5-9 and B in bits 0-4.
//
// NOTE: every row is processed in whole groups of 8 pixels, from the last
// group down to the first, so a width that is not a multiple of 8 is rounded
// up: up to 7 extra pixels are read from the source and written to the
// destination beyond uWidth.
// NOTE: the inner loop is software-pipelined. After the group at index 0 has
// been handled it still loads 16 bytes starting 32 bytes before the beginning
// of the source row (the values are discarded), so that memory must be
// readable.
void ConvertA0R8G8B8toR5G5B5MMX(void* pDestiny, unsigned uDestinyStride, 
                                void* pSource,  unsigned uSourceStride,
                                unsigned uWidth,
                                unsigned uHeight)
{
  __int64 rgbMulFactor = 0x2000000820000008;   // per pixel: blue * 0x0008, red * 0x2000
  __int64 rgbMask1     = 0x00f800f800f800f8;   // top 5 bits of red and blue
  __int64 rgbMask2     = 0x0000f8000000f800;   // top 5 bits of green

  // The assembly counts rows with dec/jnz and derives its first index from
  // uWidth - 1: a height of 0 would wrap the row counter and a width of 0
  // would start at index -8, i.e. before both buffers.
  if (uWidth == 0 || uHeight == 0)
    return;

  __asm
  {
    mov     eax, uWidth
    mov     ebx, pSource              // ebx = current source row
    mov     edx, pDestiny             // edx = current destination row
    mov     ecx, uHeight              // ecx = rows left

  NextRow:
    sub     eax, 1
    and     eax, 0xFFFFFFF8           // eax = index of the first pixel of the last group of 8

    // Pipeline prologue: start pixels 0-3 of the first group.
    movq    mm7, rgbMulFactor         // mm7 = pixel multiplication factor
    movq    mm6, rgbMask2             // mm6 = green pixel mask
    movq    mm2, [ebx+eax*4+8]        // get pixels 2 and 3
    movq    mm0, [ebx+eax*4]          // get pixels 0 and 1
    movq    mm3, mm2                  // copy pixels 2 and 3
    pand    mm3, rgbMask1             // get R and B of pixels 2 and 3
    movq    mm1, mm0                  // copy pixels 0 and 1
    pand    mm1, rgbMask1             // get R and B of pixels 0 and 1
    pmaddwd mm3, mm7                  // SHIFT-OR pixels 2 and 3
    pmaddwd mm1, mm7                  // SHIFT-OR pixels 0 and 1
    pand    mm2, mm6                  // get G of pixels 2 and 3

    // Each pass finishes one group of 8 pixels and starts pixels 0-3 of the
    // next (lower) group.
  NextGroup:
    movq    mm4, [ebx+eax*4+24]       // get pixels 6 and 7
    pand    mm0, mm6                  // get G of pixels 0 and 1

    movq    mm5, [ebx+eax*4+16]       // get pixels 4 and 5
    por     mm3, mm2                  // OR to get RGB of pixels 2 and 3

    psrld   mm3, 6                    // shift pixels 2 and 3 down to bits 0-14
    por     mm1, mm0                  // OR to get RGB of pixels 0 and 1

    movq    mm0, mm4                  // copy pixels 6 and 7
    psrld   mm1, 6                    // shift pixels 0 and 1 down to bits 0-14

    pand    mm0, rgbMask1             // get R and B of pixels 6 and 7
    packssdw mm1, mm3                 // combine pixels 0, 1, 2 and 3 (values <= 0x7FFF, no saturation)

    movq    mm3, mm5                  // copy pixels 4 and 5
    pmaddwd mm0, mm7                  // SHIFT-OR pixels 6 and 7

    pand    mm3, rgbMask1             // get R and B of pixels 4 and 5
    pand    mm4, mm6                  // get G of pixels 6 and 7

    movq    [edx+eax*2], mm1          // store pixels 0, 1, 2 and 3
    pmaddwd mm3, mm7                  // SHIFT-OR pixels 4 and 5

    sub     eax, 8                    // step to the previous group; these flags feed the jge below
                                      // (none of the MMX instructions in between modify EFLAGS)
    por     mm4, mm0                  // OR to get RGB of pixels 6 and 7

    pand    mm5, mm6                  // get G of pixels 4 and 5
    psrld   mm4, 6                    // shift pixels 6 and 7 down to bits 0-14

    movq    mm2, [ebx+eax*4+8]        // get pixels 2 and 3 for the next loop iteration
    por     mm5, mm3                  // OR to get RGB of pixels 4 and 5

    movq    mm0, [ebx+eax*4]          // get pixels 0 and 1 for the next loop iteration
    psrld   mm5, 6                    // shift pixels 4 and 5 down to bits 0-14

    movq    mm3, mm2                  // copy pixels 2 and 3
    movq    mm1, mm0                  // copy pixels 0 and 1

    pand    mm3, rgbMask1             // get R and B of pixels 2 and 3
    packssdw mm5, mm4                 // combine pixels 4, 5, 6 and 7

    pand    mm1, rgbMask1             // get R and B of pixels 0 and 1
    pand    mm2, mm6                  // get G of pixels 2 and 3

    movq    [edx+eax*2+24], mm5       // store pixels 4, 5, 6 and 7 (eax is already 8 lower, hence +24)
    pmaddwd mm3, mm7                  // SHIFT-OR pixels 2 and 3

    pmaddwd mm1, mm7                  // SHIFT-OR pixels 0 and 1
    jge     NextGroup                 // more groups in this row while the index is >= 0

    add     ebx, uSourceStride        // advance to the next source row
    add     edx, uDestinyStride       // advance to the next destination row
    dec     ecx
    mov     eax, uWidth               // mov leaves the flags from dec untouched
    jnz     NextRow
    emms                              // leave MMX state so x87 code can run again
  }
}
	

// Converts a 32-bit-per-pixel surface (0x00RRGGBB in each 32-bit word, top
// byte ignored) into a 16-bit-per-pixel R5G6B5 surface using MMX through MSVC
// x86 inline assembly. Eight pixels are converted per inner-loop iteration.
//
//   pDestiny       - first byte of the destination surface (16 bits/pixel)
//   uDestinyStride - distance in bytes between consecutive destination rows
//   pSource        - first byte of the source surface (32 bits/pixel)
//   uSourceStride  - distance in bytes between consecutive source rows
//   uWidth         - number of pixels per row
//   uHeight        - number of rows
//
// Returns immediately when uWidth or uHeight is 0.
//
// Per-pixel method: the red and blue bytes are masked to their top 5 bits and
// a single pmaddwd shifts both into place (blue * 4, red * 0x2000); green is
// masked to its top 6 bits and ORed in, and a final right shift by 5 leaves
// R in bits 11-15, G in bits 5-10 and B in bits 0-4. The 16-bit results can
// have bit 15 set, so they cannot be narrowed with packssdw (signed
// saturation); they are packed by hand with masks and 64-bit shifts instead.
//
// NOTE: every row is processed in whole groups of 8 pixels, from the last
// group down to the first, so a width that is not a multiple of 8 is rounded
// up: up to 7 extra pixels are read from the source and written to the
// destination beyond uWidth.
// NOTE: the inner loop is software-pipelined. After the group at index 0 has
// been handled it still loads 16 bytes starting 32 bytes before the beginning
// of the source row (the values are discarded), so that memory must be
// readable.
void ConvertA0R8G8B8toR5G6B5MMX(void* pDestiny, unsigned uDestinyStride, 
                                void* pSource,  unsigned uSourceStride,
                                unsigned uWidth,
                                unsigned uHeight)
{
  __int64  rgbMulFactor  =0x2000000420000004;   // per pixel: blue * 0x0004, red * 0x2000
  __int64  rgbMask1      =0x00f800f800f800f8;   // top 5 bits of red and blue
  __int64  rgbMask2      =0x0000fC000000fC00;   // top 6 bits of green
  __int64  rgbMask3      =0x00000000FFFFFFFF;   // keeps the low pixel of a pair
  __int64  rgbMask4      =0xFFFFFFFF00000000;   // keeps the high pixel of a pair
  __int64  aux;                                 // spill slot for mm0 during the second packing step

  // The assembly counts rows with dec/jnz and derives its first index from
  // uWidth - 1: a height of 0 would wrap the row counter and a width of 0
  // would start at index -8, i.e. before both buffers.
  if (uWidth == 0 || uHeight == 0)
    return;

  __asm
  {
    mov     eax, uWidth
    mov     ebx, pSource              // ebx = current source row
    mov     edx, pDestiny             // edx = current destination row
    mov     ecx, uHeight              // ecx = rows left

  NextRow:
    sub     eax, 1
    and     eax, 0xFFFFFFF8           // eax = index of the first pixel of the last group of 8

    // Pipeline prologue: start pixels 0-3 of the first group.
    movq    mm7, rgbMulFactor         // mm7 = pixel multiplication factor
    movq    mm6, rgbMask2             // mm6 = green pixel mask
    movq    mm2, [ebx+eax*4+8]        // get pixels 2 and 3
    movq    mm0, [ebx+eax*4]          // get pixels 0 and 1
    movq    mm3, mm2                  // copy pixels 2 and 3
    pand    mm3, rgbMask1             // get R and B of pixels 2 and 3
    movq    mm1, mm0                  // copy pixels 0 and 1
    pand    mm1, rgbMask1             // get R and B of pixels 0 and 1
    pmaddwd mm3, mm7                  // SHIFT-OR pixels 2 and 3
    pmaddwd mm1, mm7                  // SHIFT-OR pixels 0 and 1
    pand    mm2, mm6                  // get G of pixels 2 and 3

    // Each pass finishes one group of 8 pixels and starts pixels 0-3 of the
    // next (lower) group.
  NextGroup:
    movq    mm4, [ebx+eax*4+24]       // get pixels 6 and 7
    pand    mm0, mm6                  // get G of pixels 0 and 1

    movq    mm5, [ebx+eax*4+16]       // get pixels 4 and 5
    por     mm3, mm2                  // OR to get RGB of pixels 2 and 3

    psrld   mm3, 5                    // shift pixels 2 and 3 down to bits 0-15
    por     mm1, mm0                  // OR to get RGB of pixels 0 and 1

    movq    mm0, mm4                  // copy pixels 6 and 7
    psrld   mm1, 5                    // shift pixels 0 and 1 down to bits 0-15

    pand    mm0, rgbMask1             // get R and B of pixels 6 and 7

    // Pack pixels 0-3 into four consecutive words (mm2 is free scratch here).
    movq    mm2, mm1
    pand    mm1, rgbMask3             // pixel 0 stays in bits 0-15
    psrlq   mm2, 16                   // pixel 1: bits 32-47 -> 16-31 (pixel 0 is shifted out)
    por     mm1, mm2
    movq    mm2, mm3
    psllq   mm3, 32                   // pixel 2: bits 0-15 -> 32-47 (pixel 3 is shifted out)
    pand    mm2, rgbMask4             // pixel 3 alone, in bits 32-47
    psllq   mm2, 16                   // pixel 3: bits 32-47 -> 48-63
    por     mm3, mm2

    por     mm1, mm3                  // final pixels 0, 1, 2 and 3

    movq    mm3, mm5                  // copy pixels 4 and 5
    pmaddwd mm0, mm7                  // SHIFT-OR pixels 6 and 7

    pand    mm3, rgbMask1             // get R and B of pixels 4 and 5
    pand    mm4, mm6                  // get G of pixels 6 and 7

    movq    [edx+eax*2], mm1          // store pixels 0, 1, 2 and 3
    pmaddwd mm3, mm7                  // SHIFT-OR pixels 4 and 5

    sub     eax, 8                    // step to the previous group; these flags feed the jge below
                                      // (none of the MMX instructions in between modify EFLAGS)
    por     mm4, mm0                  // OR to get RGB of pixels 6 and 7

    pand    mm5, mm6                  // get G of pixels 4 and 5
    psrld   mm4, 5                    // shift pixels 6 and 7 down to bits 0-15

    movq    mm2, [ebx+eax*4+8]        // get pixels 2 and 3 for the next loop iteration
    por     mm5, mm3                  // OR to get RGB of pixels 4 and 5

    movq    mm0, [ebx+eax*4]          // get pixels 0 and 1 for the next loop iteration
    psrld   mm5, 5                    // shift pixels 4 and 5 down to bits 0-15

    movq    mm3, mm2                  // copy pixels 2 and 3
    movq    mm1, mm0                  // copy pixels 0 and 1

    pand    mm3, rgbMask1             // get R and B of pixels 2 and 3

    // Pack pixels 4-7 into four consecutive words. No MMX register is free at
    // this point, so mm0 (next iteration's pixels 0 and 1) is parked in aux.
    movq    aux, mm0
    movq    mm0, mm5
    pand    mm5, rgbMask3             // pixel 4 stays in bits 0-15
    psrlq   mm0, 16                   // pixel 5: bits 32-47 -> 16-31 (pixel 4 is shifted out)
    por     mm5, mm0
    movq    mm0, mm4
    psllq   mm4, 32                   // pixel 6: bits 0-15 -> 32-47 (pixel 7 is shifted out)
    pand    mm0, rgbMask4             // pixel 7 alone, in bits 32-47
    psllq   mm0, 16                   // pixel 7: bits 32-47 -> 48-63
    por     mm4, mm0

    por     mm5, mm4                  // final pixels 4, 5, 6 and 7
    movq    mm0, aux                  // restore next iteration's pixels 0 and 1

    pand    mm1, rgbMask1             // get R and B of pixels 0 and 1
    pand    mm2, mm6                  // get G of pixels 2 and 3

    movq    [edx+eax*2+24], mm5       // store pixels 4, 5, 6 and 7 (eax is already 8 lower, hence +24)
    pmaddwd mm3, mm7                  // SHIFT-OR pixels 2 and 3

    pmaddwd mm1, mm7                  // SHIFT-OR pixels 0 and 1
    jge     NextGroup                 // more groups in this row while the index is >= 0

    add     ebx, uSourceStride        // advance to the next source row
    add     edx, uDestinyStride       // advance to the next destination row
    dec     ecx
    mov     eax, uWidth               // mov leaves the flags from dec untouched
    jnz     NextRow
    emms                              // leave MMX state so x87 code can run again
  }
}

#endif