SSE2: Remove dead code

2014-06-13 22:21:07 +02:00 · 2014-06-13 22:21:07 +02:00 · f51843a721
commit f51843a721
parent 56c59d114d
3 changed files with 0 additions and 510 deletions
--- a/code/nel/include/nel/3d/matrix_3x4.h
+++ b/code/nel/include/nel/3d/matrix_3x4.h
@ -108,281 +108,6 @@ public:
 };
 // ***************************************************************************
 // ***************************************************************************
 // SSE Matrix
 // ***************************************************************************
 // ***************************************************************************
 // ***************************************************************************
 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
 /** 3x4 matrix laid out for SSE column multiplication. Special usage for Skinning.
 *	The 16 floats are stored column by column, so each column can be used directly as a
 *	packed-float memory operand by the inline assembly below.
 *	The methods use MSVC-style __asm blocks with 32-bit registers (eax, ebx, edi).
 *	The matrix columns are used as memory operands of mulps/addps; the legacy SSE encoding of
 *	those instructions requires the operand to be 16-byte aligned, so the object itself has to be
 *	16-byte aligned (nothing in this class enforces it).
 *	NB: SSE is no longer used (no speed gain, some memory problems), but the class is kept for possible future use.
 */
 class	CMatrix3x4SSE
 {
 public:
 	// Order them in memory column first, for SSE column multiplication.
 	// Each line below is one column (aRC: R = row, C = column).
 	float	a11, a21, a31, a41;
 	float	a12, a22, a32, a42;
 	float	a13, a23, a33, a43;
 	float	a14, a24, a34, a44;
 	// Copy from a matrix.
 	// Reads indices 0-2, 4-6, 8-10 and 12-14 of the float array returned by mat.get();
 	// consecutive indices go down a column (m[0], m[1], m[2] fill a11, a21, a31).
 	void	set(const CMatrix &mat)
 	{
 		const float	*m =mat.get();
 		a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12];
 		a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13];
 		a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14];
 		// Fourth row: forced to (0, 0, 0, 1). The results computed from it (lane 3) are never stored.
 		a41= 0   ; a42= 0   ; a43= 0    ; a44= 1;
 	}
 	// mulSetVector: vout = vin.x*column1 + vin.y*column2 + vin.z*column3 (no translation).
 	// NB (original author's warning): vin should be a different object than vout.
 	void	mulSetVector(const CVector &vin, CVector &vout)
 	{
 		__asm
 		{
 			mov		eax, vin
 			mov		ebx, this
 			mov		edi, vout
 			// Load each input component into lane 0 of its own register
 			movss	xmm0, [eax]vin.x
 			movss	xmm1, [eax]vin.y
 			movss	xmm2, [eax]vin.z
 			// Broadcast lane 0 to the 4 lanes of each register
 			shufps	xmm0, xmm0, 0
 			shufps	xmm1, xmm1, 0
 			shufps	xmm2, xmm2, 0
 			// Multiply each broadcast component by its matrix column
 			mulps	xmm0, [ebx]this.a11
 			mulps	xmm1, [ebx]this.a12
 			mulps	xmm2, [ebx]this.a13
 			// Sum the three scaled columns: xmm0 = (rx, ry, rz, unused)
 			addps	xmm0, xmm1
 			addps	xmm0, xmm2
 			// Write the result: lane 0 is rx.
 			movss	[edi]vout.x, xmm0
 			// Shuffle 33 reorders the lanes to (ry, rx, rz, rx), so lane 0 is now ry.
 			shufps	xmm0, xmm0, 33
 			movss	[edi]vout.y, xmm0
 			// Copy the two upper lanes over the two lower ones, so lane 0 is now rz.
 			movhlps	xmm0, xmm0
 			movss	[edi]vout.z, xmm0
 		}
 	}
 	// mulSetPoint: same as mulSetVector, plus the translation column (a14, a24, a34).
 	// NB (original author's warning): vin should be a different object than vout.
 	void	mulSetPoint(const CVector &vin, CVector &vout)
 	{
 		__asm
 		{
 			mov		eax, vin
 			mov		ebx, this
 			mov		edi, vout
 			// Load each input component into lane 0 of its own register
 			movss	xmm0, [eax]vin.x
 			movss	xmm1, [eax]vin.y
 			movss	xmm2, [eax]vin.z
 			// Broadcast lane 0 to the 4 lanes of each register
 			shufps	xmm0, xmm0, 0
 			shufps	xmm1, xmm1, 0
 			shufps	xmm2, xmm2, 0
 			// Multiply each broadcast component by its matrix column
 			mulps	xmm0, [ebx]this.a11
 			mulps	xmm1, [ebx]this.a12
 			mulps	xmm2, [ebx]this.a13
 			// Sum the three scaled columns.
 			addps	xmm0, xmm1
 			addps	xmm0, xmm2
 			// Add Matrix translate column vector
 			addps	xmm0, [ebx]this.a14
 			// Write the result, one lane at a time (same shuffle sequence as mulSetVector).
 			movss	[edi]vout.x, xmm0
 			shufps	xmm0, xmm0, 33
 			movss	[edi]vout.y, xmm0
 			movhlps	xmm0, xmm0
 			movss	[edi]vout.z, xmm0
 		}
 	}
 	// Scaled mulSetVector: computes scale * (vin.x*column1 + vin.y*column2 + vin.z*column3).
 	// This version does NOT store anything into vout: the result is left in xmm4, and the broadcast
 	// components of vin are left in xmm5/xmm6/xmm7, for a following mulAddVector()/mulAddPoint() call.
 	// NOTE(review): nothing here saves these registers, so this only works if no code in between
 	// modifies xmm4-xmm7 -- confirm against the callers and the compiler before reusing.
 	// NB (original author's warning): vin should be a different object than vout.
 	void	mulSetVector(const CVector &vin, float scale, CVector &vout)
 	{
 		__asm
 		{
 			mov		eax, vin
 			mov		ebx, this
 			mov		edi, vout
 			// Load each input component into lane 0 of its own register
 			movss	xmm0, [eax]vin.x
 			movss	xmm1, [eax]vin.y
 			movss	xmm2, [eax]vin.z
 			// Load scale into lane 0 of xmm3
 			movss	xmm3, scale
 			// Broadcast lane 0 to the 4 lanes of each register
 			shufps	xmm0, xmm0, 0
 			shufps	xmm1, xmm1, 0
 			shufps	xmm2, xmm2, 0
 			shufps	xmm3, xmm3, 0
 			// Keep the broadcast input components in xmm5-xmm7 (read back by the mulAdd* methods).
 			movaps	xmm5, xmm0
 			movaps	xmm6, xmm1
 			movaps	xmm7, xmm2
 			// Multiply each broadcast component by its matrix column
 			mulps	xmm0, [ebx]this.a11
 			mulps	xmm1, [ebx]this.a12
 			mulps	xmm2, [ebx]this.a13
 			// Sum the three scaled columns.
 			addps	xmm0, xmm1
 			addps	xmm0, xmm2
 			// mul final result with scale
 			mulps	xmm0, xmm3
 			// Keep the partial sum in xmm4 (accumulated by the mulAdd* methods).
 			movaps	xmm4, xmm0
 		}
 	}
 	// Scaled mulSetPoint: computes scale * (vin.x*column1 + vin.y*column2 + vin.z*column3 + column4).
 	// This version does NOT store anything into vout: the result is left in xmm4, and the broadcast
 	// components of vin are left in xmm5/xmm6/xmm7, for a following mulAddVector()/mulAddPoint() call.
 	// NOTE(review): nothing here saves these registers, so this only works if no code in between
 	// modifies xmm4-xmm7 -- confirm against the callers and the compiler before reusing.
 	// NB (original author's warning): vin should be a different object than vout.
 	void	mulSetPoint(const CVector &vin, float scale, CVector &vout)
 	{
 		__asm
 		{
 			mov		eax, vin
 			mov		ebx, this
 			mov		edi, vout
 			// Load each input component into lane 0 of its own register
 			movss	xmm0, [eax]vin.x
 			movss	xmm1, [eax]vin.y
 			movss	xmm2, [eax]vin.z
 			// Load scale into lane 0 of xmm3
 			movss	xmm3, scale
 			// Broadcast lane 0 to the 4 lanes of each register
 			shufps	xmm0, xmm0, 0
 			shufps	xmm1, xmm1, 0
 			shufps	xmm2, xmm2, 0
 			shufps	xmm3, xmm3, 0
 			// Keep the broadcast input components in xmm5-xmm7 (read back by the mulAdd* methods).
 			movaps	xmm5, xmm0
 			movaps	xmm6, xmm1
 			movaps	xmm7, xmm2
 			// Multiply each broadcast component by its matrix column
 			mulps	xmm0, [ebx]this.a11
 			mulps	xmm1, [ebx]this.a12
 			mulps	xmm2, [ebx]this.a13
 			// Sum the three scaled columns.
 			addps	xmm0, xmm1
 			addps	xmm0, xmm2
 			// Add Matrix translate column vector
 			addps	xmm0, [ebx]this.a14
 			// mul final result with scale
 			mulps	xmm0, xmm3
 			// Keep the partial sum in xmm4 (accumulated by the mulAdd* methods).
 			movaps	xmm4, xmm0
 		}
 	}
 	// mulAddVector: accumulates scale * (M3x3 * v) onto the running sum and writes the total to vout.
 	// The vin parameter is unused: the input components are taken from xmm5/xmm6/xmm7 and the
 	// running sum from xmm4, as left there by a previous scaled mulSetVector()/mulSetPoint() or
 	// mulAdd*() call. The new sum is kept in xmm4 so that further mulAdd*() calls can be chained.
 	// NOTE(review): relies on xmm4-xmm7 being untouched since that previous call -- confirm.
 	void	mulAddVector(const CVector &/* vin */, float scale, CVector &vout)
 	{
 		__asm
 		{
 			mov		ebx, this
 			mov		edi, vout
 			// Reload the broadcast input components saved by the scaled mulSetVector/mulSetPoint
 			movaps	xmm0, xmm5
 			movaps	xmm1, xmm6
 			movaps	xmm2, xmm7
 			// Load scale into lane 0 of xmm3
 			movss	xmm3, scale
 			// Broadcast scale to the 4 lanes
 			shufps	xmm3, xmm3, 0
 			// Multiply each broadcast component by its matrix column
 			mulps	xmm0, [ebx]this.a11
 			mulps	xmm1, [ebx]this.a12
 			mulps	xmm2, [ebx]this.a13
 			// Sum the three scaled columns.
 			addps	xmm0, xmm1
 			addps	xmm0, xmm2
 			// mul final result with scale
 			mulps	xmm0, xmm3
 			// Add the running sum left in xmm4 by the previous call.
 			addps	xmm0, xmm4
 			// Keep the new running sum in xmm4 for a possible next mulAdd* call.
 			movaps	xmm4, xmm0
 			// Write the result, one lane at a time: shuffle 33 brings lane 1 (y) into lane 0,
 			// then movhlps brings the former lane 2 (z) into lane 0.
 			movss	[edi]vout.x, xmm0
 			shufps	xmm0, xmm0, 33
 			movss	[edi]vout.y, xmm0
 			movhlps	xmm0, xmm0
 			movss	[edi]vout.z, xmm0
 		}
 	}
 	// mulAddPoint: same as mulAddVector, but the translation column is added before scaling,
 	// i.e. it accumulates scale * (M3x3 * v + column4) onto the running sum and writes the total to vout.
 	// The vin parameter is unused: the input components are taken from xmm5/xmm6/xmm7 and the
 	// running sum from xmm4, as left there by a previous scaled mulSetVector()/mulSetPoint() or
 	// mulAdd*() call. The new sum is kept in xmm4 so that further mulAdd*() calls can be chained.
 	// NOTE(review): relies on xmm4-xmm7 being untouched since that previous call -- confirm.
 	void	mulAddPoint(const CVector &/* vin */, float scale, CVector &vout)
 	{
 		__asm
 		{
 			mov		ebx, this
 			mov		edi, vout
 			// Reload the broadcast input components saved by the scaled mulSetVector/mulSetPoint
 			movaps	xmm0, xmm5
 			movaps	xmm1, xmm6
 			movaps	xmm2, xmm7
 			// Load scale into lane 0 of xmm3
 			movss	xmm3, scale
 			// Broadcast scale to the 4 lanes
 			shufps	xmm3, xmm3, 0
 			// Multiply each broadcast component by its matrix column
 			mulps	xmm0, [ebx]this.a11
 			mulps	xmm1, [ebx]this.a12
 			mulps	xmm2, [ebx]this.a13
 			// Sum the three scaled columns.
 			addps	xmm0, xmm1
 			addps	xmm0, xmm2
 			// Add Matrix translate column vector
 			addps	xmm0, [ebx]this.a14
 			// mul final result with scale
 			mulps	xmm0, xmm3
 			// Add the running sum left in xmm4 by the previous call.
 			addps	xmm0, xmm4
 			// Keep the new running sum in xmm4 for a possible next mulAdd* call.
 			movaps	xmm4, xmm0
 			// Write the result, one lane at a time: shuffle 33 brings lane 1 (y) into lane 0,
 			// then movhlps brings the former lane 2 (z) into lane 0.
 			movss	[edi]vout.x, xmm0
 			shufps	xmm0, xmm0, 33
 			movss	[edi]vout.y, xmm0
 			movhlps	xmm0, xmm0
 			movss	[edi]vout.z, xmm0
 		}
 	}
 };
 #else // NL_OS_WINDOWS
 /// Placeholder CMatrix3x4SSE for builds without the inline-assembly version: a plain CMatrix3x4 under the SSE name.
 class CMatrix3x4SSE : public CMatrix3x4
 {
 };
 #endif
 } // NL3D
--- a/code/nel/src/3d/mesh_mrm_skin.cpp
+++ b/code/nel/src/3d/mesh_mrm_skin.cpp
@ -39,124 +39,6 @@ namespace NL3D
 {
 // ***************************************************************************
 // ***************************************************************************
 // CMatrix3x4SSE array correctly aligned
 // ***************************************************************************
 // ***************************************************************************
 // ***************************************************************************
 #define	NL3D_SSE_ALIGNEMENT		16
 /**
 *	Growable array of CMatrix3x4SSE whose first element is aligned on NL3D_SSE_ALIGNEMENT bytes.
 *	The buffer is over-allocated by NL3D_SSE_ALIGNEMENT-1 bytes with new uint8[], and the element
 *	pointer is the allocation address rounded up to the next multiple of NL3D_SSE_ALIGNEMENT
 *	(the rounding mask requires NL3D_SSE_ALIGNEMENT to be a power of two).
 *	Elements are handled as raw bytes (memcpy): no constructor or destructor is run on them.
 *	NB: SSE is no longer used (no speed gain, some memory problems), but it is kept for possible future use.
 */
 class	CMatrix3x4SSEArray
 {
 private:
 	// Pointer returned by new uint8[]; the only one that may be given to delete [].
 	void	*_AllocData;
 	// _AllocData rounded up to NL3D_SSE_ALIGNEMENT: start of the elements. NULL while nothing is allocated.
 	void	*_Data;
 	// Number of elements in use.
 	uint	_Size;
 	// Number of elements the current buffer can hold.
 	uint	_Capacity;
 public:
 	/// Build an empty array; nothing is allocated.
 	CMatrix3x4SSEArray()
 	{
 		_AllocData= NULL;
 		_Data= NULL;
 		_Size= 0;
 		_Capacity= 0;
 	}
 	/// Release the buffer.
 	~CMatrix3x4SSEArray()
 	{
 		clear();
 	}
 	/// Copy constructor: start empty, then reuse operator= to allocate and copy.
 	CMatrix3x4SSEArray(const CMatrix3x4SSEArray &other)
 	{
 		_AllocData= NULL;
 		_Data= NULL;
 		_Size= 0;
 		_Capacity= 0;
 		*this= other;
 	}
 	/// Resize to other.size() and copy its elements. The capacity never shrinks.
 	CMatrix3x4SSEArray &operator=(const CMatrix3x4SSEArray &other)
 	{
 		if( this == &other)
 			return *this;
 		resize(other.size());
 		// copy data from aligned pointers to aligned pointers.
 		// Skipped when empty: both pointers may then be NULL, and giving NULL to memcpy is
 		// undefined behaviour even for a size of 0.
 		if(size() > 0)
 			memcpy(_Data, other._Data, size() * sizeof(CMatrix3x4SSE) );
 		return *this;
 	}
 	/// Aligned pointer on the first element (NULL if nothing is allocated).
 	CMatrix3x4SSE	*getPtr()
 	{
 		return (CMatrix3x4SSE*)_Data;
 	}
 	/// Free the buffer and reset size and capacity to 0.
 	void	clear()
 	{
 		// delete [] on NULL is a no-op, no need to test.
 		delete [] ((uint8 *)_AllocData);
 		_AllocData= NULL;
 		_Data= NULL;
 		_Size= 0;
 		_Capacity= 0;
 	}
 	/// Set the number of elements to n. When growing past the capacity, the capacity is at least doubled.
 	/// New elements are left uninitialized.
 	void	resize(uint n)
 	{
 		if(n>_Capacity)
 			reserve( max(2*_Capacity, n));
 		_Size= n;
 	}
 	/// Ensure room for at least n elements, keeping the current ones.
 	/// NB: reserve(0) releases everything (same as clear()); a request that fits in the capacity does nothing.
 	void	reserve(uint n)
 	{
 		if(n==0)
 			clear();
 		else if(n>_Capacity)
 		{
 			// Alloc new data.
 			void	*newAllocData;
 			void	*newData;
 			// Alloc with enough slack to be able to round the pointer up.
 			newAllocData= new uint8 [n * sizeof(CMatrix3x4SSE) + NL3D_SSE_ALIGNEMENT-1];
 			// Kept for allocators that return NULL on failure; a standard new throws instead.
 			if(newAllocData==NULL)
 				throw Exception("SSE Allocation Failed");
 			// Align ptr: round up to the next multiple of NL3D_SSE_ALIGNEMENT.
 			newData= (void*) ( ((ptrdiff_t)newAllocData+NL3D_SSE_ALIGNEMENT-1) & (~(NL3D_SSE_ALIGNEMENT-1)) );
 			// copy valid data from old to new.
 			// Skipped when empty: _Data is NULL before the first allocation, and giving NULL to
 			// memcpy is undefined behaviour even for a size of 0.
 			if(size() > 0)
 				memcpy(newData, _Data, size() * sizeof(CMatrix3x4SSE) );
 			// release old (no-op when NULL).
 			delete [] ((uint8*)_AllocData);
 			// change ptrs and capacity.
 			_Data= newData;
 			_AllocData= newAllocData;
 			_Capacity= n;
 		}
 	}
 	/// Number of elements in use.
 	uint	size() const {return _Size;}
 	/// Unchecked access to element i.
 	CMatrix3x4SSE	&operator[](uint i) {return ((CMatrix3x4SSE*)_Data)[i];}
 };
 // ***************************************************************************
 // ***************************************************************************
--- a/code/nel/src/3d/mesh_mrm_skinned.cpp
+++ b/code/nel/src/3d/mesh_mrm_skinned.cpp
@ -2247,123 +2247,6 @@ void CMeshMRMSkinnedGeom::getSkinWeights (std::vector<CMesh::CSkinWeight> &skinW
 	}
 }
 // ***************************************************************************
 // ***************************************************************************
 // CMatrix3x4SSE array correctly aligned
 // ***************************************************************************
 // ***************************************************************************
 // ***************************************************************************
 #define	NL3D_SSE_ALIGNEMENT		16
 /**
 *	Growable array of CMatrix3x4SSE whose first element is aligned on NL3D_SSE_ALIGNEMENT bytes.
 *	The buffer is over-allocated by NL3D_SSE_ALIGNEMENT-1 bytes with new uint8[], and the element
 *	pointer is the allocation address rounded up to the next multiple of NL3D_SSE_ALIGNEMENT
 *	(the rounding mask requires NL3D_SSE_ALIGNEMENT to be a power of two).
 *	Elements are handled as raw bytes (memcpy): no constructor or destructor is run on them.
 *	NB: SSE is no longer used (no speed gain, some memory problems), but it is kept for possible future use.
 */
 class	CMatrix3x4SSEArray
 {
 private:
 	// Pointer returned by new uint8[]; the only one that may be given to delete [].
 	void	*_AllocData;
 	// _AllocData rounded up to NL3D_SSE_ALIGNEMENT: start of the elements. NULL while nothing is allocated.
 	void	*_Data;
 	// Number of elements in use.
 	uint	_Size;
 	// Number of elements the current buffer can hold.
 	uint	_Capacity;
 public:
 	/// Build an empty array; nothing is allocated.
 	CMatrix3x4SSEArray()
 	{
 		_AllocData= NULL;
 		_Data= NULL;
 		_Size= 0;
 		_Capacity= 0;
 	}
 	/// Release the buffer.
 	~CMatrix3x4SSEArray()
 	{
 		clear();
 	}
 	/// Copy constructor: start empty, then reuse operator= to allocate and copy.
 	CMatrix3x4SSEArray(const CMatrix3x4SSEArray &other)
 	{
 		_AllocData= NULL;
 		_Data= NULL;
 		_Size= 0;
 		_Capacity= 0;
 		*this= other;
 	}
 	/// Resize to other.size() and copy its elements. The capacity never shrinks.
 	CMatrix3x4SSEArray &operator=(const CMatrix3x4SSEArray &other)
 	{
 		if( this == &other)
 			return *this;
 		resize(other.size());
 		// copy data from aligned pointers to aligned pointers.
 		// Skipped when empty: both pointers may then be NULL, and giving NULL to memcpy is
 		// undefined behaviour even for a size of 0.
 		if(size() > 0)
 			memcpy(_Data, other._Data, size() * sizeof(CMatrix3x4SSE) );
 		return *this;
 	}
 	/// Aligned pointer on the first element (NULL if nothing is allocated).
 	CMatrix3x4SSE	*getPtr()
 	{
 		return (CMatrix3x4SSE*)_Data;
 	}
 	/// Free the buffer and reset size and capacity to 0.
 	void	clear()
 	{
 		// delete [] on NULL is a no-op, no need to test.
 		delete [] ((uint8 *) _AllocData);
 		_AllocData= NULL;
 		_Data= NULL;
 		_Size= 0;
 		_Capacity= 0;
 	}
 	/// Set the number of elements to n. When growing past the capacity, the capacity is at least doubled.
 	/// New elements are left uninitialized.
 	void	resize(uint n)
 	{
 		if(n>_Capacity)
 			reserve( max(2*_Capacity, n));
 		_Size= n;
 	}
 	/// Ensure room for at least n elements, keeping the current ones.
 	/// NB: reserve(0) releases everything (same as clear()); a request that fits in the capacity does nothing.
 	void	reserve(uint n)
 	{
 		if(n==0)
 			clear();
 		else if(n>_Capacity)
 		{
 			// Alloc new data.
 			void	*newAllocData;
 			void	*newData;
 			// Alloc with enough slack to be able to round the pointer up.
 			newAllocData= new uint8 [n * sizeof(CMatrix3x4SSE) + NL3D_SSE_ALIGNEMENT-1];
 			// Kept for allocators that return NULL on failure; a standard new throws instead.
 			if(newAllocData==NULL)
 				throw Exception("SSE Allocation Failed");
 			// Align ptr: round up to the next multiple of NL3D_SSE_ALIGNEMENT.
 			newData= (void*) ( ((ptrdiff_t)newAllocData+NL3D_SSE_ALIGNEMENT-1) & (~(NL3D_SSE_ALIGNEMENT-1)) );
 			// copy valid data from old to new.
 			// Skipped when empty: _Data is NULL before the first allocation, and giving NULL to
 			// memcpy is undefined behaviour even for a size of 0.
 			if(size() > 0)
 				memcpy(newData, _Data, size() * sizeof(CMatrix3x4SSE) );
 			// release old (no-op when NULL).
 			delete [] ((uint8*)_AllocData);
 			// change ptrs and capacity.
 			_Data= newData;
 			_AllocData= newAllocData;
 			_Capacity= n;
 		}
 	}
 	/// Number of elements in use.
 	uint	size() const {return _Size;}
 	/// Unchecked access to element i.
 	CMatrix3x4SSE	&operator[](uint i) {return ((CMatrix3x4SSE*)_Data)[i];}
 };
 // ***************************************************************************
 // ***************************************************************************