diff --git a/code/nel/include/nel/3d/matrix_3x4.h b/code/nel/include/nel/3d/matrix_3x4.h index d7ed660fc..45901c20e 100644 --- a/code/nel/include/nel/3d/matrix_3x4.h +++ b/code/nel/include/nel/3d/matrix_3x4.h @@ -108,281 +108,6 @@ public: }; -// *************************************************************************** -// *************************************************************************** -// SSE Matrix -// *************************************************************************** -// *************************************************************************** - - -// *************************************************************************** -#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM) - - -/** For fast vector/point multiplication. Special usage for Skinning. - * NB: SSE is no more used (no speed gain, some memory problem), but keep it for possible future usage. - */ -class CMatrix3x4SSE -{ -public: - // Order them in memory column first, for SSE column multiplication. - float a11, a21, a31, a41; - float a12, a22, a32, a42; - float a13, a23, a33, a43; - float a14, a24, a34, a44; - - // Copy from a matrix. - void set(const CMatrix &mat) - { - const float *m =mat.get(); - a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12]; - a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13]; - a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14]; - // not used. - a41= 0 ; a42= 0 ; a43= 0 ; a44= 1; - } - - - // mulSetvector. NB: in should be different as v!! (else don't work). - void mulSetVector(const CVector &vin, CVector &vout) - { - __asm - { - mov eax, vin - mov ebx, this - mov edi, vout - // Load in vector in op[0] - movss xmm0, [eax]vin.x - movss xmm1, [eax]vin.y - movss xmm2, [eax]vin.z - // Expand op[0] to op[1], op[2], op[3] - shufps xmm0, xmm0, 0 - shufps xmm1, xmm1, 0 - shufps xmm2, xmm2, 0 - // Mul each vector with 3 Matrix column - mulps xmm0, [ebx]this.a11 - mulps xmm1, [ebx]this.a12 - mulps xmm2, [ebx]this.a13 - // Add each column vector. - addps xmm0, xmm1 - addps xmm0, xmm2 - - // write the result. - movss [edi]vout.x, xmm0 - shufps xmm0, xmm0, 33 - movss [edi]vout.y, xmm0 - movhlps xmm0, xmm0 - movss [edi]vout.z, xmm0 - } - } - // mulSetpoint. NB: in should be different as v!! (else don't work). - void mulSetPoint(const CVector &vin, CVector &vout) - { - __asm - { - mov eax, vin - mov ebx, this - mov edi, vout - // Load in vector in op[0] - movss xmm0, [eax]vin.x - movss xmm1, [eax]vin.y - movss xmm2, [eax]vin.z - // Expand op[0] to op[1], op[2], op[3] - shufps xmm0, xmm0, 0 - shufps xmm1, xmm1, 0 - shufps xmm2, xmm2, 0 - // Mul each vector with 3 Matrix column - mulps xmm0, [ebx]this.a11 - mulps xmm1, [ebx]this.a12 - mulps xmm2, [ebx]this.a13 - // Add each column vector. - addps xmm0, xmm1 - addps xmm0, xmm2 - // Add Matrix translate column vector - addps xmm0, [ebx]this.a14 - - // write the result. - movss [edi]vout.x, xmm0 - shufps xmm0, xmm0, 33 - movss [edi]vout.y, xmm0 - movhlps xmm0, xmm0 - movss [edi]vout.z, xmm0 - } - } - - - // mulSetvector. NB: vin should be different as v!! (else don't work). - void mulSetVector(const CVector &vin, float scale, CVector &vout) - { - __asm - { - mov eax, vin - mov ebx, this - mov edi, vout - // Load in vector in op[0] - movss xmm0, [eax]vin.x - movss xmm1, [eax]vin.y - movss xmm2, [eax]vin.z - // Load scale in op[0] - movss xmm3, scale - // Expand op[0] to op[1], op[2], op[3] - shufps xmm0, xmm0, 0 - shufps xmm1, xmm1, 0 - shufps xmm2, xmm2, 0 - shufps xmm3, xmm3, 0 - // Store vertex column in other regs. - movaps xmm5, xmm0 - movaps xmm6, xmm1 - movaps xmm7, xmm2 - // Mul each vector with 3 Matrix column - mulps xmm0, [ebx]this.a11 - mulps xmm1, [ebx]this.a12 - mulps xmm2, [ebx]this.a13 - // Add each column vector. - addps xmm0, xmm1 - addps xmm0, xmm2 - - // mul final result with scale - mulps xmm0, xmm3 - - // store it in xmm4 for future use. - movaps xmm4, xmm0 - } - } - // mulSetpoint. NB: vin should be different as v!! (else don't work). - void mulSetPoint(const CVector &vin, float scale, CVector &vout) - { - __asm - { - mov eax, vin - mov ebx, this - mov edi, vout - // Load in vector in op[0] - movss xmm0, [eax]vin.x - movss xmm1, [eax]vin.y - movss xmm2, [eax]vin.z - // Load scale in op[0] - movss xmm3, scale - // Expand op[0] to op[1], op[2], op[3] - shufps xmm0, xmm0, 0 - shufps xmm1, xmm1, 0 - shufps xmm2, xmm2, 0 - shufps xmm3, xmm3, 0 - // Store vertex column in other regs. - movaps xmm5, xmm0 - movaps xmm6, xmm1 - movaps xmm7, xmm2 - // Mul each vector with 3 Matrix column - mulps xmm0, [ebx]this.a11 - mulps xmm1, [ebx]this.a12 - mulps xmm2, [ebx]this.a13 - // Add each column vector. - addps xmm0, xmm1 - addps xmm0, xmm2 - // Add Matrix translate column vector - addps xmm0, [ebx]this.a14 - - // mul final result with scale - mulps xmm0, xmm3 - - // store it in xmm4 for future use. - movaps xmm4, xmm0 - } - } - - - // mulAddvector. NB: vin should be different as v!! (else don't work). - void mulAddVector(const CVector &/* vin */, float scale, CVector &vout) - { - __asm - { - mov ebx, this - mov edi, vout - // Load vin vector loaded in mulSetVector - movaps xmm0, xmm5 - movaps xmm1, xmm6 - movaps xmm2, xmm7 - // Load scale in op[0] - movss xmm3, scale - // Expand op[0] to op[1], op[2], op[3] - shufps xmm3, xmm3, 0 - // Mul each vector with 3 Matrix column - mulps xmm0, [ebx]this.a11 - mulps xmm1, [ebx]this.a12 - mulps xmm2, [ebx]this.a13 - // Add each column vector. - addps xmm0, xmm1 - addps xmm0, xmm2 - - // mul final result with scale - mulps xmm0, xmm3 - - // Add result, with prec sum. - addps xmm0, xmm4 - - // store it in xmm4 for future use. - movaps xmm4, xmm0 - - // write the result. - movss [edi]vout.x, xmm0 - shufps xmm0, xmm0, 33 - movss [edi]vout.y, xmm0 - movhlps xmm0, xmm0 - movss [edi]vout.z, xmm0 - } - } - // mulAddpoint. NB: vin should be different as v!! (else don't work). - void mulAddPoint(const CVector &/* vin */, float scale, CVector &vout) - { - __asm - { - mov ebx, this - mov edi, vout - // Load vin vector loaded in mulSetPoint - movaps xmm0, xmm5 - movaps xmm1, xmm6 - movaps xmm2, xmm7 - // Load scale in op[0] - movss xmm3, scale - // Expand op[0] to op[1], op[2], op[3] - shufps xmm3, xmm3, 0 - // Mul each vector with 3 Matrix column - mulps xmm0, [ebx]this.a11 - mulps xmm1, [ebx]this.a12 - mulps xmm2, [ebx]this.a13 - // Add each column vector. - addps xmm0, xmm1 - addps xmm0, xmm2 - // Add Matrix translate column vector - addps xmm0, [ebx]this.a14 - - // mul final result with scale - mulps xmm0, xmm3 - - // Add result, with prec sum. - addps xmm0, xmm4 - - // store it in xmm4 for future use. - movaps xmm4, xmm0 - - // write the result. - movss [edi]vout.x, xmm0 - shufps xmm0, xmm0, 33 - movss [edi]vout.y, xmm0 - movhlps xmm0, xmm0 - movss [edi]vout.z, xmm0 - } - } - -}; - -#else // NL_OS_WINDOWS -/// dummy CMatrix3x4SSE for non windows platform -class CMatrix3x4SSE : public CMatrix3x4 { }; -#endif - - - } // NL3D diff --git a/code/nel/src/3d/mesh_mrm_skin.cpp b/code/nel/src/3d/mesh_mrm_skin.cpp index 13e8bdd21..a34eeb766 100644 --- a/code/nel/src/3d/mesh_mrm_skin.cpp +++ b/code/nel/src/3d/mesh_mrm_skin.cpp @@ -39,124 +39,6 @@ namespace NL3D { -// *************************************************************************** -// *************************************************************************** -// CMatrix3x4SSE array correctly aligned -// *************************************************************************** -// *************************************************************************** - - - -// *************************************************************************** -#define NL3D_SSE_ALIGNEMENT 16 -/** - * A CMatrix3x4SSE array correctly aligned - * NB: SSE is no more used (no speed gain, some memory problem), but keep it for possible future usage. - */ -class CMatrix3x4SSEArray -{ -private: - void *_AllocData; - void *_Data; - uint _Size; - uint _Capacity; - -public: - CMatrix3x4SSEArray() - { - _AllocData= NULL; - _Data= NULL; - _Size= 0; - _Capacity= 0; - } - ~CMatrix3x4SSEArray() - { - clear(); - } - CMatrix3x4SSEArray(const CMatrix3x4SSEArray &other) - { - _AllocData= NULL; - _Data= NULL; - _Size= 0; - _Capacity= 0; - *this= other; - } - CMatrix3x4SSEArray &operator=(const CMatrix3x4SSEArray &other) - { - if( this == &other) - return *this; - resize(other.size()); - // copy data from aligned pointers to aligned pointers. - memcpy(_Data, other._Data, size() * sizeof(CMatrix3x4SSE) ); - - return *this; - } - - - CMatrix3x4SSE *getPtr() - { - return (CMatrix3x4SSE*)_Data; - } - - void clear() - { - delete [] ((uint8 *)_AllocData); - _AllocData= NULL; - _Data= NULL; - _Size= 0; - _Capacity= 0; - } - - void resize(uint n) - { - // reserve ?? - if(n>_Capacity) - reserve( max(2*_Capacity, n)); - _Size= n; - } - - void reserve(uint n) - { - if(n==0) - clear(); - else if(n>_Capacity) - { - // Alloc new data. - void *newAllocData; - void *newData; - - // Alloc for alignement. - newAllocData= new uint8 [n * sizeof(CMatrix3x4SSE) + NL3D_SSE_ALIGNEMENT-1]; - if(newAllocData==NULL) - throw Exception("SSE Allocation Failed"); - - // Align ptr - newData= (void*) ( ((ptrdiff_t)newAllocData+NL3D_SSE_ALIGNEMENT-1) & (~(NL3D_SSE_ALIGNEMENT-1)) ); - - // copy valid data from old to new. - memcpy(newData, _Data, size() * sizeof(CMatrix3x4SSE) ); - - // release old. - if(_AllocData) - delete [] ((uint8*)_AllocData); - - // change ptrs and capacity. - _Data= newData; - _AllocData= newAllocData; - _Capacity= n; - - // TestYoyo - //nlwarning("YOYO Tst SSE P4: %X, %d", _Data, n); - } - } - - uint size() const {return _Size;} - - - CMatrix3x4SSE &operator[](uint i) {return ((CMatrix3x4SSE*)_Data)[i];} -}; - - // *************************************************************************** // *************************************************************************** diff --git a/code/nel/src/3d/mesh_mrm_skinned.cpp b/code/nel/src/3d/mesh_mrm_skinned.cpp index 2b1c3beb6..6af29bf73 100644 --- a/code/nel/src/3d/mesh_mrm_skinned.cpp +++ b/code/nel/src/3d/mesh_mrm_skinned.cpp @@ -2247,123 +2247,6 @@ void CMeshMRMSkinnedGeom::getSkinWeights (std::vector &skinW } } -// *************************************************************************** -// *************************************************************************** -// CMatrix3x4SSE array correctly aligned -// *************************************************************************** -// *************************************************************************** - - - -// *************************************************************************** -#define NL3D_SSE_ALIGNEMENT 16 -/** - * A CMatrix3x4SSEArray array correctly aligned - * NB: SSE is no more used (no speed gain, some memory problem), but keep it for possible future usage. - */ -class CMatrix3x4SSEArray -{ -private: - void *_AllocData; - void *_Data; - uint _Size; - uint _Capacity; - -public: - CMatrix3x4SSEArray() - { - _AllocData= NULL; - _Data= NULL; - _Size= 0; - _Capacity= 0; - } - ~CMatrix3x4SSEArray() - { - clear(); - } - CMatrix3x4SSEArray(const CMatrix3x4SSEArray &other) - { - _AllocData= NULL; - _Data= NULL; - _Size= 0; - _Capacity= 0; - *this= other; - } - CMatrix3x4SSEArray &operator=(const CMatrix3x4SSEArray &other) - { - if( this == &other) - return *this; - resize(other.size()); - // copy data from aligned pointers to aligned pointers. - memcpy(_Data, other._Data, size() * sizeof(CMatrix3x4SSE) ); - - return *this; - } - - - CMatrix3x4SSE *getPtr() - { - return (CMatrix3x4SSE*)_Data; - } - - void clear() - { - delete [] ((uint8 *) _AllocData); - _AllocData= NULL; - _Data= NULL; - _Size= 0; - _Capacity= 0; - } - - void resize(uint n) - { - // reserve ?? - if(n>_Capacity) - reserve( max(2*_Capacity, n)); - _Size= n; - } - - void reserve(uint n) - { - if(n==0) - clear(); - else if(n>_Capacity) - { - // Alloc new data. - void *newAllocData; - void *newData; - - // Alloc for alignement. - newAllocData= new uint8 [n * sizeof(CMatrix3x4SSE) + NL3D_SSE_ALIGNEMENT-1]; - if(newAllocData==NULL) - throw Exception("SSE Allocation Failed"); - - // Align ptr - newData= (void*) ( ((ptrdiff_t)newAllocData+NL3D_SSE_ALIGNEMENT-1) & (~(NL3D_SSE_ALIGNEMENT-1)) ); - - // copy valid data from old to new. - memcpy(newData, _Data, size() * sizeof(CMatrix3x4SSE) ); - - // release old. - if(_AllocData) - delete [] ((uint8*)_AllocData); - - // change ptrs and capacity. - _Data= newData; - _AllocData= newAllocData; - _Capacity= n; - - // TestYoyo - //nlwarning("YOYO Tst SSE P4: %X, %d", _Data, n); - } - } - - uint size() const {return _Size;} - - - CMatrix3x4SSE &operator[](uint i) {return ((CMatrix3x4SSE*)_Data)[i];} -}; - // *************************************************************************** // ***************************************************************************