mirror of
https://port.numenaute.org/aleajactaest/khanat-opennel-code.git
synced 2024-11-14 03:09:08 +00:00
SSE2: Remove dead code
--HG-- branch : sse2
This commit is contained in:
parent
848932f93a
commit
71a598db7e
3 changed files with 0 additions and 510 deletions
|
@ -108,281 +108,6 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// ***************************************************************************
|
|
||||||
// SSE Matrix
|
|
||||||
// ***************************************************************************
|
|
||||||
// ***************************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
|
|
||||||
|
|
||||||
|
|
||||||
/** For fast vector/point multiplication. Special usage for Skinning.
|
|
||||||
* NB: SSE is no more used (no speed gain, some memory problem), but keep it for possible future usage.
|
|
||||||
*/
|
|
||||||
class CMatrix3x4SSE
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
// Order them in memory column first, for SSE column multiplication.
|
|
||||||
float a11, a21, a31, a41;
|
|
||||||
float a12, a22, a32, a42;
|
|
||||||
float a13, a23, a33, a43;
|
|
||||||
float a14, a24, a34, a44;
|
|
||||||
|
|
||||||
// Copy from a matrix.
|
|
||||||
void set(const CMatrix &mat)
|
|
||||||
{
|
|
||||||
const float *m =mat.get();
|
|
||||||
a11= m[0]; a12= m[4]; a13= m[8] ; a14= m[12];
|
|
||||||
a21= m[1]; a22= m[5]; a23= m[9] ; a24= m[13];
|
|
||||||
a31= m[2]; a32= m[6]; a33= m[10]; a34= m[14];
|
|
||||||
// not used.
|
|
||||||
a41= 0 ; a42= 0 ; a43= 0 ; a44= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// mulSetvector. NB: in should be different as v!! (else don't work).
|
|
||||||
void mulSetVector(const CVector &vin, CVector &vout)
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, vin
|
|
||||||
mov ebx, this
|
|
||||||
mov edi, vout
|
|
||||||
// Load in vector in op[0]
|
|
||||||
movss xmm0, [eax]vin.x
|
|
||||||
movss xmm1, [eax]vin.y
|
|
||||||
movss xmm2, [eax]vin.z
|
|
||||||
// Expand op[0] to op[1], op[2], op[3]
|
|
||||||
shufps xmm0, xmm0, 0
|
|
||||||
shufps xmm1, xmm1, 0
|
|
||||||
shufps xmm2, xmm2, 0
|
|
||||||
// Mul each vector with 3 Matrix column
|
|
||||||
mulps xmm0, [ebx]this.a11
|
|
||||||
mulps xmm1, [ebx]this.a12
|
|
||||||
mulps xmm2, [ebx]this.a13
|
|
||||||
// Add each column vector.
|
|
||||||
addps xmm0, xmm1
|
|
||||||
addps xmm0, xmm2
|
|
||||||
|
|
||||||
// write the result.
|
|
||||||
movss [edi]vout.x, xmm0
|
|
||||||
shufps xmm0, xmm0, 33
|
|
||||||
movss [edi]vout.y, xmm0
|
|
||||||
movhlps xmm0, xmm0
|
|
||||||
movss [edi]vout.z, xmm0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// mulSetpoint. NB: in should be different as v!! (else don't work).
|
|
||||||
void mulSetPoint(const CVector &vin, CVector &vout)
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, vin
|
|
||||||
mov ebx, this
|
|
||||||
mov edi, vout
|
|
||||||
// Load in vector in op[0]
|
|
||||||
movss xmm0, [eax]vin.x
|
|
||||||
movss xmm1, [eax]vin.y
|
|
||||||
movss xmm2, [eax]vin.z
|
|
||||||
// Expand op[0] to op[1], op[2], op[3]
|
|
||||||
shufps xmm0, xmm0, 0
|
|
||||||
shufps xmm1, xmm1, 0
|
|
||||||
shufps xmm2, xmm2, 0
|
|
||||||
// Mul each vector with 3 Matrix column
|
|
||||||
mulps xmm0, [ebx]this.a11
|
|
||||||
mulps xmm1, [ebx]this.a12
|
|
||||||
mulps xmm2, [ebx]this.a13
|
|
||||||
// Add each column vector.
|
|
||||||
addps xmm0, xmm1
|
|
||||||
addps xmm0, xmm2
|
|
||||||
// Add Matrix translate column vector
|
|
||||||
addps xmm0, [ebx]this.a14
|
|
||||||
|
|
||||||
// write the result.
|
|
||||||
movss [edi]vout.x, xmm0
|
|
||||||
shufps xmm0, xmm0, 33
|
|
||||||
movss [edi]vout.y, xmm0
|
|
||||||
movhlps xmm0, xmm0
|
|
||||||
movss [edi]vout.z, xmm0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// mulSetvector. NB: vin should be different as v!! (else don't work).
|
|
||||||
void mulSetVector(const CVector &vin, float scale, CVector &vout)
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, vin
|
|
||||||
mov ebx, this
|
|
||||||
mov edi, vout
|
|
||||||
// Load in vector in op[0]
|
|
||||||
movss xmm0, [eax]vin.x
|
|
||||||
movss xmm1, [eax]vin.y
|
|
||||||
movss xmm2, [eax]vin.z
|
|
||||||
// Load scale in op[0]
|
|
||||||
movss xmm3, scale
|
|
||||||
// Expand op[0] to op[1], op[2], op[3]
|
|
||||||
shufps xmm0, xmm0, 0
|
|
||||||
shufps xmm1, xmm1, 0
|
|
||||||
shufps xmm2, xmm2, 0
|
|
||||||
shufps xmm3, xmm3, 0
|
|
||||||
// Store vertex column in other regs.
|
|
||||||
movaps xmm5, xmm0
|
|
||||||
movaps xmm6, xmm1
|
|
||||||
movaps xmm7, xmm2
|
|
||||||
// Mul each vector with 3 Matrix column
|
|
||||||
mulps xmm0, [ebx]this.a11
|
|
||||||
mulps xmm1, [ebx]this.a12
|
|
||||||
mulps xmm2, [ebx]this.a13
|
|
||||||
// Add each column vector.
|
|
||||||
addps xmm0, xmm1
|
|
||||||
addps xmm0, xmm2
|
|
||||||
|
|
||||||
// mul final result with scale
|
|
||||||
mulps xmm0, xmm3
|
|
||||||
|
|
||||||
// store it in xmm4 for future use.
|
|
||||||
movaps xmm4, xmm0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// mulSetpoint. NB: vin should be different as v!! (else don't work).
|
|
||||||
void mulSetPoint(const CVector &vin, float scale, CVector &vout)
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, vin
|
|
||||||
mov ebx, this
|
|
||||||
mov edi, vout
|
|
||||||
// Load in vector in op[0]
|
|
||||||
movss xmm0, [eax]vin.x
|
|
||||||
movss xmm1, [eax]vin.y
|
|
||||||
movss xmm2, [eax]vin.z
|
|
||||||
// Load scale in op[0]
|
|
||||||
movss xmm3, scale
|
|
||||||
// Expand op[0] to op[1], op[2], op[3]
|
|
||||||
shufps xmm0, xmm0, 0
|
|
||||||
shufps xmm1, xmm1, 0
|
|
||||||
shufps xmm2, xmm2, 0
|
|
||||||
shufps xmm3, xmm3, 0
|
|
||||||
// Store vertex column in other regs.
|
|
||||||
movaps xmm5, xmm0
|
|
||||||
movaps xmm6, xmm1
|
|
||||||
movaps xmm7, xmm2
|
|
||||||
// Mul each vector with 3 Matrix column
|
|
||||||
mulps xmm0, [ebx]this.a11
|
|
||||||
mulps xmm1, [ebx]this.a12
|
|
||||||
mulps xmm2, [ebx]this.a13
|
|
||||||
// Add each column vector.
|
|
||||||
addps xmm0, xmm1
|
|
||||||
addps xmm0, xmm2
|
|
||||||
// Add Matrix translate column vector
|
|
||||||
addps xmm0, [ebx]this.a14
|
|
||||||
|
|
||||||
// mul final result with scale
|
|
||||||
mulps xmm0, xmm3
|
|
||||||
|
|
||||||
// store it in xmm4 for future use.
|
|
||||||
movaps xmm4, xmm0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// mulAddvector. NB: vin should be different as v!! (else don't work).
|
|
||||||
void mulAddVector(const CVector &/* vin */, float scale, CVector &vout)
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov ebx, this
|
|
||||||
mov edi, vout
|
|
||||||
// Load vin vector loaded in mulSetVector
|
|
||||||
movaps xmm0, xmm5
|
|
||||||
movaps xmm1, xmm6
|
|
||||||
movaps xmm2, xmm7
|
|
||||||
// Load scale in op[0]
|
|
||||||
movss xmm3, scale
|
|
||||||
// Expand op[0] to op[1], op[2], op[3]
|
|
||||||
shufps xmm3, xmm3, 0
|
|
||||||
// Mul each vector with 3 Matrix column
|
|
||||||
mulps xmm0, [ebx]this.a11
|
|
||||||
mulps xmm1, [ebx]this.a12
|
|
||||||
mulps xmm2, [ebx]this.a13
|
|
||||||
// Add each column vector.
|
|
||||||
addps xmm0, xmm1
|
|
||||||
addps xmm0, xmm2
|
|
||||||
|
|
||||||
// mul final result with scale
|
|
||||||
mulps xmm0, xmm3
|
|
||||||
|
|
||||||
// Add result, with prec sum.
|
|
||||||
addps xmm0, xmm4
|
|
||||||
|
|
||||||
// store it in xmm4 for future use.
|
|
||||||
movaps xmm4, xmm0
|
|
||||||
|
|
||||||
// write the result.
|
|
||||||
movss [edi]vout.x, xmm0
|
|
||||||
shufps xmm0, xmm0, 33
|
|
||||||
movss [edi]vout.y, xmm0
|
|
||||||
movhlps xmm0, xmm0
|
|
||||||
movss [edi]vout.z, xmm0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// mulAddpoint. NB: vin should be different as v!! (else don't work).
|
|
||||||
void mulAddPoint(const CVector &/* vin */, float scale, CVector &vout)
|
|
||||||
{
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov ebx, this
|
|
||||||
mov edi, vout
|
|
||||||
// Load vin vector loaded in mulSetPoint
|
|
||||||
movaps xmm0, xmm5
|
|
||||||
movaps xmm1, xmm6
|
|
||||||
movaps xmm2, xmm7
|
|
||||||
// Load scale in op[0]
|
|
||||||
movss xmm3, scale
|
|
||||||
// Expand op[0] to op[1], op[2], op[3]
|
|
||||||
shufps xmm3, xmm3, 0
|
|
||||||
// Mul each vector with 3 Matrix column
|
|
||||||
mulps xmm0, [ebx]this.a11
|
|
||||||
mulps xmm1, [ebx]this.a12
|
|
||||||
mulps xmm2, [ebx]this.a13
|
|
||||||
// Add each column vector.
|
|
||||||
addps xmm0, xmm1
|
|
||||||
addps xmm0, xmm2
|
|
||||||
// Add Matrix translate column vector
|
|
||||||
addps xmm0, [ebx]this.a14
|
|
||||||
|
|
||||||
// mul final result with scale
|
|
||||||
mulps xmm0, xmm3
|
|
||||||
|
|
||||||
// Add result, with prec sum.
|
|
||||||
addps xmm0, xmm4
|
|
||||||
|
|
||||||
// store it in xmm4 for future use.
|
|
||||||
movaps xmm4, xmm0
|
|
||||||
|
|
||||||
// write the result.
|
|
||||||
movss [edi]vout.x, xmm0
|
|
||||||
shufps xmm0, xmm0, 33
|
|
||||||
movss [edi]vout.y, xmm0
|
|
||||||
movhlps xmm0, xmm0
|
|
||||||
movss [edi]vout.z, xmm0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#else // NL_OS_WINDOWS
|
|
||||||
/// dummy CMatrix3x4SSE for non windows platform
|
|
||||||
class CMatrix3x4SSE : public CMatrix3x4 { };
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // NL3D
|
} // NL3D
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -39,124 +39,6 @@ namespace NL3D
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// ***************************************************************************
|
|
||||||
// CMatrix3x4SSE array correctly aligned
|
|
||||||
// ***************************************************************************
|
|
||||||
// ***************************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
#define NL3D_SSE_ALIGNEMENT 16
|
|
||||||
/**
|
|
||||||
* A CMatrix3x4SSE array correctly aligned
|
|
||||||
* NB: SSE is no more used (no speed gain, some memory problem), but keep it for possible future usage.
|
|
||||||
*/
|
|
||||||
class CMatrix3x4SSEArray
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
void *_AllocData;
|
|
||||||
void *_Data;
|
|
||||||
uint _Size;
|
|
||||||
uint _Capacity;
|
|
||||||
|
|
||||||
public:
|
|
||||||
CMatrix3x4SSEArray()
|
|
||||||
{
|
|
||||||
_AllocData= NULL;
|
|
||||||
_Data= NULL;
|
|
||||||
_Size= 0;
|
|
||||||
_Capacity= 0;
|
|
||||||
}
|
|
||||||
~CMatrix3x4SSEArray()
|
|
||||||
{
|
|
||||||
clear();
|
|
||||||
}
|
|
||||||
CMatrix3x4SSEArray(const CMatrix3x4SSEArray &other)
|
|
||||||
{
|
|
||||||
_AllocData= NULL;
|
|
||||||
_Data= NULL;
|
|
||||||
_Size= 0;
|
|
||||||
_Capacity= 0;
|
|
||||||
*this= other;
|
|
||||||
}
|
|
||||||
CMatrix3x4SSEArray &operator=(const CMatrix3x4SSEArray &other)
|
|
||||||
{
|
|
||||||
if( this == &other)
|
|
||||||
return *this;
|
|
||||||
resize(other.size());
|
|
||||||
// copy data from aligned pointers to aligned pointers.
|
|
||||||
memcpy(_Data, other._Data, size() * sizeof(CMatrix3x4SSE) );
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
CMatrix3x4SSE *getPtr()
|
|
||||||
{
|
|
||||||
return (CMatrix3x4SSE*)_Data;
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
delete [] ((uint8 *)_AllocData);
|
|
||||||
_AllocData= NULL;
|
|
||||||
_Data= NULL;
|
|
||||||
_Size= 0;
|
|
||||||
_Capacity= 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void resize(uint n)
|
|
||||||
{
|
|
||||||
// reserve ??
|
|
||||||
if(n>_Capacity)
|
|
||||||
reserve( max(2*_Capacity, n));
|
|
||||||
_Size= n;
|
|
||||||
}
|
|
||||||
|
|
||||||
void reserve(uint n)
|
|
||||||
{
|
|
||||||
if(n==0)
|
|
||||||
clear();
|
|
||||||
else if(n>_Capacity)
|
|
||||||
{
|
|
||||||
// Alloc new data.
|
|
||||||
void *newAllocData;
|
|
||||||
void *newData;
|
|
||||||
|
|
||||||
// Alloc for alignement.
|
|
||||||
newAllocData= new uint8 [n * sizeof(CMatrix3x4SSE) + NL3D_SSE_ALIGNEMENT-1];
|
|
||||||
if(newAllocData==NULL)
|
|
||||||
throw Exception("SSE Allocation Failed");
|
|
||||||
|
|
||||||
// Align ptr
|
|
||||||
newData= (void*) ( ((ptrdiff_t)newAllocData+NL3D_SSE_ALIGNEMENT-1) & (~(NL3D_SSE_ALIGNEMENT-1)) );
|
|
||||||
|
|
||||||
// copy valid data from old to new.
|
|
||||||
memcpy(newData, _Data, size() * sizeof(CMatrix3x4SSE) );
|
|
||||||
|
|
||||||
// release old.
|
|
||||||
if(_AllocData)
|
|
||||||
delete [] ((uint8*)_AllocData);
|
|
||||||
|
|
||||||
// change ptrs and capacity.
|
|
||||||
_Data= newData;
|
|
||||||
_AllocData= newAllocData;
|
|
||||||
_Capacity= n;
|
|
||||||
|
|
||||||
// TestYoyo
|
|
||||||
//nlwarning("YOYO Tst SSE P4: %X, %d", _Data, n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint size() const {return _Size;}
|
|
||||||
|
|
||||||
|
|
||||||
CMatrix3x4SSE &operator[](uint i) {return ((CMatrix3x4SSE*)_Data)[i];}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
|
@ -2247,123 +2247,6 @@ void CMeshMRMSkinnedGeom::getSkinWeights (std::vector<CMesh::CSkinWeight> &skinW
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// ***************************************************************************
|
|
||||||
// CMatrix3x4SSE array correctly aligned
|
|
||||||
// ***************************************************************************
|
|
||||||
// ***************************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
#define NL3D_SSE_ALIGNEMENT 16
|
|
||||||
/**
|
|
||||||
* A CMatrix3x4SSEArray array correctly aligned
|
|
||||||
* NB: SSE is no more used (no speed gain, some memory problem), but keep it for possible future usage.
|
|
||||||
*/
|
|
||||||
class CMatrix3x4SSEArray
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
void *_AllocData;
|
|
||||||
void *_Data;
|
|
||||||
uint _Size;
|
|
||||||
uint _Capacity;
|
|
||||||
|
|
||||||
public:
|
|
||||||
CMatrix3x4SSEArray()
|
|
||||||
{
|
|
||||||
_AllocData= NULL;
|
|
||||||
_Data= NULL;
|
|
||||||
_Size= 0;
|
|
||||||
_Capacity= 0;
|
|
||||||
}
|
|
||||||
~CMatrix3x4SSEArray()
|
|
||||||
{
|
|
||||||
clear();
|
|
||||||
}
|
|
||||||
CMatrix3x4SSEArray(const CMatrix3x4SSEArray &other)
|
|
||||||
{
|
|
||||||
_AllocData= NULL;
|
|
||||||
_Data= NULL;
|
|
||||||
_Size= 0;
|
|
||||||
_Capacity= 0;
|
|
||||||
*this= other;
|
|
||||||
}
|
|
||||||
CMatrix3x4SSEArray &operator=(const CMatrix3x4SSEArray &other)
|
|
||||||
{
|
|
||||||
if( this == &other)
|
|
||||||
return *this;
|
|
||||||
resize(other.size());
|
|
||||||
// copy data from aligned pointers to aligned pointers.
|
|
||||||
memcpy(_Data, other._Data, size() * sizeof(CMatrix3x4SSE) );
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
CMatrix3x4SSE *getPtr()
|
|
||||||
{
|
|
||||||
return (CMatrix3x4SSE*)_Data;
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
delete [] ((uint8 *) _AllocData);
|
|
||||||
_AllocData= NULL;
|
|
||||||
_Data= NULL;
|
|
||||||
_Size= 0;
|
|
||||||
_Capacity= 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void resize(uint n)
|
|
||||||
{
|
|
||||||
// reserve ??
|
|
||||||
if(n>_Capacity)
|
|
||||||
reserve( max(2*_Capacity, n));
|
|
||||||
_Size= n;
|
|
||||||
}
|
|
||||||
|
|
||||||
void reserve(uint n)
|
|
||||||
{
|
|
||||||
if(n==0)
|
|
||||||
clear();
|
|
||||||
else if(n>_Capacity)
|
|
||||||
{
|
|
||||||
// Alloc new data.
|
|
||||||
void *newAllocData;
|
|
||||||
void *newData;
|
|
||||||
|
|
||||||
// Alloc for alignement.
|
|
||||||
newAllocData= new uint8 [n * sizeof(CMatrix3x4SSE) + NL3D_SSE_ALIGNEMENT-1];
|
|
||||||
if(newAllocData==NULL)
|
|
||||||
throw Exception("SSE Allocation Failed");
|
|
||||||
|
|
||||||
// Align ptr
|
|
||||||
newData= (void*) ( ((ptrdiff_t)newAllocData+NL3D_SSE_ALIGNEMENT-1) & (~(NL3D_SSE_ALIGNEMENT-1)) );
|
|
||||||
|
|
||||||
// copy valid data from old to new.
|
|
||||||
memcpy(newData, _Data, size() * sizeof(CMatrix3x4SSE) );
|
|
||||||
|
|
||||||
// release old.
|
|
||||||
if(_AllocData)
|
|
||||||
delete [] ((uint8*)_AllocData);
|
|
||||||
|
|
||||||
// change ptrs and capacity.
|
|
||||||
_Data= newData;
|
|
||||||
_AllocData= newAllocData;
|
|
||||||
_Capacity= n;
|
|
||||||
|
|
||||||
// TestYoyo
|
|
||||||
//nlwarning("YOYO Tst SSE P4: %X, %d", _Data, n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint size() const {return _Size;}
|
|
||||||
|
|
||||||
|
|
||||||
CMatrix3x4SSE &operator[](uint i) {return ((CMatrix3x4SSE*)_Data)[i];}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
Loading…
Reference in a new issue