// NeL - MMORPG Framework
// Copyright (C) 2010 Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

// NOTE(review): this file reached review with its newlines collapsed and with
// every "<identifier ... >" span stripped (template arguments, "i<count"
// loop conditions, inline assembly). The layout below is restored; every
// surviving token is kept in its original order. Places where text is
// visibly missing are flagged with NOTE(review) and must be restored from
// version control before this file can compile.

#include "std3d.h"
#include "nel/3d/hls_color_texture.h"
#include "nel/misc/fast_floor.h"
#include "nel/3d/fasthls_modifier.h"
#include "nel/misc/stream.h"
#include "nel/misc/bitmap.h"
#include "nel/misc/system_info.h"
#include "nel/misc/algo.h"

using namespace std;
using namespace NLMISC;

namespace NL3D
{

// Number of CRGBA pixels in one uncompressed block (stride of the uncompressed block array).
#define BLOCK_NUM_PIXEL 16
// Number of bytes in one compressed DXTC5 block (stride of the compressed block array).
#define BLOCK_DXTC_SIZE 16
// Number of alpha bytes stored per mixt block in CMask::Data (16*uint8 per block).
#define BLOCK_ALPHA_SIZE 16

// ***************************************************************************
// Serialize the color delta: a version number (0), then DHue, DLum, DSat.
void CHLSColorDelta::serial(NLMISC::IStream &f)
{
	f.serialVersion(0);
	f.serial(DHue, DLum, DSat);
}

// ***************************************************************************
// Serialize the mask: a version number (0), the two bit-array offsets, then the raw Data container.
void CHLSColorTexture::CMask::serial(NLMISC::IStream &f)
{
	f.serialVersion(0);
	f.serial(FullBlockIndex);
	f.serial(MixtBlockIndex);
	f.serialCont(Data);
}

// ***************************************************************************
// Set bit number bitId in Data. bitId is an absolute bit offset from the start of Data:
// byte bitId/8, bit (bitId&7) counted from the least significant bit.
void CHLSColorTexture::CMask::setBit(uint bitId)
{
	uint8 &b= Data[bitId/8];
	b|= 1<<(bitId&7);
}

// ***************************************************************************
// Build an empty texture (see reset()).
CHLSColorTexture::CHLSColorTexture()
{
	reset();
}

// ***************************************************************************
// Return to the empty state: zero size, zero mipmap, no texture data and no mask.
void CHLSColorTexture::reset()
{
	_Width= 0;
	_Height= 0;
	_NumMipMap= 0;
	contReset(_Texture);
	contReset(_Masks);
}

// ***************************************************************************
// Set the source DXTC5 bitmap.
// Asserts that bmp is DXTC5, at least 1x1, and has mipmaps unless it is a single pixel,
// then resets this object before rebuilding _Texture.
// _Texture is sized to hold the pixel data followed by blockToCompressSize extra bytes, all zeroed.
void CHLSColorTexture::setBitmap(const NLMISC::CBitmap &bmp)
{
	nlassert(bmp.getPixelFormat()==CBitmap::DXTC5);
	uint width= bmp.getWidth();
	uint height= bmp.getHeight();
	uint mmCount= bmp.getMipMapCount();
	nlassert(width>=1 && height>=1);
	nlassert(mmCount>1 || width*height==1);

	// restart
	reset();

	// resize.
	uint m;
	uint pixelSize= 0;
	uint numTotalBlock= 0;
	// NOTE(review): text is missing after "for(m=0;m" (loop condition, loop body and the
	// computation of blockToCompressSize are lost). Restore from version control.
	for(m=0;m no block to re-compress.
	_Texture.resize(pixelSize+blockToCompressSize, 0);

	// fill texture
	uint8 *ptr= &_Texture[0];
	// NOTE(review): text is missing after "for(m=0;m": the rest of setBitmap() and the
	// beginning of a declaration ending with a "Blocks" member are lost. Presumably that
	// declaration is the CMaskInfo struct used by addMask() (WBlock, HBlock, NumBlock,
	// Blocks), together with the MASK_BLOCK_EMPTY / MASK_BLOCK_FULL / MASK_BLOCK_MIXT
	// values -- TODO confirm against version control.
	for(m=0;m Blocks; // 0 empty, 1. Full. 2. Mixt.
};

// ***************************************************************************
// Add a mask built from the bitmap bmpIn.
// The bitmap is copied, converted to RGBA and mipmapped; its size and mipmap count must match
// the current texture (asserted).
// Each block of each mipmap is classified as EMPTY, FULL or MIXT from an accumulated,
// thresholded mask value, then packed into a CMask whose Data layout is:
//   [mixt block alpha: BLOCK_ALPHA_SIZE bytes per mixt block]
//   [full block bits: bitDataSize bytes, at FullBlockIndex]
//   [mixt block bits: bitDataSize bytes, at MixtBlockIndex]
void CHLSColorTexture::addMask(const NLMISC::CBitmap &bmpIn, uint threshold)
{
	// copy the bitmap and set RGBA/mipmaps.
	CBitmap bmp= bmpIn;
	bmp.convertToType(CBitmap::RGBA);
	bmp.buildMipMaps();

	// verify width...
	nlassert(bmp.getWidth()== _Width);
	nlassert(bmp.getHeight()== _Height);
	nlassert(bmp.getMipMapCount()== _NumMipMap);

	// ***** build the information for all mipmaps
	// NOTE(review): the template argument of "vector" is missing here (elements are used
	// as CMaskInfo just below).
	vector masks;
	masks.resize(_NumMipMap);
	uint m;
	uint numMixtBlock= 0;
	uint numTotalBlock= 0;
	for(m=0;m<_NumMipMap;m++)
	{
		CMaskInfo &mask= masks[m];
		uint mmWidth= bmp.getWidth(m);
		uint mmHeight= bmp.getHeight(m);
		// blocks are 4x4 pixels; round up so partial blocks on the border are counted.
		mask.WBlock= (mmWidth+3)/4;
		mask.HBlock= (mmHeight+3)/4;
		mask.NumBlock= mask.WBlock*mask.HBlock;
		mask.Blocks.resize(mask.NumBlock);

		numTotalBlock+= mask.NumBlock;

		CRGBA *src= (CRGBA*)(&bmp.getPixels(m)[0]);

		// NOTE(review): text is missing after "for(uint yB=0;yB": the yB/xB block loops, the
		// declarations of accum, w, h and alphaMask, the per-pixel loops and the low
		// threshold test are lost. Only the tail of the high threshold test survives.
		for(uint yB=0;yB255-threshold)
						alphaMask= 255;

					// Add to the accum
					accum+= alphaMask;
				}
			}

			// full black?
			if(accum==0)
				mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_EMPTY;
			// full white: every one of the w*h pixels accumulated 255.
			else if(accum==w*h*255)
				mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_FULL;
			// if not full white or full black, mixt block
			else
			{
				mask.Blocks[yB*mask.WBlock+xB]= MASK_BLOCK_MIXT;
				numMixtBlock++;
			}
			}
		}
	}

	// ***** compress into CMask
	CMask newMask;
	uint newMaskDataSize= 0;

	// add the mixt block data size (16*uint8 per block)
	newMaskDataSize+= numMixtBlock*BLOCK_ALPHA_SIZE;
	// compute the bit size. NB: use uint32 to blocks bits. => data is aligned.
	uint bitDataSize= 4*((numTotalBlock+31)/32);
	// add fullBlock bits
	newMask.FullBlockIndex= newMaskDataSize;
	newMaskDataSize+= bitDataSize;
	// add mixtBlock bits
	newMask.MixtBlockIndex= newMaskDataSize;
	newMaskDataSize+= bitDataSize;

	// allocate. Fill with 0 to initialize bits per default EMPTY value
	newMask.Data.resize(newMaskDataSize, 0);

	// compress each mipMaps from bigger to smaller
	uint bitId= 0;
	uint mixtBlockId= 0;
	for(m=0;m<_NumMipMap;m++)
	{
		CMaskInfo &mask= masks[m];

		// ---- build the mixtBlock alpha Mask
		// NOTE(review): text is missing after "for(uint yB=0;yB". The rest of addMask(), any
		// definitions that stood between the two functions (getBitPack(), called below, is
		// not defined anywhere in the surviving text) and the header of the next function
		// are lost. What follows is the body of a routine that reads colDeltaList[] and
		// writes the bitmap "out"; its name and signature are not visible -- restore from
		// version control.
		for(uint yB=0;yB dstTexture;
	// NOTE(review): template argument of "vector" is missing (elements are used as CRGBA below).
	static vector dstUnCompTexture;
	uint32 *bitPtr;
	uint8 *srcPtr;
	uint8 *dstPtr;
	CRGBA *dstUnCompPtr;
	uint32 bitMask;

	// **** prepare Data

	// count number of DXTC5 block in _Texture.
	// (the compressed blocks occupy _Texture[0.._BlockToCompressIndex), the "block to
	// compress" bits start at _BlockToCompressIndex.)
	uint numBlocks= _BlockToCompressIndex/BLOCK_DXTC_SIZE;

	// create a tmp compressed block array, copy of Texture.
	dstTexture.resize(numBlocks*BLOCK_DXTC_SIZE);
	// copy from texture (to have non colored version already copied, and also ALPHA ok)
	memcpy(&dstTexture[0], &_Texture[0], dstTexture.size());

	// create a tmp uncompressed block array, which will receive coloring of mixt blocks
	dstUnCompTexture.resize(numBlocks*BLOCK_NUM_PIXEL);

	// For all blockToCompress, uncompress them in dstUnCompTexture, because they will blend with future mask coloring
	uint n= numBlocks;
	bitPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
	dstUnCompPtr= &dstUnCompTexture[0];
	srcPtr= &_Texture[0];
	while(n>0)
	{
		// one uint32 bit pack describes up to 32 consecutive blocks, lowest bit first.
		uint nBits= min(n, 32U);
		getBitPack(bitPtr, bitMask);
		n-= nBits;
		bitPtr++;
		for(;nBits>0;nBits--)
		{
			// need to compress/uncompress ??
			if(bitMask&1)
			{
				// uncompress this block. ignore alpha
				uncompressBlockRGB(srcPtr, dstUnCompPtr);
			}
			bitMask>>=1;
			dstUnCompPtr+= BLOCK_NUM_PIXEL;
			srcPtr+= BLOCK_DXTC_SIZE;
		}
	}

	// **** build the color version for all masks.

	for(uint maskId= 0; maskId<_Masks.size();maskId++)
	{
		CMask &mask= _Masks[maskId];

		// unpack colDelta, and prepare for use with CFastHLSModifier.
		uint8 dHue= colDeltaList[maskId].DHue;
		uint dLum= 0xFFFFFF00 + colDeltaList[maskId].DLum*2;
		uint dSat= 0xFFFFFF00 + colDeltaList[maskId].DSat*2;

		// get a ptr on alpha of mixt block.
		// (the mixt alpha data is stored at the start of mask.Data, and consumed in block
		// order by the mixt loop below.)
		uint8 *alphaMixtBlock= &mask.Data[0];

		// ---- for all Fullblock of this mask, color and store in dstTexture
		// start at full Block bits desc
		bitPtr= (uint32*)(&mask.Data[mask.FullBlockIndex]);
		uint32 *bitCompPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
		srcPtr= &_Texture[0];
		dstPtr= &dstTexture[0];
		dstUnCompPtr= &dstUnCompTexture[0];
		n= numBlocks;
		// run all blocks.
		while(n>0)
		{
			uint nBits= min(n, 32U);
			// get Full block mask.
			getBitPack(bitPtr, bitMask);
			n-= nBits;
			bitPtr++;
			// get Compress mask.
			uint32 bitCompMask;
			getBitPack(bitCompPtr, bitCompMask);
			bitCompPtr++;
			// for all bits
			for(;nBits>0;nBits--)
			{
				// need to colorize??
				if(bitMask&1)
				{
					// colorize this block. ignore alpha
					colorizeDXTCBlockRGB(srcPtr, dstPtr, dHue, dLum, dSat);
					// If this block is "a block to recompress", then must uncompress it in dstUnCompPtr
					// NOTE(review): as written the uncompress is done for every full block;
					// bitCompMask is loaded and shifted but never tested in the visible code.
					uncompressBlockRGB(dstPtr, dstUnCompPtr);
				}
				bitMask>>=1;
				bitCompMask>>=1;
				srcPtr+= BLOCK_DXTC_SIZE;
				dstPtr+= BLOCK_DXTC_SIZE;
				dstUnCompPtr+= BLOCK_NUM_PIXEL;
			}
		}

		// ---- for all mixtblock of this mask, color, uncompress and blend in store in dstUnCompTexture
		// NOTE(review): tmpColoredBlockDXTC holds one compressed block but is sized with
		// BLOCK_NUM_PIXEL; both macros are 16 so the size is the same.
		static uint8 tmpColoredBlockDXTC[BLOCK_NUM_PIXEL];
		static CRGBA tmpColoredBlockRGBA[BLOCK_NUM_PIXEL];
		// start at mixt Block bits desc
		bitPtr= (uint32*)(&mask.Data[mask.MixtBlockIndex]);
		srcPtr= &_Texture[0];
		dstUnCompPtr= &dstUnCompTexture[0];
		n= numBlocks;
		// run all blocks.
		while(n>0)
		{
			uint nBits= min(n, 32U);
			getBitPack(bitPtr, bitMask);
			n-= nBits;
			bitPtr++;
			for(;nBits>0;nBits--)
			{
				// need to colorize??
				if(bitMask&1)
				{
					// colorize this block. store 2 colors in tmp
					colorizeDXTCBlockRGB(srcPtr, tmpColoredBlockDXTC, dHue, dLum, dSat);
					// copy RGB bits from src to tmp
					// (4th uint32 of the block == bytes 12..15, the per-pixel color selectors
					// read by uncompressBlockRGB().)
					((uint32*)tmpColoredBlockDXTC)[3]= ((uint32*)srcPtr)[3];

					// uncompress the block.
					uncompressBlockRGB(tmpColoredBlockDXTC, tmpColoredBlockRGBA);

					// blend tmpColoredBlockRGBA into dstUnCompPtr, according to alphaMixtBlock.
					for(uint i=0;i<16;i++)
					{
						dstUnCompPtr[i].blendFromuiRGBOnly(dstUnCompPtr[i], tmpColoredBlockRGBA[i], *alphaMixtBlock);
						// next pixel
						alphaMixtBlock++;
					}
				}
				bitMask>>=1;
				srcPtr+= BLOCK_DXTC_SIZE;
				dstUnCompPtr+= BLOCK_NUM_PIXEL;
			}
		}

	}

	// Since colorizeDXTCBlockRGB() use MMX, must end with emms.
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
	if(CSystemInfo::hasMMX())
		_asm emms;
#endif

	// **** compress needed blocks
	n= numBlocks;
	bitPtr= (uint32*)(&_Texture[_BlockToCompressIndex]);
	dstUnCompPtr= &dstUnCompTexture[0];
	dstPtr= &dstTexture[0];
	while(n>0)
	{
		uint nBits= min(n, 32U);
		getBitPack(bitPtr, bitMask);
		n-= nBits;
		bitPtr++;
		for(;nBits>0;nBits--)
		{
			// need to compress ??
			if(bitMask&1)
			{
				// compress this block. ignore alpha
				compressBlockRGB(dstUnCompPtr, dstPtr);
			}
			bitMask>>=1;
			dstUnCompPtr+= BLOCK_NUM_PIXEL;
			dstPtr+= BLOCK_DXTC_SIZE;
		}
	}

	// **** format bitmap out with dstTexture.
	out.reset(CBitmap::DXTC5);
	out.resize(_Width, _Height, CBitmap::DXTC5);

	// create and fill all the mipMaps
	uint w= _Width, h=_Height;
	dstPtr= &dstTexture[0];
	for(uint m=0;m<_NumMipMap;m++)
	{
		// allocate.
		out.resizeMipMap(m, w, h);
		// get the size of this DXTC5 level.
		uint size= out.getPixels(m).size();
		// fill
		memcpy(&out.getPixels(m)[0], dstPtr, size);
		// next mipmap
		dstPtr+= size;
		w= (w+1)/2;
		h= (h+1)/2;
	}
	// verify all filled
	nlassert( dstPtr== (&dstTexture[0] + dstTexture.size()) );

	// set the correct num of mipmap
	out.setMipMapCount(_NumMipMap);
}

// ***************************************************************************
// Apply the HLS modification (dHue, dLum, dSat) to the two 16 bits colors of a block.
// Only the two uint16 at byte offsets 8 and 10 of dstPtr are written; the other bytes
// of the destination block are left untouched.
void CHLSColorTexture::colorizeDXTCBlockRGB(const uint8 *srcPtr, uint8 *dstPtr, uint8 dHue, uint dLum, uint dSat)
{
	// get modifier.
	CFastHLSModifier &fastHLS= CFastHLSModifier::getInstance();

	// apply the color on the 2 DXTC colors
	*(uint16*)(dstPtr+8 )= fastHLS.applyHLSMod(*(uint16*)(srcPtr+8 ) , dHue, dLum, dSat);
	*(uint16*)(dstPtr+10)= fastHLS.applyHLSMod(*(uint16*)(srcPtr+10) , dHue, dLum, dSat);
}

// ***************************************************************************
// Uncompress the color part of one block into 16 CRGBA.
// Reads color0 (offset 8), color1 (offset 10) and the 32 selector bits (offset 12) of srcDXTC,
// builds a 4 entry palette (the 2 colors and 2 blends of them) and writes 16 pixels to
// dstRGBA, consuming 2 selector bits per pixel starting from the low bits.
void CHLSColorTexture::uncompressBlockRGB(const uint8* srcDXTC, CRGBA *dstRGBA)
{
	CRGBA c[4];

	uint16 color0;
	uint16 color1;
	uint32 bits;
	color0= *(uint16*)(srcDXTC+8);
	color1= *(uint16*)(srcDXTC+10);
	bits= *(uint32*)(srcDXTC+12);

	c[0].set565(color0);
	c[1].set565(color1);

	// ignore color0>color1 for DXT3 and DXT5.
	c[2].blendFromui(c[0],c[1],85);
	c[3].blendFromui(c[0],c[1],171);

	// bits to color (ignore alpha result)
	for(uint n= 16;n>0;n--)
	{
		*dstRGBA= c[bits&3];
		bits>>=2;
		dstRGBA++;
	}
}

// ***************************************************************************
// Project the 16 (pixel - mean) differences of diffBlock (3 sint per pixel: R, G, B) on the
// axis v, and build the two end colors rgb0 / rgb1 as mean + v * projection, each component
// then passed through fastClamp8.
// NOTE(review): v is presumably the 16:16 fixed point axis built in compressBlockRGB() -- the
// shifts below (>>12 then >>20) are consistent with that.
void CHLSColorTexture::computeMinMax(sint *diffBlock, CVectorInt &v, sint mean[3], sint rgb0[3], sint rgb1[3])
{
	// compute the min and max distance along the axis v.
	sint mind= INT_MAX;
	sint maxd= INT_MIN;
	sint *srcDiff= diffBlock;
	// for the 16 pixels
	for(uint n=16;n>0;n--,srcDiff+=3)
	{
		sint R= srcDiff[0];
		sint G= srcDiff[1];
		sint B= srcDiff[2];
		sint d= R*v.x + G*v.y + B*v.z;
		// NOTE(review): text is missing inside the next statement (presumably the update of
		// mind and the start of the maxd test) -- TODO restore from version control.
		if(dmaxd)
			maxd= d;
	}

	// avoid overflow. here, Higher possible bit is 16+8+2 (add of 3 values=> *4) == 26
	// 26-12= 14. 14+16=30 => ok.
	mind>>= 12;
	maxd>>= 12;

	// compute the 2 colors: rgb0 on the min, and rgb1 on the max
	rgb0[0]= mean[0]+ (mind*v.x>>20);
	rgb0[1]= mean[1]+ (mind*v.y>>20);
	rgb0[2]= mean[2]+ (mind*v.z>>20);
	rgb1[0]= mean[0]+ (maxd*v.x>>20);
	rgb1[1]= mean[1]+ (maxd*v.y>>20);
	rgb1[2]= mean[2]+ (maxd*v.z>>20);
	// clamp to 0..255
	fastClamp8(rgb0[0]);
	fastClamp8(rgb0[1]);
	fastClamp8(rgb0[2]);
	fastClamp8(rgb1[0]);
	fastClamp8(rgb1[1]);
	fastClamp8(rgb1[2]);
}

// ***************************************************************************
// Compress 16 CRGBA into the color part of a block (bytes 8..15 of dstDXTC; the first 8 bytes
// are not written).
// Steps: compute the mean color, build a weighted covariance matrix of (pixel - mean), take
// the matrix row of largest absolute sum as the main axis, pick the two end colors along that
// axis (computeMinMax), encode them in 565, then select for each pixel the nearest of the 4
// palette colors.
// Side effect: the alpha of every pixel of srcRGBA is set to 0.
void CHLSColorTexture::compressBlockRGB(CRGBA *srcRGBA, uint8* dstDXTC)
{
	// skip alpha part.
	uint8 *dstBlock= dstDXTC+8;

	// **** compute RGB0 and RGB1.
	uint i,j,n;

	// compute the mean color of 16 pixels
	sint mean[3];
	mean[0]= 0;
	mean[1]= 0;
	mean[2]= 0;
	CRGBA *src= srcRGBA;
	for(n=16;n>0;n--,src++)
	{
		mean[0]+= src->R;
		mean[1]+= src->G;
		mean[2]+= src->B;
		// at same time, setup alpha to 0. Important for "compute bits" part (see MMX)!!
		src->A= 0;
	}
	// sum of 16 values => divide by 16.
	mean[0]>>= 4;
	mean[1]>>= 4;
	mean[2]>>= 4;

	// compute col-mean
	sint diffBlock[16*3];
	src= srcRGBA;
	sint *srcDiff= diffBlock;
	for(n=16;n>0;n--,src++,srcDiff+=3)
	{
		srcDiff[0]= (sint)src->R - mean[0];
		srcDiff[1]= (sint)src->G - mean[1];
		srcDiff[2]= (sint)src->B - mean[2];
	}

	// compute the covariant matrix.
	sint coMat[3][3];
	// Apply std RGB factor (0.3, 0.56, 0.14) to choose the best Axis. This gives far better results.
	sint rgbFact[3]= {77, 143, 36};
	for(i=0;i<3;i++)
	{
		// OPTIMIZE SINCE SYMMETRIC MATRIX
		for(j=i;j<3;j++)
		{
			sint32 factor= 0;
			// divide / 16 to avoid overflow sint32
			uint colFactor= (rgbFact[i]*rgbFact[j]) >> 4;
			// run all 16 pixels.
			sint *srcDiff= diffBlock;
			for(n=16;n>0;n--,srcDiff+=3)
			{
				factor+= srcDiff[i] * srcDiff[j] * colFactor;
			}
			coMat[i][j]= factor;
		}
	}
	// Fill symmetric matrix
	coMat[1][0]= coMat[0][1];
	coMat[2][0]= coMat[0][2];
	coMat[2][1]= coMat[1][2];

	// take the bigger vector
	sint maxSize= 0;
	uint axis= 0;
	for(i=0;i<3;i++)
	{
		// Use abs since sqr fails because all sint32 range may be used.
		sint size= abs(coMat[i][0]) + abs(coMat[i][1]) + abs(coMat[i][2]);
		if(size>maxSize)
		{
			maxSize= size;
			axis= i;
		}
	}

	// normalize this vector
	CVector v;
	// remove some rgb factor...
	v.x= (float)coMat[axis][0]/rgbFact[0];
	v.y= (float)coMat[axis][1]/rgbFact[1];
	v.z= (float)coMat[axis][2]/rgbFact[2];
	v.normalize();
	// set a Fixed 16:16.
	CVectorInt vInt;
	// don't bother if OptFastFloorBegin() has been called. 16:16 precision is sufficient.
	vInt.x= OptFastFloor(v.x*65536);
	vInt.y= OptFastFloor(v.y*65536);
	vInt.z= OptFastFloor(v.z*65536);

	// For all pixels, choose the 2 colors along the axis
	sint rgb0[3];
	sint rgb1[3];
	computeMinMax(diffBlock, vInt, mean, rgb0, rgb1);

	// Average to 16 bits. NB: correctly encode 0..255 to 0..31 or 0..63.
	uint R,G,B;
	R= ((rgb0[0]*7967+32768)>>16);
	G= ((rgb0[1]*16191+32768)>>16);
	B= ((rgb0[2]*7967+32768)>>16);
	uint16 rgb016= (R<<11) + (G<<5) + (B);
	R= ((rgb1[0]*7967+32768)>>16);
	G= ((rgb1[1]*16191+32768)>>16);
	B= ((rgb1[2]*7967+32768)>>16);
	uint16 rgb116= (R<<11) + (G<<5) + (B);
	// copy to block
	((uint16*)dstBlock)[0]= rgb016;
	((uint16*)dstBlock)[1]= rgb116;

	// **** compute bits
	CRGBA c[4];
	c[0].set565(rgb016);
	c[1].set565(rgb116);
	c[2].blendFromui(c[0],c[1],85);
	c[3].blendFromui(c[0],c[1],171);
	// it is important that c[] and src Alpha are set to 0, because of "pmaddwd" use in MMX code...
	c[0].A= 0;
	c[1].A= 0;
	c[2].A= 0;
	c[3].A= 0;
	CRGBA *cPtr= c;

	// result.
	uint32 bits= 0;

#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
	if(CSystemInfo::hasMMX())
	{
		// prepare mmx
		uint64 blank= 0;
		__asm
		{
			movq mm7, blank
		}

		// for 16 pixels
		src= srcRGBA;
		for(n=16;n>0;n--,src++)
		{
			// NOTE(review): everything from the block comment opened below to the end of the
			// file is damaged. The end of the commented-out C version, its closing comment
			// marker, the whole MMX inline assembly, the end of this "#if" branch (the
			// matching "#else" / "#endif" are not visible) and parts of the C fallback loop
			// are missing. As the text stands, the block comment is never closed, so the
			// remainder of the file is inside it. Restore from version control.
			/*
			// C Version (+ little asm).
			uint minDist= 0xFFFFFFFF;
			uint id= 0;
			for(i=0;i<4;i++)
			{
				// applying factors such *23, *80, *6 gives better results, but slower (in MMX).
				uint dist= sqr((sint)src->R-(sint)c[i].R);
				dist+= sqr((sint)src->G-(sint)c[i].G);
				dist+= sqr((sint)src->B-(sint)c[i].B);
				if(dist minimum
				// setup the "smaller" id. here esi= iB, ecx= iA not ebx
				// ebx= 0 if A0;n--,src++)
		{
			// C Version (+ little asm).
			uint minDist= 0xFFFFFFFF;
			uint id= 0;
			for(i=0;i<4;i++)
			{
				// applying factors such *23, *80, *6 gives better results, but slower (in MMX).
				uint dist= sqr((sint)src->R-(sint)c[i].R);
				dist+= sqr((sint)src->G-(sint)c[i].G);
				dist+= sqr((sint)src->B-(sint)c[i].B);
				if(dist1)
				bits>>=2;
		}
	}

	// copy
	((uint32*)dstBlock)[1]= bits;
}

} // NL3D