/*************************************** Auteur : Pierre Aubert Mail : aubertp7@gmail.com Licence : CeCILL-C ****************************************/ #include "phoenix_intrinsics.h" #include #include "intrinsics_propagation.h" ///Propagate the U and V species in the matVecVecU and matVecV /** @param[out] outMatVecU : updated matrix U version (with vectorial neighbours) * @param[out] outMatVecV : updated matrix V version (with vectorial neighbours) * @param matVecVecU : input of matrix U (with vectorial neighbours) * @param matVecV : input of matrix V (with vectorial neighbours) * @param nbRow : number of rows of the matrices * @param nbCol : number of columns of the matrices * @param matBroadcastDeltaSquare : matrix of the delta square values (with broadcast neighbours) * @param nbStencilRow : number of rows of the matrix matBroadcastDeltaSquare * @param nbStencilCol : number of columns of the matrix matBroadcastDeltaSquare * @param diffusionRateU : diffusion rate of the U specie * @param diffudionRateV : diffusion rate of the V specie * @param feedRate : rate of the process which feeds U and drains U, V and P * @param killRate : rate of the process which converts V into P * @param dt : time interval between two steps */ void grayscott_propagation(float * outMatVecU, float * outMatVecV, const float * matVecVecU, const float * matVecVecV, long nbRow, long nbCol, const float * matBroadcastDeltaSquare, long nbStencilRow, long nbStencilCol, float diffusionRateU, float diffusionRateV, float feedRate, float killRate, float dt) { long offsetStencilRow((nbStencilRow - 1l)/2l); long offsetStencilCol((nbStencilCol - 1l)/2l); long nbVecCol(nbCol/PLIB_VECTOR_SIZE_FLOAT); PRegVecf vecOne(plib_broadcast_ss(1.0f)); PRegVecf vecFeedRate(plib_broadcast_ss(feedRate)); PRegVecf vecKillRate(plib_broadcast_ss(killRate)); PRegVecf vecDiffudionRateU(plib_broadcast_ss(diffusionRateU)); PRegVecf vecDiffudionRateV(plib_broadcast_ss(diffusionRateV)); PRegVecf vecDt(plib_broadcast_ss(dt)); for(long i(1l); i < nbRow - 1l; ++i){ long firstRowStencil(std::max(i - offsetStencilRow, 0l)); long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow)); for(long j(0l); j < nbVecCol; ++j){ long firstColStencil(std::max(j - offsetStencilCol, 0l)); long lastColStencil(std::min(j + offsetStencilCol + 1l, nbVecCol)); long stencilIndexRow(0l); // float u(matU[i*nbCol + j]), v(matV[i*nbCol + j]); // float fullU(0.0f), fullV(0.0f); PRegVecf vecU(plib_load_ps(matVecVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecV(plib_load_ps(matVecVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecFullU(plib_broadcast_ss(0.0f)), vecFullV(plib_broadcast_ss(0.0f)); for(long k(firstRowStencil); k < lastRowStencil; ++k){ long stencilIndexCol(0l); for(long l(firstColStencil); l < lastColStencil; ++l){ // float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]); PRegVecf vecDeltaSquare(plib_load_ps(matBroadcastDeltaSquare + (stencilIndexRow*nbStencilCol + stencilIndexCol)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecKLU(plib_load_ps(matVecVecU + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecKLV(plib_load_ps(matVecVecV + (k*nbVecCol + l)*PLIB_VECTOR_SIZE_FLOAT)); PRegVecf vecKLUminU(plib_sub_ps(vecKLU, vecU)); PRegVecf vecKLVminV(plib_sub_ps(vecKLV, vecV)); PRegVecf vecKLUminUdMultDeltaSquare(plib_mul_ps(vecKLUminU, vecDeltaSquare)); PRegVecf vecKLVminVdMultDeltaSquare(plib_mul_ps(vecKLVminV, vecDeltaSquare)); vecFullU = plib_add_ps(vecFullU, vecKLUminUdMultDeltaSquare); vecFullV = plib_add_ps(vecFullV, vecKLVminVdMultDeltaSquare); // fullU += (matU[k*nbCol + l] - u)*deltaSquare; // fullV += (matV[k*nbCol + l] - v)*deltaSquare; ++stencilIndexCol; } ++stencilIndexRow; } // float uvSquare(u*v*v); PRegVecf vecUVSquare(plib_mul_ps(vecU, plib_mul_ps(vecV, vecV))); PRegVecf vecOneMinusU(plib_sub_ps(vecOne, vecU)); PRegVecf vecFeedPlusKill(plib_add_ps(vecFeedRate, vecKillRate)); PRegVecf vecDiffFullU(plib_mul_ps(vecDiffudionRateU, vecFullU)); PRegVecf vecDiffFullV(plib_mul_ps(vecDiffudionRateV, vecFullV)); PRegVecf vecFeedRateMultOneMinusU(plib_mul_ps(vecFeedRate, vecOneMinusU)); PRegVecf vecFeedPlusKillMultV(plib_mul_ps(vecFeedPlusKill, vecV)); PRegVecf vecDiffFullUMinusUVSquare(plib_sub_ps(vecDiffFullU, vecUVSquare)); PRegVecf vecDiffFullVPlusUVSquare(plib_add_ps(vecDiffFullV, vecUVSquare)); PRegVecf vecDu(plib_add_ps(vecDiffFullUMinusUVSquare, vecFeedRateMultOneMinusU)); PRegVecf vecDv(plib_sub_ps(vecDiffFullVPlusUVSquare, vecFeedPlusKillMultV)); // float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u)); // float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v); PRegVecf vecDuDt(plib_mul_ps(vecDu, vecDt)); PRegVecf vecDvDt(plib_mul_ps(vecDv, vecDt)); PRegVecf vecUPlusDuDt(plib_add_ps(vecU, vecDuDt)); PRegVecf vecVPlusDvDt(plib_add_ps(vecV, vecDvDt)); plib_store_ps(outMatVecU + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecUPlusDuDt); plib_store_ps(outMatVecV + (i*nbVecCol + j)*PLIB_VECTOR_SIZE_FLOAT, vecVPlusDvDt); // outMatVecU[i*nbCol + j] = u + du*dt; // outMatVecV[i*nbCol + j] = v + dv*dt; } } }