AVX Prototype

/// Smooths a set of positions by repeatedly pulling every vertex towards the average of its neighbours (AVX prototype).
///
/// For each pass and each vertex the function sums the positions of MAX_NEIGHBOURS neighbours (indices read from
/// the `neighbours` member), scales the sum by AVERAGE_FACTOR and blends it with the current position:
///     smoothed = (average - current) * weight + current
/// Vertices are processed four at a time with AVX; any vertices left over when the count is not a multiple of
/// four are processed by an equivalent scalar loop, so no buffer is read or written out of bounds.
///
/// @param verticesPositions      Positions to smooth; only read.
/// @param out_smoothedPositions  Resized to the length of verticesPositions and filled through composePointArray.
/// @param iterations             Number of smoothing passes. With 0 the decomposed input is recomposed untouched.
/// @param weight                 Factor applied to the (average - current) offset of every vertex.
/// @return Always MStatus::kSuccess.
MStatus DeltaMush::averageSmoothing(const MPointArray & verticesPositions, MPointArray & out_smoothedPositions, unsigned int iterations, double weight)
{
    // A __m256d register holds four doubles, i.e. one coordinate of four vertices.
    constexpr unsigned int lanesPerVector{ 4 };
    // Distance, in entries of `neighbours`, between the neighbour lists of two consecutive vertices.
    // NOTE(review): kept at the literal value 4 used by the lane offsets; presumably equal to MAX_NEIGHBOURS - confirm.
    constexpr unsigned int neighbourStride{ 4 };

    unsigned int vertexCount{ verticesPositions.length() };
    out_smoothedPositions.setLength(vertexCount);

    // Structure-of-arrays buffers: one array per coordinate so four vertices can be loaded into one register.
    double* verticesX = new double[vertexCount];
    double* verticesY = new double[vertexCount];
    double* verticesZ = new double[vertexCount];
    decomposePointArray(verticesPositions, verticesX, verticesY, verticesZ, vertexCount);

    // Second set of buffers: every pass reads from the "Copy" arrays and writes to the plain ones, so a vertex
    // updated earlier in the pass never influences the neighbours processed after it.
    double* verticesCopyX = new double[vertexCount];
    std::copy(verticesX, verticesX + vertexCount, verticesCopyX);
    double* verticesCopyY = new double[vertexCount];
    std::copy(verticesY, verticesY + vertexCount, verticesCopyY);
    double* verticesCopyZ = new double[vertexCount];
    std::copy(verticesZ, verticesZ + vertexCount, verticesCopyZ);

    // Loop-invariant values, computed once.
    const double averageFactor = AVERAGE_FACTOR;
    const __m256d averageFactorVec = _mm256_set1_pd(averageFactor);
    const __m256d weightVector = _mm256_set1_pd(weight);

    // Only whole groups of four vertices go through the AVX path; the remainder is handled by the scalar tail.
    const unsigned int vectorisedCount{ vertexCount - (vertexCount % lanesPerVector) };

    for (unsigned int iterationIndex{ 0 }; iterationIndex < iterations; ++iterationIndex) {
        int* neighbourPtr = &neighbours[0];
        unsigned int vertexIndex{ 0 };

        // AVX path: four vertices per step.
        for (; vertexIndex < vectorisedCount; vertexIndex += lanesPerVector) {
            __m256d averageX = _mm256_setzero_pd();
            __m256d averageY = _mm256_setzero_pd();
            __m256d averageZ = _mm256_setzero_pd();

            // Accumulate the neighbourIndex-th neighbour of each of the four vertices.
            for (unsigned int neighbourIndex{ 0 }; neighbourIndex < MAX_NEIGHBOURS; ++neighbourIndex, ++neighbourPtr) {
                const int lane0 = neighbourPtr[0];
                const int lane1 = neighbourPtr[neighbourStride];
                const int lane2 = neighbourPtr[2 * neighbourStride];
                const int lane3 = neighbourPtr[3 * neighbourStride];

                averageX = _mm256_add_pd(averageX, _mm256_setr_pd(verticesCopyX[lane0], verticesCopyX[lane1], verticesCopyX[lane2], verticesCopyX[lane3]));
                averageY = _mm256_add_pd(averageY, _mm256_setr_pd(verticesCopyY[lane0], verticesCopyY[lane1], verticesCopyY[lane2], verticesCopyY[lane3]));
                averageZ = _mm256_add_pd(averageZ, _mm256_setr_pd(verticesCopyZ[lane0], verticesCopyZ[lane1], verticesCopyZ[lane2], verticesCopyZ[lane3]));
            }
            // The inner loop advanced the pointer by MAX_NEIGHBOURS entries; skip the lists of the three other lanes.
            neighbourPtr += (lanesPerVector - 1) * neighbourStride;

            // Turn the accumulated sums into averages.
            averageX = _mm256_mul_pd(averageX, averageFactorVec);
            averageY = _mm256_mul_pd(averageY, averageFactorVec);
            averageZ = _mm256_mul_pd(averageZ, averageFactorVec);

            // Unaligned loads/stores: new[] does not guarantee the 32-byte alignment the aligned variants require.
            const __m256d currentX = _mm256_loadu_pd(verticesCopyX + vertexIndex);
            const __m256d currentY = _mm256_loadu_pd(verticesCopyY + vertexIndex);
            const __m256d currentZ = _mm256_loadu_pd(verticesCopyZ + vertexIndex);

            // smoothed = (average - current) * weight + current
            averageX = _mm256_add_pd(_mm256_mul_pd(_mm256_sub_pd(averageX, currentX), weightVector), currentX);
            averageY = _mm256_add_pd(_mm256_mul_pd(_mm256_sub_pd(averageY, currentY), weightVector), currentY);
            averageZ = _mm256_add_pd(_mm256_mul_pd(_mm256_sub_pd(averageZ, currentZ), weightVector), currentZ);

            _mm256_storeu_pd(verticesX + vertexIndex, averageX);
            _mm256_storeu_pd(verticesY + vertexIndex, averageY);
            _mm256_storeu_pd(verticesZ + vertexIndex, averageZ);
        }

        // Scalar tail: the up-to-three vertices that do not fill a whole register. Uses the same neighbour
        // addressing (one list every neighbourStride entries) and the same arithmetic as the AVX path.
        for (; vertexIndex < vertexCount; ++vertexIndex, neighbourPtr += neighbourStride) {
            double sumX{ 0.0 };
            double sumY{ 0.0 };
            double sumZ{ 0.0 };
            for (unsigned int neighbourIndex{ 0 }; neighbourIndex < MAX_NEIGHBOURS; ++neighbourIndex) {
                const int neighbour = neighbourPtr[neighbourIndex];
                sumX += verticesCopyX[neighbour];
                sumY += verticesCopyY[neighbour];
                sumZ += verticesCopyZ[neighbour];
            }

            const double currentX = verticesCopyX[vertexIndex];
            const double currentY = verticesCopyY[vertexIndex];
            const double currentZ = verticesCopyZ[vertexIndex];
            verticesX[vertexIndex] = (sumX * averageFactor - currentX) * weight + currentX;
            verticesY[vertexIndex] = (sumY * averageFactor - currentY) * weight + currentY;
            verticesZ[vertexIndex] = (sumZ * averageFactor - currentZ) * weight + currentZ;
        }

        // The freshly written buffers become the read buffers of the next pass (and of the final compose).
        std::swap(verticesX, verticesCopyX);
        std::swap(verticesY, verticesCopyY);
        std::swap(verticesZ, verticesCopyZ);
    }

    // After the last swap the "Copy" buffers hold the most recent result.
    composePointArray(verticesCopyX, verticesCopyY, verticesCopyZ, out_smoothedPositions, vertexCount);

    delete[] verticesX;
    delete[] verticesY;
    delete[] verticesZ;
    delete[] verticesCopyX;
    delete[] verticesCopyY;
    delete[] verticesCopyZ;

    return MStatus::kSuccess;
}

Be the first to comment

You can use [html][/html], [css][/css], [php][/php] and more to embed code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.