__m256 deltaWeightVector = _mm256_set1_ps(deltaWeight);
// The pointers are updated to the next stored value by jumping X doubles ahead depending on the container they are pointing to.
// 4 doubles are for MPointArrays and 3 doubles are for MVectorArrays
for (unsigned int vertexIndex{ start }; vertexIndex < end; vertexIndex+=2, currentVertexPosition += 8, currentDeltaVector += 6 ) {
__m256d vertexPosition = _mm256_load_pd(currentVertexPosition);
__m256d vertexPosition2 = _mm256_load_pd(currentVertexPosition + 4);
// Deltas contains 3 relevant values. 4 doubles are loaded for AVX vectors so the last value must be masked.
// Positive number ( whose highest bit is one ) mask that data to zero.
__m256i deltaMask = _mm256_setr_epi32(-1, -1, -1, 1, -1, -1, -1, 1);
__m256 deltaVector = _mm256_maskload_ps(currentDeltaVector, deltaMask);
__m256d partialResult = _mm256_castps_pd(_mm256_mul_ps(deltaVector, deltaWeightVector));
__m256d resultPosition = _mm256_add_pd(partialResult, vertexPosition);
__m256d resultPosition2 = _mm256_add_pd(partialResult, vertexPosition2);
_mm256_store_pd(currentVertexPosition, resultPosition);
_mm256_store_pd(currentVertexPosition+4, resultPosition2);
}
Be the first to comment
You can use [html][/html], [css][/css], [php][/php] and more to embed the code. Urls are automatically hyperlinked. Line breaks and paragraphs are automatically generated.