// Adds the weighted delta to every vertex position in [start, end), four doubles at a time:
//     position += delta * deltaWeight
// Positions are stored as 4 doubles per element and deltas as 3 doubles per element
// (4 doubles are for MPointArrays and 3 doubles are for MVectorArrays), so the two
// pointers advance by different strides on each iteration.

// The weight broadcast to all four lanes.
__m256d deltaWeightVector = _mm256_set1_pd(deltaWeight);

// A delta contains only 3 relevant values, but an AVX register holds 4 doubles, so the
// last lane must be masked out: otherwise it would pick up the first double of the next
// delta (or, for the final delta, whatever follows the array).
// _mm256_maskload_pd inspects the highest bit of each 64-bit lane: a set bit (negative
// value) loads the double, a clear bit (positive value) zeroes the lane instead.
// Each pair of 32-bit integers below forms one 64-bit lane, so the first three lanes are
// loaded and the fourth is zeroed. The mask never changes, so it is built once here
// rather than on every iteration.
const __m256i deltaMask = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, 1, 1);

for (unsigned int vertexIndex{ start }; vertexIndex < end; ++vertexIndex, currentVertexPosition += 4, currentDeltaVector += 3) {
    // NOTE(review): _mm256_load_pd / _mm256_store_pd require a 32-byte aligned address;
    // confirm that the position storage guarantees this, otherwise the unaligned
    // variants (_mm256_loadu_pd / _mm256_storeu_pd) are needed.
    __m256d vertexPosition = _mm256_load_pd(currentVertexPosition);

    // Three delta components plus a zeroed fourth lane.
    __m256d deltaVector = _mm256_maskload_pd(currentDeltaVector, deltaMask);

    // The zeroed fourth lane contributes 0 * deltaWeight to the fourth position component.
    __m256d partialResult = _mm256_mul_pd(deltaVector, deltaWeightVector);
    __m256d resultPosition = _mm256_add_pd(partialResult, vertexPosition);

    // Write the displaced position back in place.
    _mm256_store_pd(currentVertexPosition, resultPosition);
}
Be the first to comment
You can use [html][/html], [css][/css], [php][/php] and more to embed code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.