AVX v3

__m256d deltaWeightVector = _mm256_set1_pd(deltaWeight); // The pointers are updated to the next stored value by jumping X doubles ahead depending on the container they are pointing to. // 4 doubles are for MPointArrays and 3 doubles are for MVectorArrays for (unsigned int vertexIndex{ start }; vertexIndex < end; ++vertexIndex, currentVertexPosition += 4, currentDeltaVector += 3 ) { __m256d vertexPosition = _mm256_load_pd(currentVertexPosition); // Deltas contains 3 relevant values. 4 doubles are loaded for AVX vectors so the last value must be masked. // Positive number ( whose highest bit is one ) mask that data to zero. __m256i deltaMask = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, 1, 1); __m256d deltaVector = _mm256_maskload_pd(currentDeltaVector, deltaMask); __m256d partialResult = _mm256_mul_pd(deltaVector, deltaWeightVector); __m256d resultPosition = _mm256_add_pd(partialResult, vertexPosition); _mm256_store_pd(currentVertexPosition, resultPosition); }

Be the first to comment

You can use [html][/html], [css][/css], [php][/php] and more to embed code. URLs are automatically hyperlinked. Line breaks and paragraphs are automatically generated.