Lecture 3: AVX SOA normalization

Scaffold Head: #include <random> // prng support #include <cstdint> // uint32_t #include <iostream> // std::cout #include <immintrin.h> // AVX intrinsics // timers distributed with this book #include "include/hpc_helpers.hpp" // if we do not have fmad (AVX2), we pretend that we have it #ifndef __AVX2__ #define _mm256_fmadd_ps legacy_fmad __m256 legacy_fmad (__m256 x, __m256 y, __m256 z) { return _mm256_add_ps(_mm256_mul_ps(x, y), z); } #endif // memory aligned arrays can be used as usual void init(float * x, float * y, float * z, uint64_t length) { std::mt19937 engine(42); std::uniform_real_distribution<float> dist(-1.0, +1.0); for (uint64_t i = 0; i < length; i++) { x[i] = dist(engine); y[i] = dist(engine); z[i] = dist(engine); } } void check(float * x, float * y, float * z, uint64_t length) { for (uint64_t i = 0; i < length; i++) { const float rho = x[i]*x[i]+y[i]*y[i]+z[i]*z[i]; if ((rho-1)*(rho-1) > 1E-6) { std::cout << "ERROR: at position " << i << " with norm " << std::sqrt(rho) << std::endl; break; } } } void plain_soa_norm(float * x, float * y, float * z, uint64_t length) { for (uint64_t i = 0; i < length; i++) { const float rho = x[i]*x[i]+y[i]*y[i]+z[i]*z[i]; const float irho = float(1)/std::sqrt(rho); x[i] *= irho; y[i] *= irho; z[i] *= irho; } }
Scaffold Foot: int main () { const uint64_t num_entries = 1UL << 26; const uint64_t num_bytes = num_entries*sizeof(float); TIMERSTART(alloc_memory) auto x = static_cast<float*>(_mm_malloc(num_bytes , 32)); auto y = static_cast<float*>(_mm_malloc(num_bytes , 32)); auto z = static_cast<float*>(_mm_malloc(num_bytes , 32)); TIMERSTOP(alloc_memory) TIMERSTART(plain_init) init(x, y, z, num_entries); TIMERSTOP(plain_init) TIMERSTART(plain_soa_norm) plain_soa_norm(x, y, z, num_entries); TIMERSTOP(plain_soa_norm) TIMERSTART(plain_check) check(x, y, z, num_entries); TIMERSTOP(plain_check) TIMERSTART(avx_init) init(x, y, z, num_entries); TIMERSTOP(avx_init) TIMERSTART(avx_soa_norm) avx_soa_norm(x, y, z, num_entries); TIMERSTOP(avx_soa_norm) TIMERSTART(avx_check) check(x, y, z, num_entries); TIMERSTOP(avx_check) TIMERSTART(free_memory) _mm_free(x); _mm_free(y); _mm_free(z); TIMERSTOP(free_memory) std::cout << "Parallel programming is fun!" << std::endl; }

Start time: Mon 22 Oct 2018 10:51:00
End time: Mon 01 Apr 2019 12:00:00

General test timeout: 10.0 seconds

Tests

Comment prefix	`#`
Given input
Expected output	Parallel programming is fun!

Lecture 3: AVX SOA normalization Assignment

Tests