# Part 2 : AVX Intrinsics - Example answer

``````#include "workshop.h"

#ifdef __AVX__
#include <immintrin.h>
#else
#warning AVX not supported. Code will not compile.
#endif

int main(int argc, char **argv)
{
const int size = 512;

auto a = workshop::Array<float>(size);
auto b = workshop::Array<float>(size);
auto c = workshop::Array<float>(size);

auto avx_a = workshop::AlignedArray<__m256>(size/8);
auto avx_b = workshop::AlignedArray<__m256>(size/8);
auto avx_c = workshop::AlignedArray<__m256>(size/8);

for (int i=0; i<size; ++i)
{
a[i] = 1.0*(i+1);
b[i] = 2.5*(i+1);
c[i] = 0.0;
}

for (int i=0; i<size; i+=8)
{
avx_a[i/8] = _mm256_set_ps(1.0*(i+7+1),
1.0*(i+6+1),
1.0*(i+5+1),
1.0*(i+4+1),
1.0*(i+3+1),
1.0*(i+2+1),
1.0*(i+1+1),
1.0*(i+0+1));

avx_b[i/8] = _mm256_set_ps(2.5*(i+7+1),
2.5*(i+6+1),
2.5*(i+5+1),
2.5*(i+4+1),
2.5*(i+3+1),
2.5*(i+2+1),
2.5*(i+1+1),
2.5*(i+0+1));

avx_c[i/8] = _mm256_set1_ps(0.0);
}

auto timer = workshop::start_timer();

for (int j=0; j<100000; ++j)
{
for (int i=0; i<size; ++i)
{
c[i] = std::sqrt( a[i] + b[i] );
}
}

auto duration = workshop::get_duration(timer);

timer = workshop::start_timer();

for (int j=0; j<100000; ++j)
{
for (int i=0; i<size/8; ++i)
{
avx_c[i] = _mm256_sqrt_ps( _mm256_add_ps(avx_a[i], avx_b[i]) );
}
}

auto vector_duration = workshop::get_duration(timer);

std::cout << "The standard loop took " << duration
<< " microseconds to complete." << std::endl;

std::cout << "The vectorised loop took " << vector_duration
<< " microseconds to complete." << std::endl;

return 0;
}``````

# PreviousUpNext

Quick Links | Home | Courses | Software | Contact