It provides float4, double2 and double4 structs with several kinds of constructors, overloaded operators, and methods for horizontal add, dot product and swizzling.

Code examples:

4x4 matrix multiply with floats and doubles:

// dst = a x b, ~44 cycles with -O3 -funroll-loops

void mmul4x4 (const float *a, const float *b, float *dst)

{

for (int i=0; i<16; i+=4) {

float4 row = float4(a) * float4(b[i]); // float4(a) is {a[0], a[1], a[2], a[3]}

// float4(b[i]) is {b[i], b[i], b[i], b[i]}

for (int j=1; j<4; j++)

row += float4(a+j*4) * float4(b[i+j]);

*(float4*)(&dst[i]) = row;

}

}

void mmul4x4d (const double *a, const double *b, double *dst)

{

for (int i=0; i<16; i+=4) {

double4 row = double4(a) * double4(b[i]);

for (int j=1; j<4; j++)

row += double4(a+j*4) * double4(b[i+j]);

*(double4*)(&dst[i]) = row;

}

}

Dot product of two arrays of vectors:

// Sums the dot products of corresponding vectors in two arrays.
//
// Multiplies a[i] by b[i] lane-wise for every i in [0, len), accumulates the
// products in a double2, and returns that accumulator's sum() (presumably the
// horizontal add of its lanes).
//
// a, b : arrays holding at least `len` double2 elements each.
// len  : number of element pairs to process; len <= 0 yields 0.0.
double dotArrays (const double2 *a, const double2 *b, int len)
{
    // Nothing to accumulate: return a well-defined 0.0 instead of the contents
    // of a default-constructed double2.
    if (len <= 0)
        return 0.0;

    // Seed the accumulator with the first product so the result never depends
    // on whether double2's default constructor zeroes its lanes.
    double2 sum = a[0] * b[0];

    for (int i = 1; i < len; i++) {
        sum += a[i] * b[i];
    }

    return sum.sum();
}