Update GLM to the latest version (0.9.9.3). As of this release, GLM no longer default-initializes matrices to the identity matrix, so this commit also updates all of LearnOpenGL's code accordingly: every matrix is now explicitly constructor-initialized to the identity matrix where relevant.

This commit is contained in:
Joey de Vries
2018-12-30 14:27:14 +01:00
parent 239c456ae9
commit f4b6763356
474 changed files with 38219 additions and 38025 deletions

240
includes/glm/simd/common.h Normal file
View File

@@ -0,0 +1,240 @@
/// @ref simd
/// @file glm/simd/common.h
#pragma once
#include "platform.h"
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
/// Adds two vectors lane by lane across all four packed floats.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_add(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const sum = _mm_add_ps(a, b);
	return sum;
}
/// Adds only the lowest lane (a[0] + b[0]); the three upper lanes are taken from a.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_add(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const sum = _mm_add_ss(a, b);
	return sum;
}
/// Subtracts b from a lane by lane across all four packed floats.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_sub(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const difference = _mm_sub_ps(a, b);
	return difference;
}
/// Subtracts only the lowest lane (a[0] - b[0]); the three upper lanes are taken from a.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_sub(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const difference = _mm_sub_ss(a, b);
	return difference;
}
/// Multiplies two vectors lane by lane across all four packed floats.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_mul(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const product = _mm_mul_ps(a, b);
	return product;
}
/// Multiplies only the lowest lane (a[0] * b[0]); the three upper lanes are taken from a.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_mul(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const product = _mm_mul_ss(a, b);
	return product;
}
/// Divides a by b lane by lane across all four packed floats (full-precision divide).
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_div(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const quotient = _mm_div_ps(a, b);
	return quotient;
}
/// Divides only the lowest lane (a[0] / b[0]); the three upper lanes are taken from a.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_div(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const quotient = _mm_div_ss(a, b);
	return quotient;
}
/// Low-precision division: multiplies a by the hardware reciprocal estimate of b
/// (_mm_rcp_ps) instead of issuing a full-precision divide.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_div_lowp(glm_f32vec4 a, glm_f32vec4 b)
{
	glm_f32vec4 const reciprocal = _mm_rcp_ps(b);
	return glm_vec4_mul(a, reciprocal);
}
/// Identity swizzle: the shuffle selector (3, 2, 1, 0) keeps every lane in its own position.
/// Uses the single-operand permute when GLM_ARCH reports AVX2, and a self-shuffle otherwise.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_swizzle_xyzw(glm_f32vec4 a)
{
# if GLM_ARCH & GLM_ARCH_AVX2_BIT
	glm_f32vec4 const swizzled = _mm_permute_ps(a, _MM_SHUFFLE(3, 2, 1, 0));
# else
	glm_f32vec4 const swizzled = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 1, 0));
# endif
	return swizzled;
}
/// Multiply-add on the lowest lane only: a[0] * b[0] + c[0].
/// Uses the fused _mm_fmadd_ss when GLM_ARCH reports AVX2 and the compiler is not Clang;
/// otherwise performs a separate scalar multiply followed by a scalar add.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_fma(glm_f32vec4 a, glm_f32vec4 b, glm_f32vec4 c)
{
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
	glm_f32vec4 const result = _mm_fmadd_ss(a, b, c);
# else
	glm_f32vec4 const product = _mm_mul_ss(a, b);
	glm_f32vec4 const result = _mm_add_ss(product, c);
# endif
	return result;
}
/// Multiply-add on all four lanes: a * b + c.
/// Uses the fused _mm_fmadd_ps when GLM_ARCH reports AVX2 and the compiler is not Clang;
/// otherwise composes glm_vec4_mul and glm_vec4_add.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_fma(glm_f32vec4 a, glm_f32vec4 b, glm_f32vec4 c)
{
# if (GLM_ARCH & GLM_ARCH_AVX2_BIT) && !(GLM_COMPILER & GLM_COMPILER_CLANG)
	glm_f32vec4 const result = _mm_fmadd_ps(a, b, c);
# else
	glm_f32vec4 const product = glm_vec4_mul(a, b);
	glm_f32vec4 const result = glm_vec4_add(product, c);
# endif
	return result;
}
/// Per-lane absolute value of four floats, obtained by masking off bit 31 (the sign bit).
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_abs(glm_f32vec4 x)
{
	// 0x7FFFFFFF keeps every bit except the sign bit of each 32-bit lane.
	glm_f32vec4 const magnitudeMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
	return _mm_and_ps(x, magnitudeMask);
}
/// Per-lane absolute value of four signed 32-bit integers.
GLM_FUNC_QUALIFIER glm_ivec4 glm_ivec4_abs(glm_ivec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSSE3_BIT
	// _mm_sign_epi32(x, x) negates each lane whose own value is negative.
	glm_ivec4 const magnitude = _mm_sign_epi32(x, x);
# else
	// Branch-free two's-complement trick: (x ^ s) - s, where s is the sign bit
	// replicated across the lane (0 for non-negative lanes, -1 for negative ones).
	glm_ivec4 const signFill = _mm_srai_epi32(x, 31);
	glm_ivec4 const flipped = _mm_xor_si128(x, signFill);
	glm_ivec4 const magnitude = _mm_sub_epi32(flipped, signFill);
# endif
	return magnitude;
}
/// Per-lane sign: -1.0f where x < 0, +1.0f where x > 0, and 0.0f in every other case.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_sign(glm_vec4 x)
{
	glm_vec4 const zero = _mm_setzero_ps();
	// Each compare yields an all-ones or all-zeros lane mask; AND-ing the mask with a
	// constant therefore selects either that constant or 0.0f.
	glm_vec4 const negativePart = _mm_and_ps(_mm_cmplt_ps(x, zero), _mm_set1_ps(-1.0f));
	glm_vec4 const positivePart = _mm_and_ps(_mm_cmpgt_ps(x, zero), _mm_set1_ps(1.0f));
	return _mm_or_ps(negativePart, positivePart);
}
/// Rounds each lane to the nearest integral value.
/// With SSE4.1 this is a single _mm_round_ps. Otherwise it uses the classic
/// "add and subtract 2^23" trick, which relies on the default round-to-nearest
/// floating-point mode: a float of magnitude >= 2^23 has no fractional bits, so the
/// addition itself performs the rounding.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_round(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
	return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
# else
	glm_vec4 const sgn0 = _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)));
	glm_vec4 const lim0 = _mm_set_ps1(8388608.0f); // 2^23
	glm_vec4 const and0 = _mm_and_ps(sgn0, x);     // sign bit of x
	glm_vec4 const or0 = _mm_or_ps(and0, lim0);    // copysign(2^23, x)
	glm_vec4 const add0 = glm_vec4_add(x, or0);
	glm_vec4 const sub0 = glm_vec4_sub(add0, or0);
	// The trick is only valid for |x| < 2^23. Larger magnitudes are already integral,
	// but x + 2^23 would land where the float spacing is 2 and odd values would be
	// altered, so those lanes (and NaN lanes, for which the compare is false) return x.
	glm_vec4 const abs0 = _mm_andnot_ps(sgn0, x);
	glm_vec4 const cmp0 = _mm_cmplt_ps(abs0, lim0);
	return _mm_or_ps(_mm_and_ps(cmp0, sub0), _mm_andnot_ps(cmp0, x));
# endif
}
/// Per-lane floor: the largest integral value not greater than x.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_floor(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
	glm_vec4 const result = _mm_floor_ps(x);
# else
	// Round to nearest first, then step down by one in every lane where the
	// rounded value ended up above x.
	glm_vec4 const nearest = glm_vec4_round(x);
	glm_vec4 const roundedUp = _mm_cmplt_ps(x, nearest);
	glm_vec4 const correction = _mm_and_ps(roundedUp, _mm_set1_ps(1.0f));
	glm_vec4 const result = glm_vec4_sub(nearest, correction);
# endif
	return result;
}
/* trunc TODO
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_trunc(glm_vec4 x)
{
return glm_vec4();
}
*/
// roundEven
/// Rounds each lane to the nearest integral value, with ties going to the even neighbour.
/// With SSE4.1 this is a single _mm_round_ps (same path as glm_vec4_round). Otherwise it
/// uses the "add and subtract 2^23" trick, which relies on the default round-to-nearest-even
/// floating-point mode.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_roundEven(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
	return _mm_round_ps(x, _MM_FROUND_TO_NEAREST_INT);
# else
	glm_vec4 const sgn0 = _mm_castsi128_ps(_mm_set1_epi32(int(0x80000000)));
	glm_vec4 const lim0 = _mm_set_ps1(8388608.0f); // 2^23
	glm_vec4 const and0 = _mm_and_ps(sgn0, x);     // sign bit of x
	glm_vec4 const or0 = _mm_or_ps(and0, lim0);    // copysign(2^23, x)
	glm_vec4 const add0 = glm_vec4_add(x, or0);
	glm_vec4 const sub0 = glm_vec4_sub(add0, or0);
	// The trick is only valid for |x| < 2^23. Larger magnitudes are already integral,
	// but x + 2^23 would land where the float spacing is 2 and odd values would be
	// altered, so those lanes (and NaN lanes, for which the compare is false) return x.
	glm_vec4 const abs0 = _mm_andnot_ps(sgn0, x);
	glm_vec4 const cmp0 = _mm_cmplt_ps(abs0, lim0);
	return _mm_or_ps(_mm_and_ps(cmp0, sub0), _mm_andnot_ps(cmp0, x));
# endif
}
/// Per-lane ceiling: the smallest integral value not less than x.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_ceil(glm_vec4 x)
{
# if GLM_ARCH & GLM_ARCH_SSE41_BIT
	glm_vec4 const result = _mm_ceil_ps(x);
# else
	// Round to nearest first, then step up by one in every lane where the
	// rounded value ended up below x.
	glm_vec4 const nearest = glm_vec4_round(x);
	glm_vec4 const roundedDown = _mm_cmpgt_ps(x, nearest);
	glm_vec4 const correction = _mm_and_ps(roundedDown, _mm_set1_ps(1.0f));
	glm_vec4 const result = glm_vec4_add(nearest, correction);
# endif
	return result;
}
/// Per-lane fractional part, computed as x - floor(x).
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_fract(glm_vec4 x)
{
	glm_vec4 const wholePart = glm_vec4_floor(x);
	return glm_vec4_sub(x, wholePart);
}
/// Per-lane floored modulus: x - y * floor(x / y).
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_mod(glm_vec4 x, glm_vec4 y)
{
	glm_vec4 const quotient = glm_vec4_div(x, y);
	glm_vec4 const wholeQuotient = glm_vec4_floor(quotient);
	glm_vec4 const multiple = glm_vec4_mul(y, wholeQuotient);
	return glm_vec4_sub(x, multiple);
}
/// Per-lane clamp of v to [minVal, maxVal]: max(min(v, maxVal), minVal).
/// The upper bound is applied first, then the lower bound.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_clamp(glm_vec4 v, glm_vec4 minVal, glm_vec4 maxVal)
{
	glm_vec4 const cappedAbove = _mm_min_ps(v, maxVal);
	return _mm_max_ps(cappedAbove, minVal);
}
/// Per-lane linear interpolation: v1 * (1 - a) + v2 * a.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_mix(glm_vec4 v1, glm_vec4 v2, glm_vec4 a)
{
	glm_vec4 const oneMinusA = glm_vec4_sub(_mm_set1_ps(1.0f), a);
	glm_vec4 const weightedV1 = glm_vec4_mul(v1, oneMinusA);
	// v2 * a is folded into the final addition through the multiply-add helper.
	return glm_vec4_fma(v2, a, weightedV1);
}
/// Per-lane step function: 0.0f where x < edge, 1.0f otherwise (including x == edge).
/// Each lane is decided independently of the others.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_step(glm_vec4 edge, glm_vec4 x)
{
	// All-ones mask in the lanes where x is strictly below the edge.
	glm_vec4 const cmp = _mm_cmplt_ps(x, edge);
	// ~cmp & 1.0f: clears the "below" lanes to 0.0f and leaves 1.0f everywhere else.
	return _mm_andnot_ps(cmp, _mm_set1_ps(1.0f));
}
/// Per-lane smooth Hermite interpolation between edge0 and edge1:
///   t = clamp((x - edge0) / (edge1 - edge0), 0, 1);  result = t * t * (3 - 2 * t)
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_smoothstep(glm_vec4 edge0, glm_vec4 edge1, glm_vec4 x)
{
	glm_vec4 const sub0 = glm_vec4_sub(x, edge0);
	glm_vec4 const sub1 = glm_vec4_sub(edge1, edge0);
	// Normalise x into the [edge0, edge1] range; this must be a division, not a subtraction.
	glm_vec4 const div0 = glm_vec4_div(sub0, sub1);
	glm_vec4 const clp0 = glm_vec4_clamp(div0, _mm_setzero_ps(), _mm_set1_ps(1.0f));
	glm_vec4 const mul0 = glm_vec4_mul(_mm_set1_ps(2.0f), clp0);
	glm_vec4 const sub2 = glm_vec4_sub(_mm_set1_ps(3.0f), mul0);
	glm_vec4 const mul1 = glm_vec4_mul(clp0, clp0);
	glm_vec4 const mul2 = glm_vec4_mul(mul1, sub2);
	return mul2;
}
// Agner Fog method
/// Per-lane NaN test. Returns an all-ones lane mask where x is NaN, i.e. where the
/// exponent bits are all 1s and the fraction is non-zero, and an all-zeros mask elsewhere.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_nan(glm_vec4 x)
{
	glm_ivec4 const t1 = _mm_castps_si128(x); // reinterpret as 32-bit integer
	glm_ivec4 const t2 = _mm_sll_epi32(t1, _mm_cvtsi32_si128(1)); // shift out sign bit
	glm_ivec4 const t3 = _mm_set1_epi32(int(0xFF000000)); // exponent mask
	glm_ivec4 const t4 = _mm_and_si128(t2, t3); // exponent
	glm_ivec4 const t5 = _mm_andnot_si128(t3, t2); // fraction
	glm_ivec4 const ExpAllOnes = _mm_cmpeq_epi32(t3, t4);
	glm_ivec4 const FracZero = _mm_cmpeq_epi32(t5, _mm_setzero_si128());
	// ~FracZero & ExpAllOnes: a zero fraction would mean infinity, not NaN.
	glm_ivec4 const IsNan = _mm_andnot_si128(FracZero, ExpAllOnes);
	return _mm_castsi128_ps(IsNan); // exponent = all 1s and fraction != 0
}
// Agner Fog method
/// Per-lane infinity test. Returns an all-ones lane mask where x is +inf or -inf
/// (exponent bits all 1s, fraction 0) and an all-zeros mask elsewhere.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_inf(glm_vec4 x)
{
	glm_ivec4 const bits = _mm_castps_si128(x);          // raw IEEE-754 bit patterns
	glm_ivec4 const withoutSign = _mm_slli_epi32(bits, 1); // drop the sign bit
	// After the shift, an infinity is exactly 0xFF000000: exponent all 1s, fraction 0.
	glm_ivec4 const infPattern = _mm_set1_epi32(int(0xFF000000));
	glm_ivec4 const isInf = _mm_cmpeq_epi32(withoutSign, infPattern);
	return _mm_castsi128_ps(isInf);
}
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

View File

@@ -0,0 +1,20 @@
/// @ref simd
/// @file glm/simd/experimental.h
#pragma once
#include "platform.h"
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
/// Low-precision square root of the lowest lane, computed as rsqrt(x) * x using the
/// hardware reciprocal-square-root estimate rather than a full-precision sqrt.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec1_sqrt_lowp(glm_f32vec4 x)
{
	glm_f32vec4 const inverseRoot = _mm_rsqrt_ss(x);
	return _mm_mul_ss(inverseRoot, x);
}
/// Low-precision per-lane square root, computed as rsqrt(x) * x using the hardware
/// reciprocal-square-root estimate rather than a full-precision sqrt.
GLM_FUNC_QUALIFIER glm_f32vec4 glm_vec4_sqrt_lowp(glm_f32vec4 x)
{
	glm_f32vec4 const inverseRoot = _mm_rsqrt_ps(x);
	return _mm_mul_ps(inverseRoot, x);
}
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

View File

@@ -0,0 +1,124 @@
/// @ref simd
/// @file glm/simd/geometric.h
#pragma once
#include "common.h"
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
// Forward declarations: glm_vec4_length (defined next) calls glm_vec4_dot before the
// dot-product definitions appear further down in this header.
GLM_FUNC_DECL glm_vec4 glm_vec4_dot(glm_vec4 v1, glm_vec4 v2);
GLM_FUNC_DECL glm_vec4 glm_vec1_dot(glm_vec4 v1, glm_vec4 v2);
/// Euclidean length of x: the square root of dot(x, x), applied to every lane of the
/// value returned by glm_vec4_dot.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_length(glm_vec4 x)
{
	glm_vec4 const squaredLength = glm_vec4_dot(x, x);
	return _mm_sqrt_ps(squaredLength);
}
/// Euclidean distance between p0 and p1: the length of their difference.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_distance(glm_vec4 p0, glm_vec4 p1)
{
	glm_vec4 const delta = _mm_sub_ps(p0, p1);
	return glm_vec4_length(delta);
}
/// Four-component dot product of v1 and v2, with the sum replicated into all four lanes
/// of the result.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_dot(glm_vec4 v1, glm_vec4 v2)
{
# if GLM_ARCH & GLM_ARCH_AVX_BIT
	// Mask 0xff: multiply all four lanes and write the sum to all four lanes.
	glm_vec4 const result = _mm_dp_ps(v1, v2, 0xff);
# elif GLM_ARCH & GLM_ARCH_SSE3_BIT
	// Two horizontal adds collapse the four products into one sum in every lane.
	glm_vec4 const products = _mm_mul_ps(v1, v2);
	glm_vec4 const pairSums = _mm_hadd_ps(products, products);
	glm_vec4 const result = _mm_hadd_ps(pairSums, pairSums);
# else
	glm_vec4 const products = _mm_mul_ps(v1, v2);
	// Swap neighbours (x<->y, z<->w) and add: every lane now holds a pair sum.
	glm_vec4 const neighbours = _mm_shuffle_ps(products, products, _MM_SHUFFLE(2, 3, 0, 1));
	glm_vec4 const pairSums = _mm_add_ps(products, neighbours);
	// Reverse the lanes and add: every lane now holds the full four-term sum.
	glm_vec4 const reversed = _mm_shuffle_ps(pairSums, pairSums, _MM_SHUFFLE(0, 1, 2, 3));
	glm_vec4 const result = _mm_add_ps(pairSums, reversed);
# endif
	return result;
}
/// Four-component dot product of v1 and v2 whose result is meant to be read from the
/// lowest lane. In the plain SSE2 fallback only the lowest lane holds the full sum.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec1_dot(glm_vec4 v1, glm_vec4 v2)
{
# if GLM_ARCH & GLM_ARCH_AVX_BIT
	glm_vec4 const result = _mm_dp_ps(v1, v2, 0xff);
# elif GLM_ARCH & GLM_ARCH_SSE3_BIT
	glm_vec4 const products = _mm_mul_ps(v1, v2);
	glm_vec4 const pairSums = _mm_hadd_ps(products, products);
	glm_vec4 const result = _mm_hadd_ps(pairSums, pairSums);
# else
	glm_vec4 const products = _mm_mul_ps(v1, v2);
	// Bring the upper two products down and add: lanes 0 and 1 hold x+z and y+w.
	glm_vec4 const upperHalf = _mm_movehl_ps(products, products);
	glm_vec4 const halfSums = _mm_add_ps(upperHalf, products);
	// Move lane 1 into lane 0 and finish with a scalar add on the lowest lane.
	glm_vec4 const laneOne = _mm_shuffle_ps(halfSums, halfSums, 1);
	glm_vec4 const result = _mm_add_ss(halfSums, laneOne);
# endif
	return result;
}
/// Cross product of the xyz parts of v1 and v2, computed as
/// v1.yzx * v2.zxy - v1.zxy * v2.yzx. The w lane of the result is w1*w2 - w1*w2.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_cross(glm_vec4 v1, glm_vec4 v2)
{
	glm_vec4 const v1_yzx = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
	glm_vec4 const v1_zxy = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
	glm_vec4 const v2_yzx = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
	glm_vec4 const v2_zxy = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
	glm_vec4 const positiveTerms = _mm_mul_ps(v1_yzx, v2_zxy);
	glm_vec4 const negativeTerms = _mm_mul_ps(v1_zxy, v2_yzx);
	return _mm_sub_ps(positiveTerms, negativeTerms);
}
/// Scales v by the reciprocal square root of dot(v, v).
/// Uses the hardware estimate _mm_rsqrt_ps, so the result is an approximation of the
/// unit-length vector rather than a full-precision normalisation.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_normalize(glm_vec4 v)
{
	glm_vec4 const squaredLength = glm_vec4_dot(v, v);
	glm_vec4 const inverseLength = _mm_rsqrt_ps(squaredLength);
	return _mm_mul_ps(v, inverseLength);
}
/// Orients N to face against I with respect to the reference normal Nref:
/// returns N when dot(Nref, I) < 0 and -N otherwise (including a dot product of exactly 0,
/// which must not collapse the result to a zero vector).
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_faceforward(glm_vec4 N, glm_vec4 I, glm_vec4 Nref)
{
	glm_vec4 const dot0 = glm_vec4_dot(Nref, I);
	// All-ones mask in the lanes where the dot product is strictly negative.
	glm_vec4 const cmp0 = _mm_cmplt_ps(dot0, _mm_setzero_ps());
	// Sign-bit pattern (-0.0f) kept only where N has to be negated.
	glm_vec4 const flp0 = _mm_andnot_ps(cmp0, _mm_set1_ps(-0.0f));
	// XOR with the sign bit negates a float without touching its magnitude.
	return _mm_xor_ps(N, flp0);
}
/// Reflects the incident vector I about the normal N: I - 2 * dot(N, I) * N.
GLM_FUNC_QUALIFIER glm_vec4 glm_vec4_reflect(glm_vec4 I, glm_vec4 N)
{
	glm_vec4 const projection = glm_vec4_dot(N, I);
	glm_vec4 const alongNormal = _mm_mul_ps(N, projection);
	glm_vec4 const twiceAlongNormal = _mm_mul_ps(alongNormal, _mm_set1_ps(2.0f));
	return _mm_sub_ps(I, twiceAlongNormal);
}
/// Refraction of the incident vector I at a surface with normal N and ratio of
/// refraction indices eta (GLSL refract):
///   k = 1 - eta * eta * (1 - dot(N, I) * dot(N, I))
///   result = (k < 0) ? 0 : eta * I - (eta * dot(N, I) + sqrt(k)) * N
/// The k < 0 case (total internal reflection) is resolved per lane with a mask, so a
/// broadcast eta yields either the full refracted vector or an all-zero vector.
GLM_FUNC_QUALIFIER __m128 glm_vec4_refract(glm_vec4 I, glm_vec4 N, glm_vec4 eta)
{
	glm_vec4 const zro0 = _mm_setzero_ps();
	glm_vec4 const one0 = _mm_set1_ps(1.0f);
	glm_vec4 const dot0 = glm_vec4_dot(N, I);
	glm_vec4 const mul0 = _mm_mul_ps(eta, eta);
	glm_vec4 const mul1 = _mm_mul_ps(dot0, dot0);
	glm_vec4 const sub0 = _mm_sub_ps(one0, mul1);  // 1 - dot^2
	glm_vec4 const mul2 = _mm_mul_ps(mul0, sub0);  // eta^2 * (1 - dot^2)
	glm_vec4 const sub1 = _mm_sub_ps(one0, mul2);  // k
	glm_vec4 const cmp0 = _mm_cmplt_ps(sub1, zro0); // lanes with k < 0
	// Clamp k at zero before the square root so masked-out lanes never produce NaN.
	glm_vec4 const sqt0 = _mm_sqrt_ps(_mm_max_ps(sub1, zro0));
	glm_vec4 const mad0 = glm_vec4_fma(eta, dot0, sqt0);
	glm_vec4 const mul4 = _mm_mul_ps(mad0, N);
	glm_vec4 const mul5 = _mm_mul_ps(eta, I);
	glm_vec4 const sub2 = _mm_sub_ps(mul5, mul4);
	// ~cmp0 & sub2: zero out the lanes where k was negative.
	return _mm_andnot_ps(cmp0, sub2);
}
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

115
includes/glm/simd/integer.h Normal file
View File

@@ -0,0 +1,115 @@
/// @ref simd
/// @file glm/simd/integer.h
#pragma once
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
// Bit-spreading ("interleave") helper working on one 128-bit integer register.
// Starting from x, it applies Reg1 = ((Reg1 << s) | Reg1) & mask five times, with
// s = 16, 8, 4, 2, 1 bits and the masks 0x0000FFFF, 0x00FF00FF, 0x0F0F0F0F, 0x33333333,
// 0x55555555 replicated into every 32-bit lane. It then ORs the upper 64-bit half,
// shifted left by one bit, into the register and returns the result.
// The commented-out REG1/REG2 lines show the scalar 64-bit formulation of each step.
// NOTE(review): the 16- and 8-bit steps use whole-register byte shifts (_mm_slli_si128),
// so bits can cross lane boundaries before masking - confirm against the scalar reference.
GLM_FUNC_QUALIFIER glm_uvec4 glm_i128_interleave(glm_uvec4 x)
{
glm_uvec4 const Mask4 = _mm_set1_epi32(0x0000FFFF);
glm_uvec4 const Mask3 = _mm_set1_epi32(0x00FF00FF);
glm_uvec4 const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
glm_uvec4 const Mask1 = _mm_set1_epi32(0x33333333);
glm_uvec4 const Mask0 = _mm_set1_epi32(0x55555555);
glm_uvec4 Reg1;
glm_uvec4 Reg2;
// REG1 = x;
// REG2 = y;
//Reg1 = _mm_unpacklo_epi64(x, y);
Reg1 = x;
//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
// Byte shift by 2 bytes == 16 bits across the whole register.
Reg2 = _mm_slli_si128(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask4);
//REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
//REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
// Byte shift by 1 byte == 8 bits across the whole register.
Reg2 = _mm_slli_si128(Reg1, 1);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask3);
//REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
//REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
Reg2 = _mm_slli_epi32(Reg1, 4);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask2);
//REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
//REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
Reg2 = _mm_slli_epi32(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask1);
//REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
//REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
Reg2 = _mm_slli_epi32(Reg1, 1);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask0);
//return REG1 | (REG2 << 1);
// Shift every lane left by one bit, move the upper 64 bits down (8-byte right shift),
// and merge them into the lower half.
Reg2 = _mm_slli_epi32(Reg1, 1);
Reg2 = _mm_srli_si128(Reg2, 8);
Reg1 = _mm_or_si128(Reg1, Reg2);
return Reg1;
}
// Two-operand variant of the bit-spreading ("interleave") helper.
// The low 64 bits of x and the low 64 bits of y are first packed into one register
// (_mm_unpacklo_epi64: x in the lower half, y in the upper half). The register then goes
// through Reg1 = ((Reg1 << s) | Reg1) & mask with s = 16, 8, 4, 2, 1 bits and the masks
// 0x0000FFFF, 0x00FF00FF, 0x0F0F0F0F, 0x33333333, 0x55555555 replicated into every
// 32-bit lane. Finally the upper half, shifted left by one bit, is ORed into the lower half.
// The commented-out REG1/REG2 lines show the scalar 64-bit formulation of each step.
// NOTE(review): the 16- and 8-bit steps use whole-register byte shifts (_mm_slli_si128),
// so bits can cross lane boundaries before masking - confirm against the scalar reference.
GLM_FUNC_QUALIFIER glm_uvec4 glm_i128_interleave2(glm_uvec4 x, glm_uvec4 y)
{
glm_uvec4 const Mask4 = _mm_set1_epi32(0x0000FFFF);
glm_uvec4 const Mask3 = _mm_set1_epi32(0x00FF00FF);
glm_uvec4 const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
glm_uvec4 const Mask1 = _mm_set1_epi32(0x33333333);
glm_uvec4 const Mask0 = _mm_set1_epi32(0x55555555);
glm_uvec4 Reg1;
glm_uvec4 Reg2;
// REG1 = x;
// REG2 = y;
Reg1 = _mm_unpacklo_epi64(x, y);
//REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
//REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
// Byte shift by 2 bytes == 16 bits across the whole register.
Reg2 = _mm_slli_si128(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask4);
//REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
//REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
// Byte shift by 1 byte == 8 bits across the whole register.
Reg2 = _mm_slli_si128(Reg1, 1);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask3);
//REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
//REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
Reg2 = _mm_slli_epi32(Reg1, 4);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask2);
//REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
//REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
Reg2 = _mm_slli_epi32(Reg1, 2);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask1);
//REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
//REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
Reg2 = _mm_slli_epi32(Reg1, 1);
Reg1 = _mm_or_si128(Reg2, Reg1);
Reg1 = _mm_and_si128(Reg1, Mask0);
//return REG1 | (REG2 << 1);
// Shift every lane left by one bit, move the upper 64 bits down (8-byte right shift),
// and merge them into the lower half.
Reg2 = _mm_slli_epi32(Reg1, 1);
Reg2 = _mm_srli_si128(Reg2, 8);
Reg1 = _mm_or_si128(Reg1, Reg2);
return Reg1;
}
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

1028
includes/glm/simd/matrix.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,8 @@
/// @ref simd
/// @file glm/simd/packing.h
#pragma once
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

View File

@@ -0,0 +1,357 @@
#pragma once
///////////////////////////////////////////////////////////////////////////////////
// Platform
// Every GLM_PLATFORM_* value below is a distinct single-bit flag (UNKNOWN is 0).
// GLM_PLATFORM is then set to exactly one of them by the #if chain that follows.
#define GLM_PLATFORM_UNKNOWN 0x00000000
#define GLM_PLATFORM_WINDOWS 0x00010000
#define GLM_PLATFORM_LINUX 0x00020000
#define GLM_PLATFORM_APPLE 0x00040000
//#define GLM_PLATFORM_IOS 0x00080000
#define GLM_PLATFORM_ANDROID 0x00100000
#define GLM_PLATFORM_CHROME_NACL 0x00200000
#define GLM_PLATFORM_UNIX 0x00400000
#define GLM_PLATFORM_QNXNTO 0x00800000
#define GLM_PLATFORM_WINCE 0x01000000
#define GLM_PLATFORM_CYGWIN 0x02000000
// Platform selection: the first matching predefined macro wins, so the order of the
// tests matters (e.g. __ANDROID__ is checked before __linux, WINCE before _WIN32).
// Defining GLM_FORCE_PLATFORM_UNKNOWN bypasses detection entirely.
#ifdef GLM_FORCE_PLATFORM_UNKNOWN
# define GLM_PLATFORM GLM_PLATFORM_UNKNOWN
#elif defined(__CYGWIN__)
# define GLM_PLATFORM GLM_PLATFORM_CYGWIN
#elif defined(__QNXNTO__)
# define GLM_PLATFORM GLM_PLATFORM_QNXNTO
#elif defined(__APPLE__)
# define GLM_PLATFORM GLM_PLATFORM_APPLE
#elif defined(WINCE)
# define GLM_PLATFORM GLM_PLATFORM_WINCE
#elif defined(_WIN32)
# define GLM_PLATFORM GLM_PLATFORM_WINDOWS
#elif defined(__native_client__)
# define GLM_PLATFORM GLM_PLATFORM_CHROME_NACL
#elif defined(__ANDROID__)
# define GLM_PLATFORM GLM_PLATFORM_ANDROID
#elif defined(__linux)
# define GLM_PLATFORM GLM_PLATFORM_LINUX
#elif defined(__unix)
# define GLM_PLATFORM GLM_PLATFORM_UNIX
#else
# define GLM_PLATFORM GLM_PLATFORM_UNKNOWN
#endif//
///////////////////////////////////////////////////////////////////////////////////
// Compiler
// Compiler identifiers. Each vendor has its own high bit (e.g. GLM_COMPILER_CLANG is
// 0x20000000) and each versioned constant is that vendor bit plus a version code in the
// low bits, so `GLM_COMPILER & GLM_COMPILER_<VENDOR>` tests the vendor regardless of version.
#define GLM_COMPILER_UNKNOWN 0x00000000
// Intel
#define GLM_COMPILER_INTEL 0x00100000
#define GLM_COMPILER_INTEL14 0x00100040
#define GLM_COMPILER_INTEL15 0x00100050
#define GLM_COMPILER_INTEL16 0x00100060
#define GLM_COMPILER_INTEL17 0x00100070
// Visual C++ defines
#define GLM_COMPILER_VC 0x01000000
#define GLM_COMPILER_VC12 0x01000001
#define GLM_COMPILER_VC14 0x01000002
#define GLM_COMPILER_VC15 0x01000003
#define GLM_COMPILER_VC15_3 0x01000004
#define GLM_COMPILER_VC15_5 0x01000005
#define GLM_COMPILER_VC15_6 0x01000006
#define GLM_COMPILER_VC15_7 0x01000007
// GCC defines
#define GLM_COMPILER_GCC 0x02000000
#define GLM_COMPILER_GCC46 0x020000D0
#define GLM_COMPILER_GCC47 0x020000E0
#define GLM_COMPILER_GCC48 0x020000F0
#define GLM_COMPILER_GCC49 0x02000100
#define GLM_COMPILER_GCC5 0x02000200
#define GLM_COMPILER_GCC6 0x02000300
#define GLM_COMPILER_GCC7 0x02000400
#define GLM_COMPILER_GCC8 0x02000500
// CUDA
#define GLM_COMPILER_CUDA 0x10000000
#define GLM_COMPILER_CUDA70 0x100000A0
#define GLM_COMPILER_CUDA75 0x100000B0
#define GLM_COMPILER_CUDA80 0x100000C0
// Clang
#define GLM_COMPILER_CLANG 0x20000000
#define GLM_COMPILER_CLANG34 0x20000050
#define GLM_COMPILER_CLANG35 0x20000060
#define GLM_COMPILER_CLANG36 0x20000070
#define GLM_COMPILER_CLANG37 0x20000080
#define GLM_COMPILER_CLANG38 0x20000090
#define GLM_COMPILER_CLANG39 0x200000A0
#define GLM_COMPILER_CLANG40 0x200000B0
#define GLM_COMPILER_CLANG41 0x200000C0
#define GLM_COMPILER_CLANG42 0x200000D0
// Build model
#define GLM_MODEL_32 0x00000010
#define GLM_MODEL_64 0x00000020
// Compiler detection: defines GLM_COMPILER to one of the GLM_COMPILER_* identifiers.
// Vendors are tested in a fixed order (Intel, CUDA, Clang, Visual C++, GCC) because several
// front ends also define another vendor's macros; the first match wins. Versions older than
// the supported minimum stop the build with #error, and versions newer than the newest
// known one map to the newest known identifier.
// Force generic C++ compiler
#ifdef GLM_FORCE_COMPILER_UNKNOWN
# define GLM_COMPILER GLM_COMPILER_UNKNOWN
#elif defined(__INTEL_COMPILER)
# if (__INTEL_COMPILER < 1400)
# error "GLM requires ICC 2013 SP1 or newer"
# elif __INTEL_COMPILER == 1400
# define GLM_COMPILER GLM_COMPILER_INTEL14
# elif __INTEL_COMPILER == 1500
# define GLM_COMPILER GLM_COMPILER_INTEL15
# elif __INTEL_COMPILER == 1600
# define GLM_COMPILER GLM_COMPILER_INTEL16
# elif __INTEL_COMPILER >= 1700
# define GLM_COMPILER GLM_COMPILER_INTEL17
# endif
// CUDA
#elif defined(__CUDACC__)
# if !defined(CUDA_VERSION) && !defined(GLM_FORCE_CUDA)
# include <cuda.h> // make sure version is defined since nvcc does not define it itself!
# endif
# if CUDA_VERSION < 7000
# error "GLM requires CUDA 7.0 or higher"
# elif (CUDA_VERSION >= 7000 && CUDA_VERSION < 7500)
# define GLM_COMPILER GLM_COMPILER_CUDA70
# elif (CUDA_VERSION >= 7500 && CUDA_VERSION < 8000)
# define GLM_COMPILER GLM_COMPILER_CUDA75
# elif (CUDA_VERSION >= 8000)
# define GLM_COMPILER GLM_COMPILER_CUDA80
# endif
// Clang
#elif defined(__clang__)
// Apple's Clang uses its own version numbering, so it is mapped separately.
# if defined(__apple_build_version__)
# if (__clang_major__ < 6)
# error "GLM requires Clang 3.4 / Apple Clang 6.0 or higher"
# elif __clang_major__ == 6 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_CLANG35
# elif __clang_major__ == 6 && __clang_minor__ >= 1
# define GLM_COMPILER GLM_COMPILER_CLANG36
# elif __clang_major__ >= 7
# define GLM_COMPILER GLM_COMPILER_CLANG37
# endif
# else
# if ((__clang_major__ == 3) && (__clang_minor__ < 4)) || (__clang_major__ < 3)
# error "GLM requires Clang 3.4 or higher"
# elif __clang_major__ == 3 && __clang_minor__ == 4
# define GLM_COMPILER GLM_COMPILER_CLANG34
# elif __clang_major__ == 3 && __clang_minor__ == 5
# define GLM_COMPILER GLM_COMPILER_CLANG35
# elif __clang_major__ == 3 && __clang_minor__ == 6
# define GLM_COMPILER GLM_COMPILER_CLANG36
# elif __clang_major__ == 3 && __clang_minor__ == 7
# define GLM_COMPILER GLM_COMPILER_CLANG37
# elif __clang_major__ == 3 && __clang_minor__ == 8
# define GLM_COMPILER GLM_COMPILER_CLANG38
# elif __clang_major__ == 3 && __clang_minor__ >= 9
# define GLM_COMPILER GLM_COMPILER_CLANG39
# elif __clang_major__ == 4 && __clang_minor__ == 0
# define GLM_COMPILER GLM_COMPILER_CLANG40
# elif __clang_major__ == 4 && __clang_minor__ == 1
# define GLM_COMPILER GLM_COMPILER_CLANG41
# elif __clang_major__ == 4 && __clang_minor__ >= 2
# define GLM_COMPILER GLM_COMPILER_CLANG42
# elif __clang_major__ >= 4
# define GLM_COMPILER GLM_COMPILER_CLANG42
# endif
# endif
// Visual C++
#elif defined(_MSC_VER)
# if _MSC_VER < 1800
# error "GLM requires Visual C++ 12 - 2013 or higher"
# elif _MSC_VER == 1800
# define GLM_COMPILER GLM_COMPILER_VC12
# elif _MSC_VER == 1900
# define GLM_COMPILER GLM_COMPILER_VC14
# elif _MSC_VER == 1910
# define GLM_COMPILER GLM_COMPILER_VC15
# elif _MSC_VER == 1911
# define GLM_COMPILER GLM_COMPILER_VC15_3
# elif _MSC_VER == 1912
# define GLM_COMPILER GLM_COMPILER_VC15_5
# elif _MSC_VER == 1913
# define GLM_COMPILER GLM_COMPILER_VC15_6
# elif _MSC_VER >= 1914
# define GLM_COMPILER GLM_COMPILER_VC15_7
# endif//_MSC_VER
// G++
#elif defined(__GNUC__) || defined(__MINGW32__)
// The check rejects everything below GCC 4.6 and maps 4.6 to GLM_COMPILER_GCC46,
// so the diagnostic names 4.6 as the minimum.
# if ((__GNUC__ == 4) && (__GNUC_MINOR__ < 6)) || (__GNUC__ < 4)
# error "GLM requires GCC 4.6 or higher"
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
# define GLM_COMPILER (GLM_COMPILER_GCC46)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 7)
# define GLM_COMPILER (GLM_COMPILER_GCC47)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ == 8)
# define GLM_COMPILER (GLM_COMPILER_GCC48)
# elif (__GNUC__ == 4) && (__GNUC_MINOR__ >= 9)
# define GLM_COMPILER (GLM_COMPILER_GCC49)
# elif (__GNUC__ == 5)
# define GLM_COMPILER (GLM_COMPILER_GCC5)
# elif (__GNUC__ == 6)
# define GLM_COMPILER (GLM_COMPILER_GCC6)
# elif (__GNUC__ == 7)
# define GLM_COMPILER (GLM_COMPILER_GCC7)
# elif (__GNUC__ >= 8)
# define GLM_COMPILER (GLM_COMPILER_GCC8)
# endif
#else
# define GLM_COMPILER GLM_COMPILER_UNKNOWN
#endif
// Safety net: a vendor branch above may match without any version test defining
// GLM_COMPILER (e.g. an unlisted intermediate version number).
#ifndef GLM_COMPILER
# error "GLM_COMPILER undefined, your compiler may not be supported by GLM. Add #define GLM_COMPILER 0 to ignore this message."
#endif//GLM_COMPILER
///////////////////////////////////////////////////////////////////////////////////
// Instruction sets
// User defines handled by the GLM_ARCH selection that follows these constants:
// GLM_FORCE_ARCH_UNKNOWN GLM_FORCE_PURE GLM_FORCE_XYZW_ONLY GLM_FORCE_NEON GLM_FORCE_SSE
// GLM_FORCE_SSE2 GLM_FORCE_SSE3 GLM_FORCE_SSSE3 GLM_FORCE_SSE41 GLM_FORCE_SSE42
// GLM_FORCE_AVX GLM_FORCE_AVX2
// CPU family flags (high bits).
#define GLM_ARCH_MIPS_BIT (0x10000000)
#define GLM_ARCH_PPC_BIT (0x20000000)
#define GLM_ARCH_ARM_BIT (0x40000000)
#define GLM_ARCH_X86_BIT (0x80000000)
// Set in every architecture value that includes a SIMD instruction set.
#define GLM_ARCH_SIMD_BIT (0x00001000)
// One flag per instruction-set extension (low bits).
#define GLM_ARCH_NEON_BIT (0x00000001)
#define GLM_ARCH_SSE_BIT (0x00000002)
#define GLM_ARCH_SSE2_BIT (0x00000004)
#define GLM_ARCH_SSE3_BIT (0x00000008)
#define GLM_ARCH_SSSE3_BIT (0x00000010)
#define GLM_ARCH_SSE41_BIT (0x00000020)
#define GLM_ARCH_SSE42_BIT (0x00000040)
#define GLM_ARCH_AVX_BIT (0x00000080)
#define GLM_ARCH_AVX2_BIT (0x00000100)
// Composite architecture values. Each x86 level ORs its own bit onto the previous level,
// so e.g. GLM_ARCH_AVX2 also has the AVX, SSE4.x, SSSE3, SSE3, SSE2 and SSE bits set and
// a test such as `GLM_ARCH & GLM_ARCH_SSE2_BIT` is true for SSE2 and everything above it.
#define GLM_ARCH_UNKNOWN (0)
#define GLM_ARCH_X86 (GLM_ARCH_X86_BIT)
#define GLM_ARCH_SSE (GLM_ARCH_SSE_BIT | GLM_ARCH_SIMD_BIT | GLM_ARCH_X86)
#define GLM_ARCH_SSE2 (GLM_ARCH_SSE2_BIT | GLM_ARCH_SSE)
#define GLM_ARCH_SSE3 (GLM_ARCH_SSE3_BIT | GLM_ARCH_SSE2)
#define GLM_ARCH_SSSE3 (GLM_ARCH_SSSE3_BIT | GLM_ARCH_SSE3)
#define GLM_ARCH_SSE41 (GLM_ARCH_SSE41_BIT | GLM_ARCH_SSSE3)
#define GLM_ARCH_SSE42 (GLM_ARCH_SSE42_BIT | GLM_ARCH_SSE41)
#define GLM_ARCH_AVX (GLM_ARCH_AVX_BIT | GLM_ARCH_SSE42)
#define GLM_ARCH_AVX2 (GLM_ARCH_AVX2_BIT | GLM_ARCH_AVX)
#define GLM_ARCH_ARM (GLM_ARCH_ARM_BIT)
#define GLM_ARCH_NEON (GLM_ARCH_NEON_BIT | GLM_ARCH_SIMD_BIT | GLM_ARCH_ARM)
#define GLM_ARCH_MIPS (GLM_ARCH_MIPS_BIT)
#define GLM_ARCH_PPC (GLM_ARCH_PPC_BIT)
// GLM_ARCH selection. Precedence, highest first:
//   1. GLM_FORCE_ARCH_UNKNOWN          -> GLM_ARCH_UNKNOWN
//   2. GLM_FORCE_PURE / _XYZW_ONLY     -> CPU family only, no SIMD bits
//   3. an explicit GLM_FORCE_<ISA>     -> that instruction set, highest listed first
//   4. otherwise auto-detection from the compiler's predefined macros
#ifdef GLM_FORCE_ARCH_UNKNOWN
# define GLM_ARCH GLM_ARCH_UNKNOWN
#elif defined(GLM_FORCE_PURE) || defined(GLM_FORCE_XYZW_ONLY)
# if defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86) || defined(__i386__)
# define GLM_ARCH (GLM_ARCH_X86)
# elif defined(__arm__ ) || defined(_M_ARM)
# define GLM_ARCH (GLM_ARCH_ARM)
# elif defined(__powerpc__ ) || defined(_M_PPC)
# define GLM_ARCH (GLM_ARCH_PPC)
# elif defined(__mips__ )
# define GLM_ARCH (GLM_ARCH_MIPS)
# else
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
# endif
#elif defined(GLM_FORCE_NEON)
# define GLM_ARCH (GLM_ARCH_NEON)
#elif defined(GLM_FORCE_AVX2)
# define GLM_ARCH (GLM_ARCH_AVX2)
#elif defined(GLM_FORCE_AVX)
# define GLM_ARCH (GLM_ARCH_AVX)
#elif defined(GLM_FORCE_SSE42)
# define GLM_ARCH (GLM_ARCH_SSE42)
#elif defined(GLM_FORCE_SSE41)
# define GLM_ARCH (GLM_ARCH_SSE41)
#elif defined(GLM_FORCE_SSSE3)
# define GLM_ARCH (GLM_ARCH_SSSE3)
#elif defined(GLM_FORCE_SSE3)
# define GLM_ARCH (GLM_ARCH_SSE3)
#elif defined(GLM_FORCE_SSE2)
# define GLM_ARCH (GLM_ARCH_SSE2)
#elif defined(GLM_FORCE_SSE)
# define GLM_ARCH (GLM_ARCH_SSE)
#else
// Auto-detection: test the most capable instruction set first.
# if defined(__AVX2__)
# define GLM_ARCH (GLM_ARCH_AVX2)
# elif defined(__AVX__)
# define GLM_ARCH (GLM_ARCH_AVX)
# elif defined(__SSE4_2__)
# define GLM_ARCH (GLM_ARCH_SSE42)
# elif defined(__SSE4_1__)
# define GLM_ARCH (GLM_ARCH_SSE41)
# elif defined(__SSSE3__)
# define GLM_ARCH (GLM_ARCH_SSSE3)
# elif defined(__SSE3__)
# define GLM_ARCH (GLM_ARCH_SSE3)
// NOTE(review): this branch only checks that _M_IX86_FP is defined, not its value;
// confirm that a 32-bit MSVC build with SSE2 disabled should still select SSE2.
# elif defined(__SSE2__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_IX86_FP)
# define GLM_ARCH (GLM_ARCH_SSE2)
# elif defined(__i386__)
# define GLM_ARCH (GLM_ARCH_X86)
# elif defined(__ARM_NEON)
# define GLM_ARCH (GLM_ARCH_ARM | GLM_ARCH_NEON)
# elif defined(__arm__ ) || defined(_M_ARM)
# define GLM_ARCH (GLM_ARCH_ARM)
# elif defined(__mips__ )
# define GLM_ARCH (GLM_ARCH_MIPS)
# elif defined(__powerpc__ ) || defined(_M_PPC)
# define GLM_ARCH (GLM_ARCH_PPC)
# else
# define GLM_ARCH (GLM_ARCH_UNKNOWN)
# endif
#endif
// Pull in the intrinsics header that matches the highest instruction set selected in
// GLM_ARCH. Only the first matching branch is taken. No header is included when
// GLM_ARCH has none of the SSE2-or-newer bits set.
#if GLM_ARCH & GLM_ARCH_AVX2_BIT
# include <immintrin.h>
#elif GLM_ARCH & GLM_ARCH_AVX_BIT
# include <immintrin.h>
#elif GLM_ARCH & GLM_ARCH_SSE42_BIT
// Clang additionally gets <popcntintrin.h> on the SSE4.2 path.
# if GLM_COMPILER & GLM_COMPILER_CLANG
# include <popcntintrin.h>
# endif
# include <nmmintrin.h>
#elif GLM_ARCH & GLM_ARCH_SSE41_BIT
# include <smmintrin.h>
#elif GLM_ARCH & GLM_ARCH_SSSE3_BIT
# include <tmmintrin.h>
#elif GLM_ARCH & GLM_ARCH_SSE3_BIT
# include <pmmintrin.h>
#elif GLM_ARCH & GLM_ARCH_SSE2_BIT
# include <emmintrin.h>
#endif//GLM_ARCH
// SIMD register aliases used by the glm/simd headers.
// 128-bit types: names follow <element type><lane count>; signed and unsigned integer
// aliases share the same underlying __m128i.
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
typedef __m128 glm_f32vec4;
typedef __m128i glm_i32vec4;
typedef __m128i glm_u32vec4;
typedef __m128d glm_f64vec2;
typedef __m128i glm_i64vec2;
typedef __m128i glm_u64vec2;
// Short names used by the function signatures in the simd headers.
typedef glm_f32vec4 glm_vec4;
typedef glm_i32vec4 glm_ivec4;
typedef glm_u32vec4 glm_uvec4;
typedef glm_f64vec2 glm_dvec2;
#endif
// 256-bit double vectors are only declared when AVX is selected.
#if GLM_ARCH & GLM_ARCH_AVX_BIT
typedef __m256d glm_f64vec4;
typedef glm_f64vec4 glm_dvec4;
#endif
// 256-bit integer vectors are only declared when AVX2 is selected.
#if GLM_ARCH & GLM_ARCH_AVX2_BIT
typedef __m256i glm_i64vec4;
typedef __m256i glm_u64vec4;
#endif

View File

@@ -0,0 +1,9 @@
/// @ref simd
/// @file glm/simd/trigonometric.h
#pragma once
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT

View File

@@ -0,0 +1,8 @@
/// @ref simd
/// @file glm/simd/vector_relational.h
#pragma once
#if GLM_ARCH & GLM_ARCH_SSE2_BIT
#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT