From bba265f47d16f1056e8c9b24ba97a87f38e5324d Mon Sep 17 00:00:00 2001 From: slipher Date: Thu, 16 May 2024 00:12:54 -0300 Subject: [PATCH 1/3] Replace ALIGNED macro with C++11 alignas Besides requiring non-standard extensions, the former had a hideous syntax. I used std::array in some cases because I couldn't be sure that alignas(16) int x[4] will make an aligned array of ints rather than an array of aligned ints. --- src/common/Compiler.h | 5 ----- src/engine/qcommon/q_math.cpp | 4 ++-- src/engine/qcommon/q_shared.h | 32 +++++++++++++++--------------- src/engine/renderer/tr_local.h | 6 +++--- src/engine/renderer/tr_shade.cpp | 2 +- src/engine/renderer/tr_surface.cpp | 2 +- src/engine/renderer/tr_types.h | 4 ++-- 7 files changed, 25 insertions(+), 30 deletions(-) diff --git a/src/common/Compiler.h b/src/common/Compiler.h index c3649c5a7a..81347aa8a7 100644 --- a/src/common/Compiler.h +++ b/src/common/Compiler.h @@ -70,9 +70,6 @@ int CountTrailingZeroes(unsigned long long x); // Marks this function as memory allocator #define ALLOCATOR -// Align the address of a variable to a certain value -#define ALIGNED(a, x) x __attribute__((__aligned__(a))) - // Shared library function import/export #ifdef _WIN32 #define DLLEXPORT __attribute__((__dllexport__)) @@ -167,7 +164,6 @@ inline int CountTrailingZeroes(unsigned long long x) { return __builtin_ctzll(x) #define ALLOCATOR #endif #define MALLOC_LIKE ALLOCATOR __declspec(restrict) -#define ALIGNED(a,x) __declspec(align(a)) x #define DLLEXPORT __declspec(dllexport) #define DLLIMPORT __declspec(dllimport) #define BREAKPOINT() __debugbreak() @@ -205,7 +201,6 @@ inline int CountTrailingZeroes(unsigned long long x) #define PRINTF_TRANSLATE_ARG(a) #define MALLOC_LIKE #define ALLOCATOR -#define ALIGNED(a,x) x #define DLLEXPORT #define DLLIMPORT #define BREAKPOINT() diff --git a/src/engine/qcommon/q_math.cpp b/src/engine/qcommon/q_math.cpp index 4d166b199b..65251f4768 100644 --- a/src/engine/qcommon/q_math.cpp +++ 
b/src/engine/qcommon/q_math.cpp @@ -751,8 +751,8 @@ int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p ) auto pmax = _mm_max_ps( prod0, prod1 ); auto pmin = _mm_min_ps( prod0, prod1 ); - ALIGNED( 16, vec4_t pmaxv ); - ALIGNED( 16, vec4_t pminv ); + alignas(16) vec4_t pmaxv; + alignas(16) vec4_t pminv; _mm_store_ps( pmaxv, pmax ); _mm_store_ps( pminv, pmin ); diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h index 095af5442a..64d0b3e012 100644 --- a/src/engine/qcommon/q_shared.h +++ b/src/engine/qcommon/q_shared.h @@ -256,7 +256,7 @@ void Com_Free_Aligned( void *ptr ); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" #endif - ALIGNED(16, union transform_t { + union alignas(16) transform_t { struct { quat_t rot; vec3_t trans; @@ -266,16 +266,16 @@ void Com_Free_Aligned( void *ptr ); __m128 sseRot; __m128 sseTransScale; }; - }); + }; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif #else - ALIGNED(16, struct transform_t { + struct alignas(16) transform_t { quat_t rot; vec3_t trans; vec_t scale; - }); + }; #endif using fixed4_t = int; @@ -1041,29 +1041,29 @@ inline float DotProduct( const vec3_t x, const vec3_t y ) return *(__m128 *)vec; } inline __m128 mask_0000() { - static const ALIGNED(16, int vec[4]) = { 0, 0, 0, 0 }; - return sseLoadInts( vec ); + alignas(16) static const std::array vec = { 0, 0, 0, 0 }; + return sseLoadInts( vec.data() ); } inline __m128 mask_000W() { - static const ALIGNED(16, int vec[4]) = { 0, 0, 0, -1 }; - return sseLoadInts( vec ); + alignas(16) static const std::array vec = { 0, 0, 0, -1 }; + return sseLoadInts( vec.data() ); } inline __m128 mask_XYZ0() { - static const ALIGNED(16, int vec[4]) = { -1, -1, -1, 0 }; - return sseLoadInts( vec ); + alignas(16) static const std::array vec = { -1, -1, -1, 0 }; + return sseLoadInts( vec.data() ); } inline __m128 sign_000W() { - static const ALIGNED(16, int vec[4]) = { 0, 0, 0, 1<<31 }; - return sseLoadInts( vec ); + 
alignas(16) static const std::array vec = { 0, 0, 0, 1<<31 }; + return sseLoadInts( vec.data() ); } inline __m128 sign_XYZ0() { - static const ALIGNED(16, int vec[4]) = { 1<<31, 1<<31, 1<<31, 0 }; - return sseLoadInts( vec ); + alignas(16) static const std::array vec = { 1<<31, 1<<31, 1<<31, 0 }; + return sseLoadInts( vec.data() ); } inline __m128 sign_XYZW() { - static const ALIGNED(16, int vec[4]) = { 1<<31, 1<<31, 1<<31, 1<<31 }; - return sseLoadInts( vec ); + alignas(16) static const std::array vec = { 1<<31, 1<<31, 1<<31, 1<<31 }; + return sseLoadInts( vec.data() ); } inline __m128 sseDot4( __m128 a, __m128 b ) { diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index 7693d77051..b0308fa398 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -2180,7 +2180,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; }; // align for sse skinning - ALIGNED(16, struct md5Vertex_t + struct alignas(16) md5Vertex_t { vec4_t position; vec4_t tangent; @@ -2194,7 +2194,7 @@ enum class dynamicLightRenderer_t { LEGACY, TILED }; uint32_t boneIndexes[ MAX_WEIGHTS ]; float boneWeights[ MAX_WEIGHTS ]; - }); + }; struct md5Surface_t { @@ -3395,7 +3395,7 @@ inline bool checkGLErrors() #endif }; - extern shaderCommands_t tess; + alignas(16) extern shaderCommands_t tess; void GLSL_InitGPUShaders(); void GLSL_ShutdownGPUShaders(); diff --git a/src/engine/renderer/tr_shade.cpp b/src/engine/renderer/tr_shade.cpp index cc305c10a5..0886fb4903 100644 --- a/src/engine/renderer/tr_shade.cpp +++ b/src/engine/renderer/tr_shade.cpp @@ -415,7 +415,7 @@ SURFACE SHADERS ============================================================= */ -ALIGNED( 16, shaderCommands_t tess ); +alignas(16) shaderCommands_t tess; /* ================ diff --git a/src/engine/renderer/tr_surface.cpp b/src/engine/renderer/tr_surface.cpp index 8eff30e39a..994dca3824 100644 --- a/src/engine/renderer/tr_surface.cpp +++ b/src/engine/renderer/tr_surface.cpp @@ -38,7 
+38,7 @@ use the shader system. ============================================================================== */ -static ALIGNED( 16, transform_t bones[ MAX_BONES ] ); +static transform_t bones[ MAX_BONES ]; /* ============== diff --git a/src/engine/renderer/tr_types.h b/src/engine/renderer/tr_types.h index 09b0bdbe9d..610db62ff7 100644 --- a/src/engine/renderer/tr_types.h +++ b/src/engine/renderer/tr_types.h @@ -149,7 +149,7 @@ enum class refSkeletonType_t SK_ABSOLUTE }; -ALIGNED(16, struct refSkeleton_t +struct alignas(16) refSkeleton_t { refSkeletonType_t type; // skeleton has been reset @@ -159,7 +159,7 @@ ALIGNED(16, struct refSkeleton_t vec_t scale; refBone_t bones[ MAX_BONES ]; -}); +}; // XreaL END From d959f8b638776b6cad2e319421dfc1d8d1de16df Mon Sep 17 00:00:00 2001 From: slipher Date: Thu, 16 May 2024 14:20:27 -0300 Subject: [PATCH 2/3] Partially implement USE_CPU_RECOMMENDED_FEATURES on MSVC --- cmake/DaemonFlags.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmake/DaemonFlags.cmake b/cmake/DaemonFlags.cmake index 6d5be42cde..383e1e82bc 100644 --- a/cmake/DaemonFlags.cmake +++ b/cmake/DaemonFlags.cmake @@ -116,6 +116,8 @@ macro(try_linker_flag PROP FLAG) endif() endmacro() +option(USE_CPU_RECOMMENDED_FEATURES "Use some common hardware features like SSE2, NEON, VFP, MCX16, etc." ON) + if(MINGW AND USE_BREAKPAD) set_linker_flag("-Wl,--build-id") endif() @@ -140,7 +142,11 @@ if (MSVC) set_c_cxx_flag("/W4") if (ARCH STREQUAL "i686") - set_c_cxx_flag("/arch:SSE2") + if (USE_CPU_RECOMMENDED_FEATURES) + set_c_cxx_flag("/arch:SSE2") # This is the default + else() + set_c_cxx_flag("/arch:IA32") # minimum + endif() endif() if (USE_LTO) @@ -234,7 +240,6 @@ else() endif() endif() - option(USE_CPU_RECOMMENDED_FEATURES "Enforce usage of hardware features like SSE, NEON, VFP, MCX16, etc." ON) if (USE_CPU_RECOMMENDED_FEATURES) if (ARCH STREQUAL "amd64") # CMPXCHG16B minimum (x86-64-v2): AMD64 revision F. 
From 552c1d3ad0877677f0c1c878cb15ea6b10471373 Mon Sep 17 00:00:00 2001 From: slipher Date: Thu, 16 May 2024 14:56:51 -0300 Subject: [PATCH 3/3] Disable MSVC warning about struct holes --- src/common/Compiler.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/Compiler.h b/src/common/Compiler.h index 81347aa8a7..5653e0e687 100644 --- a/src/common/Compiler.h +++ b/src/common/Compiler.h @@ -141,6 +141,8 @@ inline int CountTrailingZeroes(unsigned long long x) { return __builtin_ctzll(x) #pragma warning(disable : 4244) // 'XXX': conversion from 'YYY' to 'ZZZ', possible loss of data #pragma warning(disable : 4267) // 'initializing' : conversion from 'size_t' to 'int', possible loss of data +#pragma warning(disable : 4324) // 'refBone_t': structure was padded due to alignment specifier + #pragma warning(disable : 4458) // declaration of 'XXX' hides class member #pragma warning(disable : 4459) // declaration of 'XXX' hides global declaration