Skip to content

Commit 498d560

Browse files
authored
feat(vectordb): integrate KRL for ARM Kunpeng vector search optimization (#256)
- Add third_party/krl: Kunpeng Retrieval Library (KRL) source with ARM NEON/SIMD-optimized L2 and inner-product distance routines
- vector_base.h: add ARM platform macros OV_PLATFORM_ARM, OV_SIMD_NEON, OV_SIMD_SVE
- space_l2.h: on ARM use krl_L2sqr in l2_sqr_neon instead of the scalar path
- space_ip.h: on ARM use krl_ipdis in inner_product_neon instead of the scalar path
- CMakeLists.txt: enable OV_PLATFORM_ARM on aarch64, build and link the KRL static library

On ARM, vectordb uses KRL-optimized paths; on x86 the existing AVX/SSE implementations are unchanged.

Co-authored-by: mijamind719 <mijamind@163.com>
1 parent ec7afc6 commit 498d560

File tree

11 files changed

+5876
-2
lines changed

11 files changed

+5876
-2
lines changed

src/CMakeLists.txt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ set(Python3_ARCH_INCLUDE_DIR "/usr/include/${CMAKE_SYSTEM_PROCESSOR}-linux-gnu/"
3939

4040
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)

# pybind11 extension modules do not need to be linked against libpython on
# Linux. Emptying Python3_LIBRARIES avoids pulling in a static libpython that
# was built without -fPIC. Note that the condition below matches every
# non-Apple UNIX system, not only Linux.
if(UNIX AND NOT APPLE)
  set(Python3_LIBRARIES "")
endif()

find_package(pybind11 REQUIRED)
find_package(Threads REQUIRED)
4450

@@ -59,11 +65,24 @@ endif()
5965

6066
add_subdirectory(../third_party/spdlog-1.14.1 ${CMAKE_BINARY_DIR}/spdlog_build)

# KRL integration: the KRL subdirectory is only added when
# CMAKE_SYSTEM_PROCESSOR reports a 64-bit ARM name. OV_PLATFORM_ARM records
# that decision for the rest of this file.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64")
  set(OV_PLATFORM_ARM ON)
  message(STATUS "Building for ARM platform with KRL support")
  add_subdirectory(../third_party/krl ${CMAKE_BINARY_DIR}/krl_build)
else()
  set(OV_PLATFORM_ARM OFF)
endif()

# Header search paths shared by the targets defined in this directory.
include_directories(
  .
  ../third_party/
  ../third_party/leveldb-1.23/include/
  ../third_party/spdlog-1.14.1/include/
)

# The KRL headers are only added to the search path on ARM builds.
if(OV_PLATFORM_ARM)
  include_directories(../third_party/krl/include/)
endif()
85+
6786
if(NOT DEFINED Python3_INCLUDE_DIRS)
6887
set(Python3_INCLUDE_DIRS
6988
${Python3_ARCH_INCLUDE_DIR}
@@ -88,6 +107,11 @@ target_link_libraries(engine_impl PRIVATE
88107
leveldb
89108
)
90109

110+
# On ARM builds engine_impl additionally links the krl library target.
if(OV_PLATFORM_ARM)
  target_link_libraries(engine_impl
    PRIVATE
      krl
  )
endif()
114+
91115
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
92116
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
93117
target_link_libraries(engine_impl PRIVATE stdc++fs)

src/index/detail/vector/common/space_ip.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,27 @@ static float inner_product_sse(const void* v1, const void* v2,
108108
}
109109
#endif
110110

111+
#if defined(OV_SIMD_NEON)
#include "krl.h"

// Inner product of two float vectors on ARM, delegated to KRL's krl_ipdis.
//
// v1, v2: the two input vectors, read as const float*.
// params: pointer to a size_t holding the vector dimension.
// Returns the value krl_ipdis writes into its output slot (0 if it writes
// nothing).
//
// NOTE(review): anything krl_ipdis itself returns is discarded here; if that
// is a status code, failures go unnoticed — confirm against krl.h.
static float inner_product_neon(const void* v1, const void* v2,
                                const void* params) {
  const size_t dim = *static_cast<const size_t*>(params);
  float result = 0;
  // Single-pair call: one output slot, hence the trailing 1.
  krl_ipdis(static_cast<const float*>(v1), static_cast<const float*>(v2), dim,
            &result, 1);
  return result;
}
#endif
125+
111126
class InnerProductSpace : public VectorSpace<float> {
112127
public:
113128
explicit InnerProductSpace(size_t dim) : dim_(dim) {
114-
#if defined(OV_SIMD_AVX512)
129+
#if defined(OV_SIMD_NEON)
130+
metric_func_ = inner_product_neon;
131+
#elif defined(OV_SIMD_AVX512)
115132
metric_func_ = inner_product_avx512;
116133
#elif defined(OV_SIMD_AVX)
117134
metric_func_ = inner_product_avx;

src/index/detail/vector/common/space_l2.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,28 @@ static float l2_sqr_sse(const void* v1, const void* v2, const void* params) {
121121
}
122122
#endif
123123

124+
#if defined(OV_SIMD_NEON)
#include "krl.h"

// L2 metric for two float vectors on ARM, delegated to KRL's krl_L2sqr.
//
// v1, v2: the two input vectors, read as const float*.
// params: pointer to a size_t holding the vector dimension.
// Returns 1.0f minus the value krl_L2sqr writes into its output slot.
//
// NOTE(review): the result is `1.0f - dis`, not the raw squared distance the
// function name suggests — confirm this matches what the other l2_sqr_*
// implementations in this header return.
// NOTE(review): anything krl_L2sqr itself returns is discarded here; if that
// is a status code, failures go unnoticed — confirm against krl.h.
static float l2_sqr_neon(const void* v1, const void* v2, const void* params) {
  const size_t dim = *static_cast<const size_t*>(params);
  float sq_dist = 0;
  // Single-pair call: one output slot, hence the trailing 1.
  krl_L2sqr(static_cast<const float*>(v1), static_cast<const float*>(v2), dim,
            &sq_dist, 1);
  return 1.0f - sq_dist;
}
#endif
137+
124138
class L2Space : public VectorSpace<float> {
125139
public:
126140
explicit L2Space(size_t dim) : dim_(dim) {
127141
// Select best implementation at runtime based on compile-time flags
128142
// In a real scenario, we might want dynamic dispatch based on CPUID
129-
#if defined(OV_SIMD_AVX512)
143+
#if defined(OV_SIMD_NEON)
144+
metric_func_ = l2_sqr_neon;
145+
#elif defined(OV_SIMD_AVX512)
130146
metric_func_ = l2_sqr_avx512;
131147
#elif defined(OV_SIMD_AVX)
132148
metric_func_ = l2_sqr_avx;

src/index/detail/vector/common/vector_base.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@
2121
#endif
2222
#endif
2323

24+
// ARM platform detection.
// When the compiler targets AArch64 (__aarch64__ or _M_ARM64 is defined),
// OV_PLATFORM_ARM and OV_SIMD_NEON are defined. OV_SIMD_SVE is added only
// when the compiler also defines __ARM_FEATURE_SVE.
#if defined(__aarch64__) || defined(_M_ARM64)
#define OV_PLATFORM_ARM
#define OV_SIMD_NEON
#ifdef __ARM_FEATURE_SVE
#define OV_SIMD_SVE
#endif  // __ARM_FEATURE_SVE
#endif  // __aarch64__ || _M_ARM64
32+
2433
// Memory Alignment Macros
2534
#if defined(_MSC_VER)
2635
#define OV_ALIGN_32 __declspec(align(32))

third_party/krl/CMakeLists.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
cmake_minimum_required(VERSION 3.12)

project(krl LANGUAGES CXX)

# Builds the subset of the Kunpeng Retrieval Library (KRL) that OpenViking
# uses: the float, single-vector krl_L2sqr and krl_ipdis routines.
# The `krl` static library target is only defined when CMAKE_SYSTEM_PROCESSOR
# reports a 64-bit ARM name; elsewhere this file only prints a status message.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64")
  add_library(krl STATIC
    ${CMAKE_CURRENT_SOURCE_DIR}/src/L2distance_simd.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/IPdistance_simd.cpp
  )

  # Consumers that link `krl` inherit the public header directory.
  target_include_directories(krl PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)

  # Conservative optimisation level, scoped to this target instead of the
  # whole directory.
  target_compile_options(krl PRIVATE -O2)

  # Target properties replace the raw -fPIC / -fvisibility=hidden flags so
  # CMake emits the right spelling for the active toolchain, and the language
  # standard no longer depends on directory-level variables.
  set_target_properties(krl PROPERTIES
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED ON
    POSITION_INDEPENDENT_CODE ON
    CXX_VISIBILITY_PRESET hidden
  )

  message(STATUS "KRL library configured for ARM platform (core distance functions only)")
else()
  message(STATUS "KRL library skipped - not ARM platform")
endif()

0 commit comments

Comments
 (0)