Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions include/distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,7 @@ namespace diskann {
}
};

// Gopal. Slow implementations of the distance functions to get diskann to
// work in v14 machines that do not have AVX2 support. Performance here is not
// a concern, so we are using the simplest possible implementation.
// Slow implementations of the distance functions for machines without AVX2
template<typename T>
class SlowDistanceL2Int : public Distance<T> {
virtual float compare(const T *a, const T *b, unsigned length) const {
Expand Down
7 changes: 7 additions & 0 deletions tests/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ if(MSVC)
target_link_libraries(ivecs_to_bin optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib)
endif()

# Build the tsv_to_bin utility from tsv_to_bin.cpp.
add_executable(tsv_to_bin tsv_to_bin.cpp)
if(MSVC)
# MSVC only: force an x64 link and link against the diskann_dll import
# library, taking it from the debug output directory for debug configurations
# and from the release output directory for optimized configurations.
target_link_options(tsv_to_bin PRIVATE /MACHINE:x64)
target_link_libraries(tsv_to_bin debug ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}/diskann_dll.lib)
target_link_libraries(tsv_to_bin optimized ${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}/diskann_dll.lib)
endif()

add_executable(int8_to_float int8_to_float.cpp)
if(MSVC)
target_link_options(int8_to_float PRIVATE /MACHINE:x64)
Expand Down
27 changes: 6 additions & 21 deletions tests/utils/compute_groundtruth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,7 @@ void exact_knn(const size_t dim, const size_t k,
const float *const queries) // queries in Col major
{
float *points_l2sq = new float[npoints];
// std::cout<<"jere"<<std::endl;
float *queries_l2sq = new float[nqueries];
// std::cout<<"jere "<<npoints<<" " <<dim << " " << nqueries <<std::endl;
compute_l2sq(points_l2sq, points, npoints, dim);
compute_l2sq(queries_l2sq, queries, nqueries, dim);

Expand Down Expand Up @@ -164,14 +162,6 @@ void exact_knn(const size_t dim, const size_t k,
assert(std::is_sorted(
dist_closest_points + (ptrdiff_t) q * (ptrdiff_t) k,
dist_closest_points + (ptrdiff_t)(q + 1) * (ptrdiff_t) k));
/*std::sort(point_dist.begin(), point_dist.end(),
[](const auto &l, const auto &r) {return l.second < r.second; });
for (int l = 0; l < k; ++l) {
closest_points[(ptrdiff_t)l + (ptrdiff_t)q * (ptrdiff_t)k] =
point_dist[l].first;
dist_closest_points[(ptrdiff_t)l + (ptrdiff_t)q * (ptrdiff_t)k] =
point_dist[l].second;
}*/
}
std::cout << "Computed exact k-NN for queries: [" << q_b << "," << q_e
<< ")" << std::endl;
Expand Down Expand Up @@ -219,12 +209,10 @@ inline void load_bin_as_float(const char *filename, float *&data, size_t &npts,

reader.seekg(start_id * ndims * sizeof(T) + 2 * sizeof(uint32_t),
std::ios::beg);
// data = new T[nptsuint64_t * ndimsuint64_t];
T *data_T = new T[nptsuint64_t * ndimsuint64_t];
reader.read((char *) data_T, sizeof(T) * nptsuint64_t * ndimsuint64_t);
std::cout << "Finished reading part of the bin file." << std::endl;
reader.close();
// data = (nptsuint64_t*ndimsuint64_t, ALIGNMENT);
data = aligned_malloc<float>(nptsuint64_t * ndimsuint64_t, ALIGNMENT);
#pragma omp parallel for schedule(dynamic, 32768)
for (int64_t i = 0; i < (int64_t) nptsuint64_t; i++) {
Expand Down Expand Up @@ -278,11 +266,7 @@ inline void save_groundtruth_as_one_file(const std::string filename,

template<typename T>
int aux_main(int argv, char **argc) {
if (argv != 6) {
command_line_help();
return -1;
}


size_t npoints, nqueries, dim;
std::string base_file(argc[2]);
std::string query_file(argc[3]);
Expand Down Expand Up @@ -331,10 +315,6 @@ int aux_main(int argv, char **argc) {
}
}

// save_bin<int>(gt_file + std::string("_ids.bin"), closest_points, nqueries,
// k);
// save_bin<float>(gt_file + std::string("_dist.bin"), dist_closest_points,
// nqueries, k);
save_groundtruth_as_one_file(gt_file, closest_points, dist_closest_points,
nqueries, k);
diskann::aligned_free(query_data);
Expand All @@ -344,6 +324,11 @@ int aux_main(int argv, char **argc) {
}

int main(int argc, char **argv) {
if (argc != 6) {
command_line_help();
return -1;
}

if (std::string(argv[1]) == std::string("float"))
aux_main<float>(argc, argv);
if (std::string(argv[1]) == std::string("int8"))
Expand Down
72 changes: 72 additions & 0 deletions tests/utils/tsv_to_bin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

#include <algorithm>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "utils.h"

template<class T>
void block_convert(std::ifstream& reader, std::ofstream& writer, _u64 npts,
_u64 ndims) {
auto read_buf = new T[4 * npts * (ndims + 1)];

auto cursor = read_buf;
T val;

for (_u64 i = 0; i < npts; i++) {
for (_u64 d = 0; d < ndims; ++d) {
reader >> val;
*cursor = val;
cursor++;
}
}
writer.write((char*) read_buf, npts * ndims * sizeof(T));
delete[] read_buf;
}

int main(int argc, char** argv) {
if (argc != 6) {
std::cout << argv[0]
<< "<float/int8/uint8> input_filename.tsv output_filename.bin dim num_pts>"
<< std::endl;
exit(-1);
}

if (std::string(argv[1]) != std::string("float") &&
std::string(argv[1]) != std::string("int8") &&
std::string(argv[1]) != std::string("uint8")) {
std::cout << "Unsupported type. float, int8 and uint8 types are supported."
<< std::endl;
}

_u64 ndims = atoi(argv[4]);
_u64 npts = atoi(argv[5]);

std::ifstream reader(argv[2], std::ios::binary | std::ios::ate);
// _u64 fsize = reader.tellg();
reader.seekg(0, std::ios::beg);
reader.seekg(0, std::ios::beg);

_u64 blk_size = 131072;
_u64 nblks = ROUND_UP(npts, blk_size) / blk_size;
std::cout << "# blks: " << nblks << std::endl;
std::ofstream writer(argv[3], std::ios::binary);
auto npts_s32 = (_u32) npts;
auto ndims_s32 = (_u32) ndims;
writer.write((char*) &npts_s32, sizeof(_u32));
writer.write((char*) &ndims_s32, sizeof(_u32));

for (_u64 i = 0; i < nblks; i++) {
_u64 cblk_size = std::min(npts - i * blk_size, blk_size);
if (std::string(argv[1]) == std::string("float")) {
block_convert<float>(reader, writer, cblk_size, ndims);
} else if (std::string(argv[1]) == std::string("int8")) {
block_convert<int8_t>(reader, writer, cblk_size, ndims);
} else if (std::string(argv[1]) == std::string("uint8")) {
block_convert<uint8_t>(reader, writer, cblk_size, ndims);
}
std::cout << "Block #" << i << " written" << std::endl;
}

reader.close();
writer.close();
}