Skip to content

Commit 327fb71

Browse files
committed
Implement binsparse reading
1 parent 97f5ee2 commit 327fb71

File tree

7 files changed

+24936
-0
lines changed

7 files changed

+24936
-0
lines changed

examples/Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
# Builds every *.cpp file in this directory into a same-named executable.

CXX = g++-12

SOURCES += $(wildcard *.cpp)
TARGETS := $(patsubst %.cpp, %, $(SOURCES))

BINSPARSE_DIR=../include

CXXFLAGS = -std=c++20 -O3 -I$(BINSPARSE_DIR)

# Update HDF5 Flags for your HDF5 installation
HDF5_CXXFLAGS ?= -I/opt/homebrew/Cellar/hdf5/1.12.2_2/include
HDF5_LD_FLAGS ?= -L/opt/homebrew/Cellar/hdf5/1.12.2_2/lib -lhdf5_hl_cpp -lhdf5_cpp -lhdf5_hl -lhdf5

CXXFLAGS += $(HDF5_CXXFLAGS)
LD_FLAGS += $(HDF5_LD_FLAGS)

# `all` and `clean` name actions, not files: declare them phony so a stray
# file called "all" or "clean" cannot make the rule look up to date.
.PHONY: all clean

all: $(TARGETS)

# Each example is a single translation unit linked against HDF5.
%: %.cpp
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LD_FLAGS)

clean:
	rm -fv $(TARGETS)

examples/data/matrix.hdf5

4.27 KB
Binary file not shown.

examples/simple_io.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <iostream>
2+
#include <binsparse/binsparse.hpp>
3+
4+
int main(int argc, char** argv) {
5+
printf("Hello, world!\n");
6+
auto mat = binsparse::read_coo_matrix<float>("data/matrix.hdf5");
7+
8+
for (size_t i = 0; i < mat.nnz; i++) {
9+
std::cout << mat.rowind[i] << ", " << mat.colind[i] << ": " << mat.values[i] << std::endl;
10+
}
11+
12+
return 0;
13+
}

include/binsparse/binsparse.hpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#pragma once
2+
3+
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>

#include <nlohmann/json.hpp>

#include <binsparse/containers/matrices.hpp>
#include "hdf5_tools.hpp"
8+
9+
namespace binsparse {
10+
11+
/*
12+
template <typename T, typename I>
13+
void write_matrix(std::string fname,
14+
const std::vector<std::tuple<std::tuple<I, I>, T>>& tuples) {
15+
16+
using value_type = T
17+
using index_type = I
18+
19+
std::vector<std::ranges::range_value_t<M>> tuples(matrix.begin(), matrix.end());
20+
21+
auto sort_fn = [](const auto& a, const auto& b) {
22+
auto&& [a_index, a_value] = a;
23+
auto&& [b_index, b_value] = b;
24+
auto&& [a_i, a_j] = a_index;
25+
auto&& [b_i, b_j] = b_index;
26+
if (a_i < b_i) {
27+
return true;
28+
}
29+
else if (a_i == b_i) {
30+
if (a_j < b_j) {
31+
return true;
32+
}
33+
}
34+
return false;
35+
};
36+
37+
std::sort(tuples.begin(), tuples.end(), sort_fn);
38+
39+
std::vector<grb::matrix_index_t<M>> rows;
40+
std::vector<grb::matrix_index_t<M>> cols;
41+
std::vector<grb::matrix_scalar_t<M>> vals;
42+
43+
rows.reserve(matrix.size());
44+
cols.reserve(matrix.size());
45+
vals.reserve(matrix.size());
46+
47+
for (auto&& [index, value] : tuples) {
48+
auto&& [row, col] = index;
49+
rows.push_back(row);
50+
cols.push_back(col);
51+
vals.push_back(value);
52+
}
53+
54+
H5::H5File f(fname.c_str(), H5F_ACC_TRUNC);
55+
56+
hdf5_tools::write_dataset(f, "values", vals);
57+
hdf5_tools::write_dataset(f, "indices_0", rows);
58+
hdf5_tools::write_dataset(f, "indices_1", cols);
59+
60+
std::string json_string =
61+
"{\n"
62+
" \"format\": \"COO\",\n"
63+
" \"shape\": [";
64+
json_string += std::to_string(matrix.shape()[0]) + ", " + std::to_string(matrix.shape()[1]) +
65+
"],\n" +
66+
" \"nnz\": " + std::to_string(matrix.size()) + "\n" +
67+
"}\n";
68+
69+
hdf5_tools::write_dataset(f, "metadata", json_string);
70+
71+
f.close();
72+
}
73+
*/
74+
75+
template <typename T, typename I, typename Allocator>
76+
coo_matrix<T, I> read_coo_matrix(std::string fname, Allocator&& alloc) {
77+
H5::H5File f(fname.c_str(), H5F_ACC_RDWR);
78+
79+
auto metadata = hdf5_tools::read_dataset<char>(f, "metadata");
80+
81+
using json = nlohmann::json;
82+
auto data = json::parse(metadata);
83+
84+
if (data["format"] == "COO") {
85+
auto nrows = data["shape"][0];
86+
auto ncols = data["shape"][1];
87+
auto nnz = data["nnz"];
88+
89+
typename std::allocator_traits<Allocator>:: template rebind_alloc<I> i_alloc(alloc);
90+
91+
auto values = hdf5_tools::read_dataset<T>(f, "values", alloc);
92+
auto rows = hdf5_tools::read_dataset<I>(f, "indices_0", i_alloc);
93+
auto cols = hdf5_tools::read_dataset<I>(f, "indices_1", i_alloc);
94+
95+
return coo_matrix<T, I>{values.data(), rows.data(), cols.data(), nrows, ncols, nnz};
96+
} else {
97+
assert(false);
98+
}
99+
}
100+
101+
template <typename T, typename I = std::size_t>
102+
coo_matrix<T, I> read_coo_matrix(std::string fname) {
103+
return read_coo_matrix<T, I>(fname, std::allocator<T>{});
104+
}
105+
106+
} // end binsparse
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#pragma once
2+
3+
namespace binsparse {
4+
5+
/// Tag type selecting row-major storage order.
///
/// Two tags compare equal only when they are the same tag type; comparing
/// against any other type yields false. The operators are const so that
/// const tag objects can be compared as well.
struct row_major {
  /// Same tag type: always equal.
  constexpr bool operator==(row_major) const noexcept { return true; }

  /// Any other type: never equal.
  template <typename T>
  constexpr bool operator==(T&&) const noexcept { return false; }
};
10+
11+
/// Tag type selecting column-major storage order.
///
/// Two tags compare equal only when they are the same tag type; comparing
/// against any other type yields false. The operators are const so that
/// const tag objects can be compared as well.
struct column_major {
  /// Same tag type: always equal.
  constexpr bool operator==(column_major) const noexcept { return true; }

  /// Any other type: never equal.
  template <typename T>
  constexpr bool operator==(T&&) const noexcept { return false; }
};
16+
17+
/// Plain aggregate describing a sparse matrix through three raw arrays.
/// The struct has no constructor or destructor, so it never allocates or
/// frees the arrays it points to.
///
/// NOTE(review): presumably compressed sparse row layout with `m` rows,
/// `n` columns and `nnz` stored entries - confirm against callers.
///
/// @tparam T  element type of the stored values
/// @tparam I  index type
template <typename T, typename I>
struct csr_matrix {
  T* values;   ///< stored values
  I* colind;   ///< column index array
  I* row_ptr;  ///< row pointer array

  I m;    ///< first dimension
  I n;    ///< second dimension
  I nnz;  ///< number of stored entries
};
25+
26+
/// Plain aggregate describing a sparse matrix in coordinate (COO) form:
/// entry `k` has value `values[k]` at position (`rowind[k]`, `colind[k]`).
/// The struct has no constructor or destructor, so it never allocates or
/// frees the arrays it points to.
///
/// @tparam T  element type of the stored values
/// @tparam I  index type
template <typename T, typename I>
struct coo_matrix {
  T* values;  ///< stored values
  I* rowind;  ///< row index of each stored value
  I* colind;  ///< column index of each stored value

  I m;    ///< number of rows
  I n;    ///< number of columns
  I nnz;  ///< number of stored entries
};
34+
35+
/// Plain aggregate describing a dense matrix through a single raw array.
/// The struct has no constructor or destructor, so it never allocates or
/// frees the array it points to.
///
/// @tparam T      element type of the stored values
/// @tparam I      index type
/// @tparam Order  storage-order tag, exposed as the member type `order`
template <typename T, typename I, typename Order>
struct dense_matrix {
  T* values;  ///< matrix elements

  I m;  ///< first dimension
  I n;  ///< second dimension

  using order = Order;
};
43+
44+
} // end binsparse

include/binsparse/hdf5_tools.hpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#pragma once
2+
3+
#include <vector>
4+
#include <ranges>
5+
#include <H5Cpp.h>
6+
7+
namespace hdf5_tools {
8+
9+
template <typename U>
10+
inline H5::PredType get_hdf5_native_type() {
11+
using T = std::decay_t<U>;
12+
if constexpr(std::is_same_v<T, char>) {
13+
return H5::PredType::NATIVE_CHAR;
14+
} else if constexpr (std::is_same_v<T, unsigned char>) {
15+
return H5::PredType::NATIVE_UCHAR;
16+
} else if constexpr (std::is_same_v<T, short>) {
17+
return H5::PredType::NATIVE_SHORT;
18+
} else if constexpr (std::is_same_v<T, unsigned short>) {
19+
return H5::PredType::NATIVE_USHORT;
20+
} else if constexpr (std::is_same_v<T, int>) {
21+
return H5::PredType::NATIVE_INT;
22+
} else if constexpr (std::is_same_v<T, unsigned int>) {
23+
return H5::PredType::NATIVE_UINT;
24+
} else if constexpr (std::is_same_v<T, long>) {
25+
return H5::PredType::NATIVE_LONG;
26+
} else if constexpr (std::is_same_v<T, unsigned long>) {
27+
return H5::PredType::NATIVE_ULONG;
28+
} else if constexpr (std::is_same_v<T, long long>) {
29+
return H5::PredType::NATIVE_LLONG;
30+
} else if constexpr (std::is_same_v<T, unsigned long long>) {
31+
return H5::PredType::NATIVE_ULLONG;
32+
} else if constexpr (std::is_same_v<T, float>) {
33+
return H5::PredType::NATIVE_FLOAT;
34+
} else if constexpr (std::is_same_v<T, double>) {
35+
return H5::PredType::NATIVE_DOUBLE;
36+
} else if constexpr (std::is_same_v<T, long double>) {
37+
return H5::PredType::NATIVE_LDOUBLE;
38+
} else {
39+
assert(false);
40+
}
41+
}
42+
43+
template <typename U>
44+
inline H5::PredType get_hdf5_standard_type() {
45+
using T = std::decay_t<U>;
46+
if constexpr(std::is_same_v<T, char>) {
47+
return H5::PredType::STD_I8LE;
48+
} else if constexpr(std::is_same_v<T, unsigned char>) {
49+
return H5::PredType::STD_U8LE;
50+
} else if constexpr(std::is_same_v<T, int8_t>) {
51+
return H5::PredType::STD_I8LE;
52+
} else if constexpr (std::is_same_v<T, uint8_t>) {
53+
return H5::PredType::STD_U8LE;
54+
} else if constexpr (std::is_same_v<T, int16_t>) {
55+
return H5::PredType::STD_I16LE;
56+
} else if constexpr (std::is_same_v<T, uint16_t>) {
57+
return H5::PredType::STD_U16LE;
58+
} else if constexpr (std::is_same_v<T, int32_t>) {
59+
return H5::PredType::STD_I32LE;
60+
} else if constexpr (std::is_same_v<T, uint32_t>) {
61+
return H5::PredType::STD_U32LE;
62+
} else if constexpr (std::is_same_v<T, int64_t>) {
63+
return H5::PredType::STD_I64LE;
64+
} else if constexpr (std::is_same_v<T, uint64_t>) {
65+
return H5::PredType::STD_U64LE;
66+
} else if constexpr (std::is_same_v<T, std::size_t>) {
67+
return H5::PredType::STD_U64LE;
68+
} else if constexpr (std::is_same_v<T, float>) {
69+
return H5::PredType::IEEE_F32LE;
70+
} else if constexpr (std::is_same_v<T, double>) {
71+
return H5::PredType::IEEE_F64LE;
72+
} else {
73+
assert(false);
74+
}
75+
}
76+
77+
template <std::ranges::contiguous_range R>
78+
void write_dataset(H5::H5File& f, const std::string& label, R&& r) {
79+
using T = std::ranges::range_value_t<R>;
80+
hsize_t size = std::ranges::size(r);
81+
H5::DataSpace dataspace(1, &size);
82+
auto dataset = f.createDataSet(label.c_str(), get_hdf5_standard_type<T>(), dataspace);
83+
84+
dataset.write(std::ranges::data(r), get_hdf5_native_type<T>());
85+
dataset.close();
86+
dataspace.close();
87+
}
88+
89+
template <typename T, typename Allocator>
90+
std::span<T> read_dataset(H5::H5File& f, const std::string& label, Allocator&& alloc) {
91+
H5::DataSet dataset = f.openDataSet(label.c_str());
92+
H5::DataSpace space = dataset.getSpace();
93+
hsize_t ndims = space.getSimpleExtentNdims();
94+
assert(ndims == 1);
95+
hsize_t dims;
96+
space.getSimpleExtentDims(&dims, &ndims);
97+
T* data = alloc.allocate(dims);
98+
dataset.read(data, get_hdf5_native_type<T>());
99+
dataset.close();
100+
space.close();
101+
return std::span<T>(data, dims);
102+
}
103+
104+
template <typename T>
105+
std::span<T> read_dataset(H5::H5File& f, const std::string& label) {
106+
return read_dataset<T>(f, label, std::allocator<T>{});
107+
}
108+
109+
} // end hdf_tools

0 commit comments

Comments
 (0)