Skip to content

Commit c83496c

Browse files
committed
Read COO matrix into Tim's binsparse matrix format
1 parent e05b668 commit c83496c

File tree

7 files changed

+140
-25
lines changed

7 files changed

+140
-25
lines changed

examples/c_style.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,7 @@ int main(int argc, char** argv) {
55

66
binsparse::allocator_wrapper<float> alloc(malloc, free);
77

8-
auto mat = binsparse::read_coo_matrix<float, std::size_t>("data/matrix.hdf5", alloc);
9-
10-
for (size_t i = 0; i < mat.nnz; i++) {
11-
std::cout << mat.rowind[i] << ", " << mat.colind[i] << ": " << mat.values[i] << std::endl;
12-
}
13-
14-
// binsparse::write_coo_matrix("new_matrix.hdf5", mat);
8+
bc_read_matrix("new_matrix.hdf5");
159

1610
return 0;
1711
}

examples/simple_io.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include <binsparse/binsparse.hpp>
33

44
int main(int argc, char** argv) {
5-
auto mat = binsparse::read_coo_matrix<float>("data/matrix.hdf5");
5+
auto mat = binsparse::read_coo_matrix<float, std::size_t>("data/matrix.hdf5");
66

77
for (size_t i = 0; i < mat.nnz; i++) {
88
std::cout << mat.rowind[i] << ", " << mat.colind[i] << ": " << mat.values[i] << std::endl;

include/binsparse/binsparse.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ coo_matrix<T, I> read_coo_matrix(std::string fname, Allocator&& alloc) {
5353
auto ncols = data["shape"][1];
5454
auto nnz = data["nnz"];
5555

56-
typename std::allocator_traits<std::remove_cvref_t<Allocator>>:: template rebind_alloc<I> i_alloc(alloc);
56+
typename std::allocator_traits<std::remove_cvref_t<Allocator>>
57+
:: template rebind_alloc<I> i_alloc(alloc);
5758

5859
auto values = hdf5_tools::read_dataset<T>(f, "values", alloc);
5960
auto rows = hdf5_tools::read_dataset<I>(f, "indices_0", i_alloc);
@@ -71,3 +72,5 @@ coo_matrix<T, I> read_coo_matrix(std::string fname) {
7172
}
7273

7374
} // end binsparse
75+
76+
#include <binsparse/c_bindings/bc_read_matrix.hpp>
include/binsparse/c_bindings/bc_read_matrix.hpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#pragma once
2+
3+
#include <binsparse/c_bindings/binsparse_matrix.h>
#include <binsparse/c_bindings/allocator_wrapper.hpp>
#include <binsparse/binsparse.hpp>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <memory>
7+
8+
extern "C" {
9+
10+
// Reads the matrix stored in the HDF5 file `fname` into a `bc_matrix_struct`.
//
// The file's "metadata" dataset is parsed as JSON. Only files whose "format"
// entry is "COO", with 32-bit float values and unsigned 64-bit indices, are
// handled; anything else prints a message to stderr and aborts the process
// (also in NDEBUG builds, where the previous assert-only handling ran off the
// end of the function).
//
// The returned struct points at the value/index buffers produced by
// `binsparse::read_coo_matrix` and at an `axis` array allocated here with
// `new[]`. Nothing in this function releases them.
//
// NOTE(review): this is a non-inline definition in a header that
// binsparse.hpp includes; presumably two translation units including it
// would clash at link time -- consider moving the definition to a .cpp file.
// NOTE(review): the HDF5 and JSON calls below may throw; nothing here stops
// an exception from leaving this `extern "C"` function -- confirm callers.
bc_matrix_struct bc_read_matrix(const char* fname) {
  using json = nlohmann::json;

  bool is_coo = false;
  bool is_fp32_uint64 = false;

  {
    // The file is only inspected here, so read-only access is sufficient.
    // The handle lives in its own scope so that it is released before
    // read_coo_matrix() is handed the same path below.
    H5::H5File f(fname, H5F_ACC_RDONLY);

    auto metadata = hdf5_tools::read_dataset<char>(f, "metadata");
    auto data = json::parse(metadata);
    // The single-argument read_dataset overload allocates with
    // std::allocator<T>; give the buffer back now that it has been parsed.
    std::allocator<char>{}.deallocate(metadata.data(), metadata.size());

    is_coo = (data["format"] == "COO");

    if (is_coo) {
      auto value_type = hdf5_tools::dataset_type(f, "values");
      auto index_type = hdf5_tools::dataset_type(f, "indices_0");

      is_fp32_uint64 = (value_type == H5::PredType::IEEE_F32LE &&
                        index_type == H5::PredType::STD_U64LE);
    }
  }

  if (!is_coo) {
    fprintf(stderr, "Error: file format not supported.\n");
    std::abort();
  }

  if (!is_fp32_uint64) {
    fprintf(stderr, "Error: value/index type combination not supported.\n");
    std::abort();
  }

  using T = float;
  using I = uint64_t;
  auto matrix = binsparse::read_coo_matrix<T, I>(fname);

  // Value-initialize so that every field not assigned below is zeroed
  // instead of holding indeterminate data.
  bc_matrix_struct matrix_struct{};

  matrix_struct.value_type = bc_type_fp32;
  matrix_struct.pointer_type = bc_type_uint64;
  matrix_struct.index_type = bc_type_uint64;

  matrix_struct.rank = 2;
  matrix_struct.iso_valued = false;
  matrix_struct.type_size = sizeof(T);
  matrix_struct.values = matrix.values;
  matrix_struct.values_size = matrix.nnz*sizeof(T);
  matrix_struct.nvals = matrix.nnz;

  // One axis descriptor per dimension; `()` zero-initializes both entries.
  matrix_struct.axis = new bc_axis_struct[2]();

  // NOTE(review): both axes are given order 0 and in_order = true without
  // inspecting the indices -- confirm this matches the bc_axis_struct contract.
  matrix_struct.axis[0].order = 0;
  matrix_struct.axis[0].dimension = matrix.m;
  matrix_struct.axis[0].in_order = true;
  matrix_struct.axis[0].index = matrix.rowind;
  matrix_struct.axis[0].nindex = matrix.nnz;
  matrix_struct.axis[0].index_size = matrix.nnz*sizeof(I);

  matrix_struct.axis[1].order = 0;
  matrix_struct.axis[1].dimension = matrix.n;
  matrix_struct.axis[1].in_order = true;
  matrix_struct.axis[1].index = matrix.colind;
  matrix_struct.axis[1].nindex = matrix.nnz;
  matrix_struct.axis[1].index_size = matrix.nnz*sizeof(I);

  return matrix_struct;
}
66+
67+
}

include/binsparse/c_bindings/binsparse_matrix.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
#include <stddef.h>
1818
#include <stdbool.h>
1919

20+
// If compiling in C++, use `extern "C"` to declare C linkage.
21+
#ifdef __cplusplus
22+
extern "C"
23+
{
24+
#endif
25+
2026
//------------------------------------------------------------------------------
2127
// type codes
2228
//------------------------------------------------------------------------------
@@ -39,9 +45,9 @@ typedef enum
3945
bc_type_uint64 = 8, // uint64_t
4046
// signed types
4147
bc_type_int8 = 9, // int8_t
42-
bc_type_int16 = 10 // int16_t
43-
bc_type_int32 = 11 // int32_t
44-
bc_type_int64 = 12 // int64_t
48+
bc_type_int16 = 10, // int16_t
49+
bc_type_int32 = 11, // int32_t
50+
bc_type_int64 = 12, // int64_t
4551
bc_type_fp32 = 13, // float
4652
bc_type_fp64 = 14, // double
4753
bc_type_fc32 = 15, // float complex
@@ -465,5 +471,9 @@ bc_matrix_struct ;
465471
// a bc_matrix is a pointer to the bc_matrix_struct shown above
466472
typedef bc_matrix_struct *bc_matrix ;
467473

474+
#ifdef __cplusplus
475+
}
476+
#endif
477+
468478
#endif
469479

include/binsparse/c_bindings/read_coo_matrix.hpp

Lines changed: 0 additions & 12 deletions
This file was deleted.

include/binsparse/hdf5_tools.hpp

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,50 @@ inline H5::PredType get_hdf5_standard_type() {
7474
}
7575
}
7676

77+
// Maps the stored element type of `dataset` to the matching little-endian
// HDF5 predefined type.
//
// Handled: signed (two's complement) and unsigned integers of 1, 2, 4 or 8
// bytes, and floating-point types of sizeof(float) or sizeof(double) bytes.
//
// Throws H5::DataTypeIException for any other type class, size or sign, and
// for non-little-endian data. These cases used to be guarded by assert only,
// which let control run off the end of the function in NDEBUG builds.
inline H5::PredType get_type(H5::DataSet& dataset) {
  const H5std_string where("hdf5_tools::get_type");

  const H5T_class_t type_class = dataset.getTypeClass();

  if (type_class == H5T_INTEGER) {
    H5::IntType int_type = dataset.getIntType();

    H5std_string order_string;
    if (int_type.getOrder(order_string) != H5T_ORDER_LE) {
      throw H5::DataTypeIException(where, "integer dataset is not little-endian");
    }

    const H5T_sign_t sign = int_type.getSign();
    if (sign != H5T_SGN_NONE && sign != H5T_SGN_2) {
      throw H5::DataTypeIException(where, "unsupported integer sign scheme");
    }
    const bool is_unsigned = (sign == H5T_SGN_NONE);

    // Byte width of one element selects the predefined type.
    switch (int_type.getSize()) {
      case 1:
        return is_unsigned ? H5::PredType::STD_U8LE : H5::PredType::STD_I8LE;
      case 2:
        return is_unsigned ? H5::PredType::STD_U16LE : H5::PredType::STD_I16LE;
      case 4:
        return is_unsigned ? H5::PredType::STD_U32LE : H5::PredType::STD_I32LE;
      case 8:
        return is_unsigned ? H5::PredType::STD_U64LE : H5::PredType::STD_I64LE;
      default:
        throw H5::DataTypeIException(where, "unsupported integer size");
    }
  }

  if (type_class == H5T_FLOAT) {
    H5::FloatType float_type = dataset.getFloatType();

    H5std_string order_string;
    if (float_type.getOrder(order_string) != H5T_ORDER_LE) {
      throw H5::DataTypeIException(where, "floating-point dataset is not little-endian");
    }

    const size_t size = float_type.getSize();

    if (size == sizeof(float)) {
      return H5::PredType::IEEE_F32LE;
    }
    if (size == sizeof(double)) {
      return H5::PredType::IEEE_F64LE;
    }
    throw H5::DataTypeIException(where, "unsupported floating-point size");
  }

  throw H5::DataTypeIException(where, "unsupported dataset type class");
}
120+
77121
template <std::ranges::contiguous_range R>
78122
void write_dataset(H5::H5File& f, const std::string& label, R&& r) {
79123
using T = std::ranges::range_value_t<R>;
@@ -89,15 +133,17 @@ void write_dataset(H5::H5File& f, const std::string& label, R&& r) {
89133
template <typename T, typename Allocator>
90134
std::span<T> read_dataset(H5::H5File& f, const std::string& label, Allocator&& alloc) {
91135
H5::DataSet dataset = f.openDataSet(label.c_str());
136+
92137
H5::DataSpace space = dataset.getSpace();
93138
hsize_t ndims = space.getSimpleExtentNdims();
94139
assert(ndims == 1);
95140
hsize_t dims;
96141
space.getSimpleExtentDims(&dims, &ndims);
142+
space.close();
143+
97144
T* data = alloc.allocate(dims);
98145
dataset.read(data, get_hdf5_native_type<T>());
99146
dataset.close();
100-
space.close();
101147
return std::span<T>(data, dims);
102148
}
103149

@@ -106,4 +152,11 @@ std::span<T> read_dataset(H5::H5File& f, const std::string& label) {
106152
return read_dataset<T>(f, label, std::allocator<T>{});
107153
}
108154

155+
// Opens the dataset named `label` in `f`, looks up its element type with
// get_type(), closes the dataset again and returns the type found.
inline H5::PredType dataset_type(H5::H5File& f, const std::string& label) {
  H5::DataSet ds = f.openDataSet(label.c_str());
  const H5::PredType result = get_type(ds);
  ds.close();
  return result;
}
161+
109162
} // end hdf_tools

0 commit comments

Comments
 (0)