Skip to content

Commit e0dd2e7

Browse files
committed
Implement convert_binsparse
1 parent 79ffd9f commit e0dd2e7

File tree

6 files changed

+308
-26
lines changed

6 files changed

+308
-26
lines changed

examples/convert_binsparse.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#include <binsparse/binsparse.hpp>
#include <grb/grb.hpp>

#include <algorithm>
#include <cassert>
#include <cctype>
#include <complex>
#include <concepts>
#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
6+
7+
template <typename T, typename I>
8+
void convert(std::string input_file, std::string output_file, std::string format, std::string comment) {
9+
if (format == "CSR") {
10+
std::cout << "Reading in " << input_file << "...\n";
11+
grb::matrix<T, I> x("mouse_gene.mtx");
12+
binsparse::csr_matrix<T, I> matrix{x.backend_.values_.data(), x.backend_.colind_.data(), x.backend_.rowptr_.data(), x.shape()[0], x.shape()[1], I(x.size())};
13+
binsparse::write_csr_matrix(output_file, matrix);
14+
std::cout << "Writing to binsparse file " << output_file << " using " << format << " format...\n";
15+
} else {
16+
assert(false);
17+
/*
18+
std::cout << "Reading in " << input_file << "...\n";
19+
grb::matrix<T, I, grb::coordinate> x("mouse_gene.mtx");
20+
binsparse::coo_matrix<T, I> matrix{x.backend_.values_.data(), x.backend_.rowptr_.data(), x.backend_.colind_.data(), x.shape()[0], x.shape()[1], I(x.size())};
21+
binsparse::write_coo_matrix(output_file, matrix);
22+
std::cout << "Writing to binsparse file " << output_file << " using " << format << " format...\n";
23+
*/
24+
}
25+
}
26+
27+
/// Selects the stored value type from the Matrix Market `type` field and
/// forwards to `convert<T, I>`.
///
/// Mapping: "real" -> float, "integer" -> int64_t, "pattern" -> bool.
///
/// @tparam I  index type, chosen by the caller
/// @param input_file   path of the Matrix Market file to read
/// @param output_file  path of the binsparse file to write
/// @param type         value type string from the Matrix Market banner
/// @param format       requested storage format, forwarded unchanged
/// @param comment      Matrix Market comment text, forwarded unchanged
/// @throws std::runtime_error if `type` is "complex" (not implemented yet)
///         or is not one of the recognized type strings
template <typename I>
void convert(std::string input_file, std::string output_file, std::string type,
             std::string format, std::string comment) {
  if (type == "real") {
    convert<float, I>(input_file, output_file, format, comment);
  } else if (type == "integer") {
    convert<int64_t, I>(input_file, output_file, format, comment);
  } else if (type == "pattern") {
    convert<bool, I>(input_file, output_file, format, comment);
  } else if (type == "complex") {
    // TODO: convert<std::complex<float>, I>(input_file, output_file, format, comment);
    throw std::runtime_error("convert: complex-valued matrices are not supported yet");
  } else {
    // Previously an unknown type fell through and nothing was written,
    // yet the program still exited successfully.
    throw std::runtime_error("convert: unrecognized Matrix Market value type \"" + type + "\"");
  }
}
41+
42+
43+
int main(int argc, char** argv) {
44+
45+
if (argc < 3) {
46+
std::cout << "usage: ./convert_binsparse [input_file.mtx] [output_file.hdf5] [optional: format {CSR, COO}]\n";
47+
return 1;
48+
}
49+
50+
std::string input_file(argv[1]);
51+
std::string output_file(argv[2]);
52+
53+
std::string format;
54+
55+
if (argc >= 4) {
56+
format = argv[3];
57+
58+
for (auto&& c : format) {
59+
c = std::toupper(c);
60+
}
61+
} else {
62+
format = "CSR";
63+
}
64+
65+
auto [m, n, nnz, type, comment] = binsparse::mmread_metadata(input_file);
66+
67+
std::cout << "Matrix is " << m << " x " << n << " with " << nnz << " values.\n";
68+
std::cout << "Type: " << type << std::endl;
69+
std::cout << "Comment:\n";
70+
std::cout << comment;
71+
72+
assert(format == "COO" || format == "CSR");
73+
74+
auto max_size = std::max({m, n, nnz});
75+
76+
if (max_size + 1 <= std::numeric_limits<uint8_t>::max()) {
77+
convert<uint8_t>(input_file, output_file, type, format, comment);
78+
} else if (max_size + 1 <= std::numeric_limits<uint16_t>::max()) {
79+
convert<uint16_t>(input_file, output_file, type, format, comment);
80+
} else if (max_size + 1 <= std::numeric_limits<uint32_t>::max()) {
81+
convert<uint32_t>(input_file, output_file, type, format, comment);
82+
} else if (max_size + 1 <= std::numeric_limits<uint64_t>::max()) {
83+
convert<uint64_t>(input_file, output_file, type, format, comment);
84+
} else {
85+
throw std::runtime_error("Error! Matrix dimensions or NNZ too large to handle.");
86+
}
87+
88+
return 0;
89+
}

include/binsparse/binsparse.hpp

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
#include <nlohmann/json.hpp>
55
#include <binsparse/containers/matrices.hpp>
66
#include "hdf5_tools.hpp"
7+
#include "type_info.hpp"
78
#include <memory>
89
#include <type_traits>
910

1011
#include <binsparse/c_bindings/allocator_wrapper.hpp>
12+
#include <binsparse/matrix_market/matrix_market.hpp>
13+
14+
#include <iostream>
1115

1216
namespace binsparse {
1317

@@ -27,16 +31,17 @@ void write_csr_matrix(std::string fname,
2731
hdf5_tools::write_dataset(f, "indices_1", colind);
2832
hdf5_tools::write_dataset(f, "pointers_to_1", row_ptr);
2933

30-
std::string json_string =
31-
"{\n"
32-
" \"format\": \"CSR\",\n"
33-
" \"shape\": [";
34-
json_string += std::to_string(m.m) + ", " + std::to_string(m.n) +
35-
"],\n" +
36-
" \"nnz\": " + std::to_string(m.nnz) + "\n" +
37-
"}\n";
34+
using json = nlohmann::json;
35+
json j;
36+
j["binsparse"]["version"] = 0.5;
37+
j["binsparse"]["format"] = "CSR";
38+
j["binsparse"]["shape"] = {m.m, m.n};
39+
j["binsparse"]["nnz"] = m.nnz;
40+
j["binsparse"]["data_types"]["pointers_to_1"] = type_info<I>::label();
41+
j["binsparse"]["data_types"]["indices_1"] = type_info<I>::label();
42+
j["binsparse"]["data_types"]["values"] = type_info<T>::label();
3843

39-
hdf5_tools::write_dataset(f, "metadata", json_string);
44+
hdf5_tools::write_dataset(f, "metadata", j.dump(2));
4045

4146
f.close();
4247
}
@@ -50,10 +55,10 @@ csr_matrix<T, I> read_csr_matrix(std::string fname, Allocator&& alloc) {
5055
using json = nlohmann::json;
5156
auto data = json::parse(metadata);
5257

53-
if (data["format"] == "CSR") {
54-
auto nrows = data["shape"][0];
55-
auto ncols = data["shape"][1];
56-
auto nnz = data["nnz"];
58+
if (data["binsparse"]["format"] == "CSR") {
59+
auto nrows = data["binsparse"]["shape"][0];
60+
auto ncols = data["binsparse"]["shape"][1];
61+
auto nnz = data["binsparse"]["nnz"];
5762

5863
typename std::allocator_traits<std::remove_cvref_t<Allocator>>
5964
:: template rebind_alloc<I> i_alloc(alloc);
@@ -89,16 +94,17 @@ void write_coo_matrix(std::string fname,
8994
hdf5_tools::write_dataset(f, "indices_0", rowind);
9095
hdf5_tools::write_dataset(f, "indices_1", colind);
9196

92-
std::string json_string =
93-
"{\n"
94-
" \"format\": \"COO\",\n"
95-
" \"shape\": [";
96-
json_string += std::to_string(m.m) + ", " + std::to_string(m.n) +
97-
"],\n" +
98-
" \"nnz\": " + std::to_string(m.nnz) + "\n" +
99-
"}\n";
97+
using json = nlohmann::json;
98+
json j;
99+
j["binsparse"]["version"] = 0.5;
100+
j["binsparse"]["format"] = "COO";
101+
j["binsparse"]["shape"] = {m.m, m.n};
102+
j["binsparse"]["nnz"] = m.nnz;
103+
j["binsparse"]["data_types"]["indices_0"] = type_info<I>::label();
104+
j["binsparse"]["data_types"]["indices_1"] = type_info<I>::label();
105+
j["binsparse"]["data_types"]["values"] = type_info<T>::label();
100106

101-
hdf5_tools::write_dataset(f, "metadata", json_string);
107+
hdf5_tools::write_dataset(f, "metadata", j.dump(2));
102108

103109
f.close();
104110
}
@@ -112,10 +118,10 @@ coo_matrix<T, I> read_coo_matrix(std::string fname, Allocator&& alloc) {
112118
using json = nlohmann::json;
113119
auto data = json::parse(metadata);
114120

115-
if (data["format"] == "COO") {
116-
auto nrows = data["shape"][0];
117-
auto ncols = data["shape"][1];
118-
auto nnz = data["nnz"];
121+
if (data["binsparse"]["format"] == "COO") {
122+
auto nrows = data["binsparse"]["shape"][0];
123+
auto ncols = data["binsparse"]["shape"][1];
124+
auto nnz = data["binsparse"]["nnz"];
119125

120126
typename std::allocator_traits<std::remove_cvref_t<Allocator>>
121127
:: template rebind_alloc<I> i_alloc(alloc);

include/binsparse/hdf5_tools.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <cassert>
34
#include <vector>
45
#include <ranges>
56
#include <H5Cpp.h>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#pragma once
2+
3+
#include "matrix_market_inspector.hpp"
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#pragma once
2+
3+
#include <iostream>
4+
#include <fstream>
5+
#include <sstream>
6+
#include <string>
7+
8+
namespace binsparse {
9+
10+
// Read metadata from Matrix Market file.
// Returns a tuple holding 5 values.
// 0 - number of rows in matrix
// 1 - number of columns in matrix
// 2 - number of values in matrix (0 when the size line carries no third
//     field)
// 3 - type of the matrix (real / integer / complex / pattern)
// 4 - comments: every line starting with '%' that sits between the banner
//     and the size line, each terminated by '\n'
//
// Throws std::runtime_error when the file cannot be opened, when the banner
// is not a Matrix Market "matrix" banner, or when no parsable size line
// follows the comments.
//
// Declared inline because this is a non-template function defined in a
// header; without it, including the header from two translation units
// violates the one-definition rule.
inline auto mmread_metadata(std::string file_path) {
  std::ifstream f(file_path);

  if (!f.is_open()) {
    // TODO better choice of exception.
    throw std::runtime_error("mmread_metadata: cannot open " + file_path);
  }

  std::string buf;

  // Banner line: "%%MatrixMarket matrix <format> <type> <symmetry>".
  if (!std::getline(f, buf)) {
    throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
  }
  std::istringstream banner(buf);

  std::string item;

  // Check file is Matrix Market format.
  banner >> item;
  if (item != "%%MatrixMarket") {
    throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
  }

  // Read in "matrix"
  item.clear();
  banner >> item;
  if (item != "matrix") {
    throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
  }

  // Read in coordinate / array, then the type of matrix
  // (real / integer / complex / pattern).
  std::string type;
  if (!(banner >> item >> type)) {
    throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
  }

  // Read in general / symmetric / skew-symmetric / Hermitian (unused).
  banner >> item;

  // Collect comment lines until the first line that is neither blank nor a
  // comment; that line is the size line. Driving the loop with getline's
  // result ends it at end-of-file instead of spinning on the last line read.
  std::string comment;
  bool found_size_line = false;

  while (std::getline(f, buf)) {
    if (buf.find_first_not_of(" \t\r") == std::string::npos) {
      continue;  // blank line, not the size line
    }
    if (buf[0] == '%') {
      comment += buf + "\n";
      continue;
    }
    found_size_line = true;
    break;
  }

  if (!found_size_line) {
    throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
  }

  std::size_t m = 0;
  std::size_t n = 0;
  std::size_t nnz = 0;

  std::istringstream size_line(buf);
  if (!(size_line >> m >> n)) {
    throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
  }
  // The third field may be absent; a failed extraction leaves nnz at 0.
  size_line >> nnz;

  return std::tuple(m, n, nnz, type, comment);
}
80+
81+
} // end binsparse

include/binsparse/type_info.hpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#pragma once
2+
3+
#include <cassert>
4+
5+
namespace binsparse {

namespace detail {

// Label for an integer type that occupies `Bytes` bytes and is signed when
// `Signed` is true. The labels assume 8-bit bytes.
template <std::size_t Bytes, bool Signed>
constexpr const char* integer_label() noexcept {
  static_assert(Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8,
                "binsparse::type_info: unsupported integer width");
  if constexpr (Bytes == 1) {
    return Signed ? "int8" : "uint8";
  } else if constexpr (Bytes == 2) {
    return Signed ? "int16" : "uint16";
  } else if constexpr (Bytes == 4) {
    return Signed ? "int32" : "uint32";
  } else {
    return Signed ? "int64" : "uint64";
  }
}

} // end detail

// Maps a C++ type to the label string returned by `type_info<T>::label()`.
//
// The primary template is left undefined, so using an unsupported type is a
// compile-time error.
//
// Integer specializations are written for the fundamental types rather than
// for the <cstdint> aliases. Several aliases (and std::size_t) can name the
// same fundamental type, and specializing the same type twice does not
// compile; the fundamental types are always distinct from one another. Each
// integer label is derived from the width of the type.
template <typename T>
struct type_info;

// cv-qualified types report the label of the unqualified type.
template <typename T>
struct type_info<const T> {
  static constexpr auto label() noexcept {
    return type_info<T>::label();
  }
};

template <typename T>
struct type_info<volatile T> {
  static constexpr auto label() noexcept {
    return type_info<T>::label();
  }
};

// More specialized than the two above, which keeps `const volatile T`
// unambiguous.
template <typename T>
struct type_info<const volatile T> {
  static constexpr auto label() noexcept {
    return type_info<T>::label();
  }
};

template <>
struct type_info<unsigned char> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(unsigned char), false>();
  }
};

template <>
struct type_info<unsigned short> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(unsigned short), false>();
  }
};

template <>
struct type_info<unsigned int> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(unsigned int), false>();
  }
};

template <>
struct type_info<unsigned long> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(unsigned long), false>();
  }
};

template <>
struct type_info<unsigned long long> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(unsigned long long), false>();
  }
};

template <>
struct type_info<signed char> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(signed char), true>();
  }
};

template <>
struct type_info<short> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(short), true>();
  }
};

template <>
struct type_info<int> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(int), true>();
  }
};

template <>
struct type_info<long> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(long), true>();
  }
};

template <>
struct type_info<long long> {
  static constexpr auto label() noexcept {
    return detail::integer_label<sizeof(long long), true>();
  }
};

template <>
struct type_info<float> {
  static constexpr auto label() noexcept {
    return "float32";
  }
};

template <>
struct type_info<double> {
  static constexpr auto label() noexcept {
    return "float64";
  }
};

template <>
struct type_info<bool> {
  static constexpr auto label() noexcept {
    return "bint8";
  }
};

} // end binsparse

0 commit comments

Comments
 (0)