Skip to content

Commit 0011eb1

Browse files
rethinking
1 parent 36b998f commit 0011eb1

File tree

1 file changed

+103
-73
lines changed

1 file changed

+103
-73
lines changed

include/binsparse/c_bindings/binsparse_matrix.h

Lines changed: 103 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,26 @@ bc_type_code ;
5959
//
6060
// NULL non-NULL "Index": some entries present.
6161
// indices need not be in order, nor unique.
62+
// size of index [k] array is nindex [k].
63+
// in_order [k] can be true or false.
6264
//
6365
// non-NULL non-NULL "Hyper": some entries present.
64-
// indices must be in order and unique. pointer [k]
65-
// has size npointer [k]+1 and must be monotonically
66-
// non-decreasing. index [k] has size npointer [k]
66+
// indices must be in order and unique.
67+
// index [k] has size nindex [k]
68+
// pointer [k] has size nindex [k]+1 and must be
69+
// monotonically non-decreasing.
70+
// in_order [k] must be true.
6771
//
6872
// non-NULL NULL "Sparse": all entries present.
6973
// pointer [k] has size dimension [axis_order[k]]+1.
70-
// pointer [k] has size npointer [k]+1 and must be
71-
// monotonically non-decreasing.
74+
// nindex [k] not used (or can be set to
75+
// dimension [axis_order[k]] for consistency).
76+
// in_order [k] must be true.
7277
//
73-
// NULL NULL "Full": all entries present,
78+
// NULL NULL "Full": all entries present,
79+
// nindex [k] not used (or can be set to
80+
// dimension [axis_order[k]] for consistency).
81+
// in_order [k] must be true.
7482

7583
// The matrix format is determined by the presence of pointer [0:rank-1]
7684
// and index [0:rank-1] (NULL or non-NULL). There need not be any format
@@ -138,9 +146,9 @@ bc_type_code ;
138146
// DCSR (hypersparse by-row): Format is (Hyper, Index)
139147
//
140148
// axis_order = { 0, 1 }, stored by-row
141-
// pointer [0] = non-NULL, of size npointer [0]+1
149+
// pointer [0] = non-NULL, of size nindex [0]+1
142150
// pointer [1] = NULL
143-
// index [0] = non-NULL, of size npointer [0]
151+
// index [0] = non-NULL, of size nindex [0]
144152
// index [1] = col indices, size nvals
145153
// in_order [0] = true
146154
// in_order [1] = true if index [1] in ascending order, false otherwise
@@ -149,9 +157,9 @@ bc_type_code ;
149157
// DCSC format (hypersparse by-col): Format is (Hyper, Index)
150158
//
151159
// axis_order = { 1, 0 }, stored by-col
152-
// pointer [0] = non-NULL, of size npointer [0]+1
160+
// pointer [0] = non-NULL, of size nindex [0]+1
153161
// pointer [1] = NULL
154-
// index [0] = non-NULL, of size npointer [0]
162+
// index [0] = non-NULL, of size nindex [0]
155163
// index [1] = row indices, size nvals
156164
// in_order [0] = true
157165
// in_order [1] = true if index [1] in ascending order, false otherwise
@@ -179,27 +187,32 @@ bc_type_code ;
179187
// in_order [1] = true
180188
// values: size nvals = m*n, or 1 if iso
181189
//
182-
// Hyper-Full format (held by row: each row is either full or all empty)
183-
// Format is (Hyper, Full)
190+
// Index-Full format (held by row: each row is either full or all empty)
191+
// Format is (Index, Full)
184192
//
185193
// axis_order = { 0, 1 }, stored by-row
186-
// pointer [0] = non-NULL, of size npointer [0]+1
194+
// pointer [0] = NULL (an "Index" dimension has no pointer)
187195
// pointer [1] = NULL
188-
// index [0] = non-NULL, of size npointer [0]
196+
// index [0] = non-NULL, of size nindex [0]
189197
// index [1] = NULL
190-
// values: size nvals = npointer [0]*n, or 1 if iso
198+
// in_order [0] = true
199+
// in_order [1] = true
200+
// values: size nvals = nindex [0]*n, or 1 if iso
191201
//
192-
// Hyper-Full format (held by col: each col is either full or all empty)
193-
// Format is (Hyper, Full)
202+
// Index-Full format (held by col: each col is either full or all empty)
203+
// Format is (Index, Full)
194204
//
195205
// axis_order = { 1, 0 }, stored by-col
196-
// pointer [0] = non-NULL, of size npointer [0]+1
206+
// pointer [0] = NULL (an "Index" dimension has no pointer)
197207
// pointer [1] = NULL
198-
// index [0] = non-NULL, of size npointer [0]
208+
// index [0] = non-NULL, of size nindex [0]
199209
// index [1] = NULL
200-
// values: size nvals = m * npointer [0], or 1 if iso
210+
// in_order [0] = true
211+
// in_order [1] = true
212+
// values: size nvals = m * nindex [0], or 1 if iso
213+
214+
// Are all 16 formats possible? NO. I think there are only 5.
201215

202-
// Are all 16 formats possible?
203216
// (Full, Sparse, Hyper, Index) x (Full, Sparse, Hyper, Index) ?
204217
// I think the last dimension must be Full or Index, which leads to
205218
// 8 formats: (Full, Sparse, Hyper, Index) x (Full, Index).
@@ -209,16 +222,16 @@ bc_type_code ;
209222
// (Sparse, Index) CSR and CSC
210223
// (Hyper, Index) hypersparse by row or col
211224
// (Full, Full) full
212-
// (Hyper, Full) hyper-full
225+
// (Index, Full) can be defined, looks useful. An unordered
226+
// set of full vectors.
213227
//
214-
// not described above:
228+
// not described above: either not useful or not valid
215229
//
216-
// (Index, Full) can be defined, looks useful. An unorderd
217-
// set of full vectors.
230+
// (Hyper, Full) not useful (rule 4)
218231
// (Sparse, Full) can be defined but not useful?
232+
// see rule 3 below
219233
// (Full, Index) huh? See Rule (2) below.
220234
//
221-
222235
// bitmap format: held as two full bc_matrices with same dimension and
223236
// axis_order. The first matrix ('bitmap' pattern) is always bool.
224237
// The second full matrix holds the values.
@@ -230,55 +243,71 @@ bc_type_code ;
230243
// formats must all be Full.
231244
//
232245
// (2) the last format must be "Index" or "Full".
233-
246+
//
247+
// (3) (... ,Sparse, Full, ...) can be defined but is not useful. It has the
248+
// same representation as (... Full, Full, ...) but a higher storage cost.
249+
// So do not support this format.
250+
//
251+
// (4) (... , Hyper, Full, ...) can be defined but is not useful. The
252+
// same thing can be done with (... , Index, Full, ...) where the Index
253+
// dimension is sorted. There's no need for the pointer for the Hyper
254+
// dimension, since all objects to the right have the same size.
255+
//
256+
// (5) Like rule 1, once "Index" appears, the remaining formats to the right
257+
// must be "Index" or "Full". This is because "Index" has no pointer so
258+
// all formats to the right must have a known size, or be a list like
259+
// (Index, Index, Full) where the total size (nvals) is enough.
234260

235261
// rank = 3?
236262
//
237-
// describe some for future extensions. Possible formats, a subset
238-
// of (4 x 4 x 2) = 32 formats (rule 1) minus the 10 formats that break
239-
// rule 2. Some of these can be described but are not useful.
240-
// This results in 22 possible formats ... I think. Of those,
241-
// at least 4 are not useful (, ... Sparse, Full, ...)
242-
243-
// (Index , Index , Index)
244-
// (Index , Hyper , Index)
245-
// (Index , Sparse, Index)
246-
// (Index , Full, Index) not possible (rule 2)
247-
248-
// (Hyper , Index , Index)
249-
// (Hyper , Hyper , Index)
250-
// (Hyper , Sparse, Index)
251-
// (Hyper , Full , Index) not possible (rule 2)
252-
253-
// (Sparse, Index , Index)
254-
// (Sparse, Hyper , Index)
255-
// (Sparse, Sparse, Index)
256-
// (Sparse, Full , Index) not possible (rule 2)
257-
258-
// (Full , Index , Index) not possible (rule 2)
259-
// (Full , Hyper , Index) not possible (rule 2)
260-
// (Full , Sparse, Index) not possible (rule 2)
261-
// (Full , Full , Index) not possible (rule 2)
262-
263-
// (Index , Index , Full )
264-
// (Index , Hyper , Full )
265-
// (Index , Sparse, Full ) ok but not useful?
266-
// (Index , Full , Full )
267-
268-
// (Hyper , Index , Full )
269-
// (Hyper , Hyper , Full )
270-
// (Hyper , Sparse, Full ) ok but not useful?
271-
// (Hyper , Full , Full )
272-
273-
// (Sparse, Index , Full )
274-
// (Sparse, Hyper , Full )
275-
// (Sparse, Sparse, Full ) ok but not useful?
276-
// (Sparse, Full , Full ) ok but not useful?
277-
278-
// (Full , Index , Full ) not possible (rule 2)
279-
// (Full , Hyper , Full ) not possible (rule 2)
280-
// (Full , Sparse, Full ) not possible (rule 2)
281-
// (Full , Full , Full )
263+
// describe some for future extensions. Possible formats, a subset of
264+
// (4 x 4 x 2) = 32 formats (rule 2) minus the 10 formats that break rule
265+
// 1. Some of these can be described but are not useful (rules 3 to 5).
266+
// This results in a total of 12 useful formats that could be supported.
267+
268+
// * (Index , Index , Index) all COO, nice
269+
// (Index , Hyper , Index) unordered list of 2D hypersparse matrices?
270+
// but the first Index dimension has no pointer
271+
// so it can't be used to find each 2D slice.
272+
// Seems we need a rule 5.
273+
// (Index , Sparse, Index) see rule 5
274+
// (Index , Full, Index) not possible (rule 1)
275+
276+
// * (Hyper , Index , Index) 1D hyperlist of 2D COO matrices. Nice.
277+
// * (Hyper , Hyper , Index) 1D hyperlist of 2D hypersparse mtx. Nice
278+
// * (Hyper , Sparse, Index) 1D hyper list of 2D CSR/CSC matrices. Nice
279+
// (Hyper , Full , Index) not possible (rule 1), also rule 4
280+
281+
// * (Sparse, Index , Index) 1D dense array of 2D COO matrices. Nice.
282+
// * (Sparse, Hyper , Index) 1D dense array of 2D hypersparse. Nice.
283+
// * (Sparse, Sparse, Index) 1D dense array of 2D CSR/CSC mtx. Nice.
284+
// (Sparse, Full , Index) not possible (rule 1)
285+
286+
// (Full , Index , Index) not possible (rule 1)
287+
// (Full , Hyper , Index) not possible (rule 1)
288+
// (Full , Sparse, Index) not possible (rule 1)
289+
// (Full , Full , Index) not possible (rule 1)
290+
291+
// * (Index , Index , Full ) like COO, but each "entry" is an entire
292+
// dense vector
293+
// (Index , Hyper , Full ) ok but not useful (rule 4)
294+
// (Index , Sparse, Full ) ok but not useful? (rule 3)
295+
// * (Index , Full , Full ) Unordered list of 2D full matrices
296+
297+
// * (Hyper , Index , Full ) 2D hypersparse, each entry a full vec. Nice
298+
// (Hyper , Hyper , Full ) ok but not useful (rule 4)
299+
// (Hyper , Sparse, Full ) ok but not useful? (rule 3)
300+
// (Hyper , Full , Full ) ok but not useful (rule 4)
301+
302+
// * (Sparse, Index , Full ) 1D dense array of 2D (Index,Full) Odd but ok
303+
// (Sparse, Hyper , Full ) ok but not useful (rule 4)
304+
// (Sparse, Sparse, Full ) ok but not useful? (rule 3)
305+
// (Sparse, Full , Full ) ok but not useful? (rule 3)
306+
307+
// (Full , Index , Full ) not possible (rule 1)
308+
// (Full , Hyper , Full ) not possible (rule 1), also rule 4
309+
// (Full , Sparse, Full ) not possible (rule 1)
310+
// * (Full , Full , Full )
282311

283312

284313

@@ -328,10 +357,11 @@ typedef struct
328357
// matrix content
329358

330359
void *pointer [KMAX] ; // set of pointers, of type pointer_type
331-
uint64_t npointer [KMAX] ; // pointer [k] has npointer [k]+1 entries
332360
size_t pointer_size [KMAX] ;// allocated size of each pointer[k] array
361+
// pointer [k] has nindex [k]+1 entries.
333362

334363
void *index [KMAX] ; // array of indices, of type index_type
364+
uint64_t nindex [KMAX] ; // index [k] has nindex [k] entries
335365
size_t index_size [KMAX] ; // allocated size of each index[k] array
336366

337367
void *values ; // array of values, of type value_type

0 commit comments

Comments
 (0)