-
Notifications
You must be signed in to change notification settings - Fork 22
Chore: support all basic types #144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -52,14 +52,11 @@ pub trait BinaryWriter { | |||||
|
|
||||||
| fn write_binary(&mut self, bytes: &[u8], length: usize); | ||||||
|
|
||||||
| // TODO Decimal type | ||||||
| // fn write_decimal(&mut self, pos: i32, value: f64); | ||||||
| fn write_decimal(&mut self, value: &rust_decimal::Decimal, precision: u32); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: |
||||||
|
|
||||||
| // TODO Timestamp type | ||||||
| // fn write_timestamp_ntz(&mut self, pos: i32, value: i64); | ||||||
| fn write_timestamp_ntz(&mut self, value: i64, precision: u32); | ||||||
|
|
||||||
| // TODO Timestamp type | ||||||
| // fn write_timestamp_ltz(&mut self, pos: i32, value: i64); | ||||||
| fn write_timestamp_ltz(&mut self, value: i64, precision: u32); | ||||||
|
|
||||||
| // TODO InternalArray, ArraySerializer | ||||||
| // fn write_array(&mut self, pos: i32, value: i64); | ||||||
|
|
@@ -125,7 +122,11 @@ pub enum InnerValueWriter { | |||||
| BigInt, | ||||||
| Float, | ||||||
| Double, | ||||||
| // TODO Decimal, Date, TimeWithoutTimeZone, TimestampWithoutTimeZone, TimestampWithLocalTimeZone, Array, Row | ||||||
| Decimal(u32), // precision | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| Date, | ||||||
| TimestampNtz(u32), // precision | ||||||
| TimestampLtz(u32), // precision | ||||||
| // TODO TimeWithoutTimeZone, Array, Row | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we also support |
||||||
| } | ||||||
|
|
||||||
| /// Accessor for writing the fields/elements of a binary writer during runtime, the | ||||||
|
|
@@ -147,6 +148,10 @@ impl InnerValueWriter { | |||||
| DataType::BigInt(_) => Ok(InnerValueWriter::BigInt), | ||||||
| DataType::Float(_) => Ok(InnerValueWriter::Float), | ||||||
| DataType::Double(_) => Ok(InnerValueWriter::Double), | ||||||
| DataType::Decimal(d) => Ok(InnerValueWriter::Decimal(d.precision())), | ||||||
| DataType::Date(_) => Ok(InnerValueWriter::Date), | ||||||
| DataType::Timestamp(t) => Ok(InnerValueWriter::TimestampNtz(t.precision())), | ||||||
| DataType::TimestampLTz(t) => Ok(InnerValueWriter::TimestampLtz(t.precision())), | ||||||
| _ => unimplemented!( | ||||||
| "ValueWriter for DataType {:?} is currently not implemented", | ||||||
| data_type | ||||||
|
|
@@ -194,6 +199,18 @@ impl InnerValueWriter { | |||||
| (InnerValueWriter::Double, Datum::Float64(v)) => { | ||||||
| writer.write_double(v.into_inner()); | ||||||
| } | ||||||
| (InnerValueWriter::Decimal(p), Datum::Decimal(v)) => { | ||||||
| writer.write_decimal(v, *p); | ||||||
| } | ||||||
| (InnerValueWriter::Date, Datum::Date(d)) => { | ||||||
| writer.write_int(d.get_inner()); | ||||||
| } | ||||||
| (InnerValueWriter::TimestampNtz(p), Datum::Timestamp(ts)) => { | ||||||
| writer.write_timestamp_ntz(ts.get_inner(), *p); | ||||||
| } | ||||||
| (InnerValueWriter::TimestampLtz(p), Datum::TimestampTz(ts)) => { | ||||||
| writer.write_timestamp_ltz(ts.get_inner(), *p); | ||||||
| } | ||||||
| _ => { | ||||||
| return Err(IllegalArgument { | ||||||
| message: format!("{self:?} used to write value {value:?}"), | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -126,6 +126,18 @@ impl InternalRow for ColumnarRow { | |||||
| .value(self.row_id) | ||||||
| } | ||||||
|
|
||||||
| fn get_date(&self, pos: usize) -> i32 { | ||||||
| self.get_int(pos) | ||||||
| } | ||||||
|
|
||||||
| fn get_timestamp_ntz(&self, pos: usize) -> i64 { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can return |
||||||
| self.get_long(pos) | ||||||
| } | ||||||
|
|
||||||
| fn get_timestamp_ltz(&self, pos: usize) -> i64 { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| self.get_long(pos) | ||||||
| } | ||||||
|
|
||||||
| fn get_char(&self, pos: usize, _length: usize) -> &str { | ||||||
| let array = self | ||||||
| .record_batch | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -153,4 +153,21 @@ impl CompactedRowWriter { | |
| pub fn write_double(&mut self, value: f64) { | ||
| self.write_raw(&value.to_ne_bytes()); | ||
| } | ||
|
|
||
| pub fn write_decimal(&mut self, value: &rust_decimal::Decimal, _precision: u32) { | ||
| // For now, serialize decimal to its string representation and write as bytes | ||
| // TODO: implement compact decimal encoding based on precision similar to Java implementation | ||
| let s = value.to_string(); | ||
| self.write_bytes(s.as_bytes()); | ||
|
Comment on lines
+158
to
+161
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should really have an implementation is consistent with Java's for this task.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
| } | ||
|
|
||
| pub fn write_timestamp_ntz(&mut self, value: i64, _precision: u32) { | ||
| // Currently write timestamp as a long (epoch millis or other unit depending on upstream) | ||
| self.write_long(value); | ||
|
Comment on lines
+165
to
+166
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should really have an implementation is consistent with Java's for this task.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
| } | ||
|
|
||
| pub fn write_timestamp_ltz(&mut self, value: i64, _precision: u32) { | ||
| // Currently write timestamp as a long (epoch millis or other unit depending on upstream) | ||
| self.write_long(value); | ||
|
Comment on lines
+170
to
+171
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should really have an implementation is consistent with Java's for this task.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -247,14 +247,14 @@ mod tests { | |
| DataTypes::bigint(), | ||
| DataTypes::float(), | ||
| DataTypes::double(), | ||
| // TODO Date | ||
| // TODO Time | ||
| DataTypes::date(), | ||
| // TIME is not yet represented in Datum | ||
| DataTypes::timestamp(), | ||
| DataTypes::binary(20), | ||
| DataTypes::bytes(), | ||
| DataTypes::char(2), | ||
| DataTypes::string(), | ||
| // TODO Decimal | ||
| // TODO Timestamp | ||
| // TODO Timestamp LTZ | ||
| // TODO Array of Int | ||
| // TODO Array of Float | ||
|
|
@@ -270,14 +270,13 @@ mod tests { | |
| Datum::from(-6101065172474983726i64), // from Java test case: new BigInteger("12345678901234567890").longValue() | ||
| Datum::from(13.2f32), | ||
| Datum::from(15.21f64), | ||
| // TODO Date | ||
| // TODO Time | ||
| Datum::Date(crate::row::datum::Date::new(5)), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does 5 mean? I suggest using the date 2023-10-25 to keep test case parity with java side. |
||
| Datum::Timestamp(crate::row::datum::Timestamp::new(13)), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest using 09:30:00.0 to keep test case parity with java side. |
||
| Datum::from("1234567890".as_bytes()), | ||
| Datum::from("20".as_bytes()), | ||
| Datum::from("1"), | ||
| Datum::from("hello"), | ||
| // TODO Decimal | ||
| // TODO Timestamp | ||
| // TODO Timestamp LTZ | ||
| // TODO Array of Int | ||
| // TODO Array of Float | ||
|
|
@@ -304,6 +303,10 @@ mod tests { | |
| expected.extend(vec![0x33, 0x33, 0x53, 0x41]); | ||
| // DOUBLE: 15.21 | ||
| expected.extend(vec![0xEC, 0x51, 0xB8, 0x1E, 0x85, 0x6B, 0x2E, 0x40]); | ||
| // DATE: 5 | ||
| expected.extend(vec![0x05]); | ||
| // TIMESTAMP: 13 | ||
| expected.extend(vec![0x0D]); | ||
| // BINARY(20): "1234567890".getBytes() | ||
| expected.extend(vec![ | ||
| 0x0A, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x30, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,11 +65,14 @@ pub trait InternalRow { | |
| // /// Returns the decimal value at the given position | ||
| // fn get_decimal(&self, pos: usize, precision: usize, scale: usize) -> Decimal; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you please also support this method? |
||
|
|
||
| // /// Returns the timestamp value at the given position | ||
| // fn get_timestamp_ntz(&self, pos: usize, precision: usize) -> TimestampNtz; | ||
| /// Returns the date value at the given position (date as days since epoch) | ||
| fn get_date(&self, pos: usize) -> i32; | ||
|
|
||
| // /// Returns the timestamp value at the given position | ||
| // fn get_timestamp_ltz(&self, pos: usize, precision: usize) -> TimestampLtz; | ||
| /// Returns the timestamp value at the given position (timestamp without timezone) | ||
| fn get_timestamp_ntz(&self, pos: usize) -> i64; | ||
|
|
||
| /// Returns the timestamp value at the given position (timestamp with local timezone) | ||
| fn get_timestamp_ltz(&self, pos: usize) -> i64; | ||
|
|
||
| /// Returns the binary value at the given position with fixed length | ||
| fn get_binary(&self, pos: usize, length: usize) -> &[u8]; | ||
|
|
@@ -114,6 +117,30 @@ impl<'a> InternalRow for GenericRow<'a> { | |
| self.values.get(_pos).unwrap().try_into().unwrap() | ||
| } | ||
|
|
||
| fn get_date(&self, pos: usize) -> i32 { | ||
| match self.values.get(pos).unwrap() { | ||
| Datum::Date(d) => d.get_inner(), | ||
| Datum::Int32(i) => *i, | ||
| other => panic!("Expected Date or Int32 at pos {pos:?}, got {other:?}"), | ||
| } | ||
| } | ||
|
|
||
| fn get_timestamp_ntz(&self, pos: usize) -> i64 { | ||
| match self.values.get(pos).unwrap() { | ||
| Datum::Timestamp(t) => t.get_inner(), | ||
| Datum::Int64(i) => *i, | ||
| other => panic!("Expected Timestamp or Int64 at pos {pos:?}, got {other:?}"), | ||
| } | ||
| } | ||
|
|
||
| fn get_timestamp_ltz(&self, pos: usize) -> i64 { | ||
| match self.values.get(pos).unwrap() { | ||
| Datum::TimestampTz(t) => t.get_inner(), | ||
| Datum::Int64(i) => *i, | ||
| other => panic!("Expected TimestampTz or Int64 at pos {pos:?}, got {other:?}"), | ||
| } | ||
| } | ||
|
|
||
| fn get_float(&self, pos: usize) -> f32 { | ||
| self.values.get(pos).unwrap().try_into().unwrap() | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sorry for miss that, can we also support write_time in this pr?