Skip to content

Commit a0e8388

Browse files
committed
deps: drop 'bstr'
It's now just a dev dependency. It wasn't really carrying its weight.
1 parent 9e1126a commit a0e8388

File tree

4 files changed

+120
-14
lines changed

4 files changed

+120
-14
lines changed

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ license = "Unlicense/MIT"
1212
categories = ["encoding", "parser-implementations"]
1313
exclude = ["/.github", "/ci/*", "/scripts/*"]
1414
edition = "2021"
15+
resolver = "2"
1516

1617
[workspace]
1718
members = ["csv-core", "csv-index"]
@@ -20,13 +21,13 @@ members = ["csv-core", "csv-index"]
2021
bench = false
2122

2223
[dependencies]
23-
bstr = { version = "0.2.1", features = ["serde1"] }
24-
csv-core = { path = "csv-core", version = "0.1.6" }
24+
csv-core = { path = "csv-core", version = "0.1.10" }
2525
itoa = "1"
2626
ryu = "1"
2727
serde = "1.0.55"
2828

2929
[dev-dependencies]
30+
bstr = { version = "1.2.0", default-features = false, features = ["alloc", "serde"] }
3031
serde = { version = "1.0.55", features = ["derive"] }
3132

3233
[profile.release]

src/byte_record.rs

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@ use std::{
55
result,
66
};
77

8-
use {
9-
bstr::{BString, ByteSlice},
10-
serde::de::Deserialize,
11-
};
8+
use serde::de::Deserialize;
129

1310
use crate::{
1411
deserializer::deserialize_byte_record,
@@ -73,11 +70,12 @@ impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
7370

7471
impl fmt::Debug for ByteRecord {
7572
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
76-
let mut fields = vec![];
77-
for field in self {
78-
fields.push(BString::from(field.to_vec()));
79-
}
80-
write!(f, "ByteRecord({:?})", fields)
73+
write!(f, "ByteRecord(")?;
74+
f.debug_list()
75+
.entries(self.iter().map(crate::debug::Bytes))
76+
.finish()?;
77+
write!(f, ")")?;
78+
Ok(())
8179
}
8280
}
8381

@@ -375,8 +373,8 @@ impl ByteRecord {
375373
let mut trimmed =
376374
ByteRecord::with_capacity(self.as_slice().len(), self.len());
377375
trimmed.set_position(self.position().cloned());
378-
for field in &*self {
379-
trimmed.push_field(field.trim());
376+
for field in self.iter() {
377+
trimmed.push_field(trim_ascii(field));
380378
}
381379
*self = trimmed;
382380
}
@@ -552,7 +550,7 @@ impl ByteRecord {
552550
// Otherwise, we must check each field individually to ensure that
553551
// it's valid UTF-8.
554552
for (i, field) in self.iter().enumerate() {
555-
if let Err(err) = field.to_str() {
553+
if let Err(err) = std::str::from_utf8(field) {
556554
return Err(new_utf8_error(i, err.valid_up_to()));
557555
}
558556
}
@@ -857,6 +855,32 @@ impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
857855
}
858856
}
859857

858+
fn trim_ascii(bytes: &[u8]) -> &[u8] {
859+
trim_ascii_start(trim_ascii_end(bytes))
860+
}
861+
862+
/// Returns `bytes` with any leading ASCII whitespace removed.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. If every byte is
/// whitespace (or the input is empty), an empty slice is returned.
fn trim_ascii_start(bytes: &[u8]) -> &[u8] {
    // Index of the first byte to keep; when there is none, skip everything.
    let start = bytes
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(bytes.len());
    &bytes[start..]
}
872+
873+
/// Returns `bytes` with any trailing ASCII whitespace removed.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. If every byte is
/// whitespace (or the input is empty), an empty slice is returned.
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    // One past the last byte to keep; when there is none, keep nothing.
    let end = bytes
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &bytes[..end]
}
883+
860884
#[cfg(test)]
861885
mod tests {
862886
use crate::string_record::StringRecord;

src/debug.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/// A type that provides a human readable debug impl for arbitrary bytes.
///
/// This generally works best when the bytes are presumed to be mostly UTF-8,
/// but will work for anything.
///
/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);

impl<'a> core::fmt::Debug for Bytes<'a> {
    /// Writes the wrapped bytes as a double-quoted, escaped string.
    ///
    /// The bytes are decoded one codepoint at a time with `utf8_decode`:
    ///
    /// * NUL is written as `\0`.
    /// * Every other ASCII control character except `\n`, `\r` and `\t` is
    ///   written as a two-digit hex escape (`\xNN`).
    /// * Any byte that does not begin a valid UTF-8 sequence is also written
    ///   as `\xNN`.
    /// * Everything else is written via `char::escape_debug`.
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(f, "\"")?;
        // This is a sad re-implementation of a similar impl found in bstr.
        let mut bytes = self.0;
        while let Some(result) = utf8_decode(bytes) {
            let ch = match result {
                Ok(ch) => ch,
                Err(byte) => {
                    // Not decodable here: emit the raw byte and resume
                    // decoding at the very next byte.
                    write!(f, r"\x{:02x}", byte)?;
                    bytes = &bytes[1..];
                    continue;
                }
            };
            bytes = &bytes[ch.len_utf8()..];
            match ch {
                '\0' => write!(f, "\\0")?,
                // ASCII control characters except \0, \n, \r, \t. The upper
                // bound is \x1f (the last C0 control), so that \x1a..=\x1f
                // are hex escaped like the rest instead of falling through
                // to `escape_debug`, which would print them as `\u{1a}` etc.
                '\x01'..='\x08'
                | '\x0b'
                | '\x0c'
                | '\x0e'..='\x1f'
                | '\x7f' => {
                    write!(f, "\\x{:02x}", u32::from(ch))?;
                }
                // Everything else, including \n, \r and \t.
                _ => {
                    write!(f, "{}", ch.escape_debug())?;
                }
            }
        }
        write!(f, "\"")?;
        Ok(())
    }
}

/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
///
/// If no valid encoding of a codepoint exists at the beginning of the given
/// byte slice, then the first byte is returned instead.
///
/// This returns `None` if and only if `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    /// Returns the length of the encoding announced by a leading byte, or
    /// `None` if the byte cannot begin a UTF-8 sequence.
    fn len(byte: u8) -> Option<usize> {
        if byte <= 0x7F {
            Some(1)
        } else if byte & 0b1100_0000 == 0b1000_0000 {
            // A continuation byte can never start a sequence.
            None
        } else if byte <= 0b1101_1111 {
            Some(2)
        } else if byte <= 0b1110_1111 {
            Some(3)
        } else if byte <= 0b1111_0111 {
            Some(4)
        } else {
            None
        }
    }

    let first = *bytes.first()?;
    let width = match len(first) {
        None => return Some(Err(first)),
        // The sequence is cut off by the end of the input.
        Some(width) if width > bytes.len() => return Some(Err(first)),
        Some(1) => return Some(Ok(char::from(first))),
        Some(width) => width,
    };
    // The leading byte only announces a length; full validation (overlong
    // forms, surrogates, bad continuation bytes) is left to `from_utf8`.
    match core::str::from_utf8(&bytes[..width]) {
        Ok(s) => Some(Ok(s
            .chars()
            .next()
            .expect("a non-empty valid UTF-8 string has a first char"))),
        Err(_) => Some(Err(first)),
    }
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ pub use crate::{
164164

165165
mod byte_record;
166166
pub mod cookbook;
167+
mod debug;
167168
mod deserializer;
168169
mod error;
169170
mod reader;

0 commit comments

Comments
 (0)