Skip to content

Commit a855a77

Browse files
committed
faster wc bytes
1 parent 6519e71 commit a855a77

File tree

1 file changed

+28
-27
lines changed

1 file changed

+28
-27
lines changed

text/wc.rs

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ struct CountInfo {
4242
words: usize,
4343
chars: usize,
4444
nl: usize,
45+
was_space: bool,
4546
}
4647

4748
impl CountInfo {
@@ -50,6 +51,7 @@ impl CountInfo {
5051
words: 0,
5152
chars: 0,
5253
nl: 0,
54+
was_space: true,
5355
}
5456
}
5557

@@ -60,6 +62,20 @@ impl CountInfo {
6062
}
6163
}
6264

65+
/// Reports whether the byte value `c` is treated as whitespace.
///
/// The accepted values are 9 through 13 (ASCII horizontal tab, line feed,
/// vertical tab, form feed and carriage return) and 32 (ASCII space).
/// Every other value, including anything above 127, is not whitespace.
fn is_space(c: usize) -> bool {
    matches!(c, 9..=13 | 32)
}
68+
69+
fn create_table() -> [bool; 256] {
70+
let mut table = [false; 256];
71+
72+
for i in 0..256 {
73+
table[i] = is_space(i)
74+
}
75+
table
76+
}
77+
78+
6379
fn build_display_str(args: &Args, count: &CountInfo, filename: &OsStr) -> String {
6480
let mut output = String::with_capacity(filename.len() + (3 * 10));
6581

@@ -109,11 +125,11 @@ fn build_display_str(args: &Args, count: &CountInfo, filename: &OsStr) -> String
109125
output
110126
}
111127

112-
fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf) -> io::Result<()> {
128+
fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf, table: &[bool; 256]) -> io::Result<()> {
113129
let mut file = plib::io::input_stream(pathname, false)?;
114130

115131
let mut buffer = [0; plib::BUFSZ];
116-
let mut in_word = false;
132+
let mut was_space = count.was_space;
117133

118134
loop {
119135
let n_read = file.read(&mut buffer[..])?;
@@ -126,30 +142,13 @@ fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf) -> io::Result<()> {
126142
let bufslice = &buffer[0..n_read];
127143

128144
for ch_u8 in bufslice {
129-
let ch = *ch_u8 as char;
130-
131-
if ch == '\n' {
132-
count.nl = count.nl + 1;
133-
if in_word {
134-
in_word = false;
135-
count.words = count.words + 1;
136-
}
137-
} else if ch.is_whitespace() {
138-
if in_word {
139-
in_word = false;
140-
count.words = count.words + 1;
141-
}
142-
} else {
143-
if !in_word {
144-
in_word = true;
145-
}
146-
}
145+
let is_space = table[*ch_u8 as usize];
146+
count.nl += (ch_u8 == &10) as usize;
147+
count.words += (!is_space && was_space) as usize;
148+
was_space = is_space;
147149
}
148150
}
149-
150-
if in_word {
151-
count.words = count.words + 1;
152-
}
151+
count.was_space = was_space;
153152

154153
Ok(())
155154
}
@@ -196,11 +195,12 @@ fn wc_file(
196195
chars_mode: bool,
197196
pathname: &PathBuf,
198197
count: &mut CountInfo,
198+
table: &[bool; 256],
199199
) -> io::Result<()> {
200200
if chars_mode {
201201
wc_file_chars(args, count, pathname)?;
202202
} else {
203-
wc_file_bytes(count, pathname)?;
203+
wc_file_bytes(count, pathname, table)?;
204204
}
205205

206206
let output = build_display_str(args, count, pathname.as_os_str());
@@ -231,13 +231,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
231231
bind_textdomain_codeset(PROJECT_NAME, "UTF-8")?;
232232

233233
let mut exit_code = 0;
234+
let table = create_table();
234235
let mut totals = CountInfo::new();
235236

236237
// input via stdin
237238
if args.files.is_empty() {
238239
let mut count = CountInfo::new();
239240

240-
if let Err(e) = wc_file(&args, chars_mode, &PathBuf::new(), &mut count) {
241+
if let Err(e) = wc_file(&args, chars_mode, &PathBuf::new(), &mut count, &table) {
241242
exit_code = 1;
242243
eprintln!("stdin: {}", e);
243244
}
@@ -247,7 +248,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
247248
for filename in &args.files {
248249
let mut count = CountInfo::new();
249250

250-
if let Err(e) = wc_file(&args, chars_mode, filename, &mut count) {
251+
if let Err(e) = wc_file(&args, chars_mode, filename, &mut count, &table) {
251252
exit_code = 1;
252253
eprintln!("{}: {}", filename.display(), e);
253254
}

0 commit comments

Comments
 (0)