Skip to content

Commit bab885c

Browse files
committed
[wc] count chars from bytes
1 parent baa0cba commit bab885c

File tree

1 file changed

+12
-46
lines changed

1 file changed

+12
-46
lines changed

text/wc.rs

Lines changed: 12 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use clap::Parser;
1111
use gettextrs::{bind_textdomain_codeset, setlocale, textdomain, LocaleCategory};
1212
use plib::PROJECT_NAME;
1313
use std::ffi::OsStr;
14-
use std::io::{self, BufRead, Read};
14+
use std::io::{self, Read};
1515
use std::path::PathBuf;
1616

1717
/// wc - word, line, and byte or character count
@@ -122,7 +122,7 @@ fn build_display_str(args: &Args, count: &CountInfo, filename: &OsStr) -> String
122122
output
123123
}
124124

125-
fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf, table: &[bool; 256]) -> io::Result<()> {
125+
fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf, table: &[bool; 256], chars_mode: bool) -> io::Result<()> {
126126
let mut file = plib::io::input_stream(pathname, false)?;
127127

128128
let mut buffer = [0; plib::BUFSZ];
@@ -134,10 +134,17 @@ fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf, table: &[bool; 256])
134134
break;
135135
}
136136

137-
count.chars = count.chars + n_read;
138-
139137
let bufslice = &buffer[0..n_read];
140138

139+
if !chars_mode {
140+
// number of bytes read
141+
count.chars = count.chars + n_read;
142+
} else {
143+
// number of UTF-8 encoded Unicode code points starting in this slice: every byte that is not a 10xxxxxx continuation byte
144+
count.chars += bufslice.iter().filter(|&ch| (ch >> 6) != 0b10).count();
145+
}
146+
147+
141148
for ch_u8 in bufslice {
142149
let is_space = table[*ch_u8 as usize];
143150
count.nl += (ch_u8 == &10) as usize;
@@ -149,55 +156,14 @@ fn wc_file_bytes(count: &mut CountInfo, pathname: &PathBuf, table: &[bool; 256])
149156
Ok(())
150157
}
151158

152-
fn wc_file_chars(args: &Args, count: &mut CountInfo, pathname: &PathBuf) -> io::Result<()> {
153-
let mut reader = plib::io::input_reader(pathname, false)?;
154-
155-
loop {
156-
let mut buffer = String::new();
157-
let n_read = reader.read_line(&mut buffer)?;
158-
if n_read == 0 {
159-
break;
160-
}
161-
162-
count.nl = count.nl + 1;
163-
count.chars = count.chars + n_read;
164-
165-
if args.words {
166-
let mut in_word = false;
167-
168-
for ch in buffer.chars() {
169-
if ch.is_whitespace() {
170-
if in_word {
171-
in_word = false;
172-
count.words = count.words + 1;
173-
}
174-
} else {
175-
if !in_word {
176-
in_word = true;
177-
}
178-
}
179-
}
180-
if in_word {
181-
count.words = count.words + 1;
182-
}
183-
}
184-
}
185-
186-
Ok(())
187-
}
188-
189159
fn wc_file(
190160
args: &Args,
191161
chars_mode: bool,
192162
pathname: &PathBuf,
193163
count: &mut CountInfo,
194164
table: &[bool; 256],
195165
) -> io::Result<()> {
196-
if chars_mode {
197-
wc_file_chars(args, count, pathname)?;
198-
} else {
199-
wc_file_bytes(count, pathname, table)?;
200-
}
166+
wc_file_bytes(count, pathname, table, chars_mode)?;
201167

202168
let output = build_display_str(args, count, pathname.as_os_str());
203169

0 commit comments

Comments
 (0)