Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 52 additions & 74 deletions text/asa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,12 @@
// file in the root directory of this project.
// SPDX-License-Identifier: MIT
//
// TODO:
// - fix correctness
// - add tests
//

use std::io::{self, BufRead};
use std::path::PathBuf;

use clap::Parser;
use gettextrs::{bind_textdomain_codeset, gettext, setlocale, textdomain, LocaleCategory};
use gettextrs::{bind_textdomain_codeset, setlocale, textdomain, LocaleCategory};
use plib::io::input_reader;

/// asa - interpret carriage-control characters
Expand All @@ -26,102 +22,84 @@ struct Args {
files: Vec<PathBuf>,
}

/// Buffered output state: the lines queued for printing plus a flag that
/// stays set until the first line has been queued.
struct AsaState {
    first_line: bool,
    lines: Vec<String>,
}

impl Default for AsaState {
    /// Starts with an empty queue and the first-line flag set.
    fn default() -> Self {
        AsaState {
            first_line: true,
            lines: Vec::new(),
        }
    }
}

impl AsaState {
    /// Queues a copy of `line` and clears the first-line flag.
    fn push(&mut self, line: &str) {
        self.first_line = false;
        self.lines.push(String::from(line));
    }

    /// Prints a form-feed character unless no line has been queued yet.
    fn formfeed(&mut self) {
        if self.first_line {
            return;
        }
        print!("\x0c"); // formfeed
    }

    /// Prints every queued line, separated (not terminated) by newlines,
    /// then empties the queue.
    fn flush(&mut self) {
        for (idx, text) in self.lines.iter().enumerate() {
            // no separator in front of the very first queued line
            let sep = if idx == 0 { "" } else { "\n" };
            print!("{}{}", sep, text);
        }

        self.lines.clear();
    }
}

fn asa_file(pathname: &PathBuf) -> io::Result<()> {
let mut reader = input_reader(pathname, false)?;
let mut line_no: usize = 0;
let mut state = AsaState::default();
let mut first_line = true;
let mut had_output = false;

loop {
line_no += 1;

let mut raw_line = String::new();
let n_read = reader.read_line(&mut raw_line)?;
if n_read == 0 {
break;
}

if raw_line.len() < 2 {
eprintln!("{} {}", gettext("malformed line"), line_no);
continue;
}
// Get first character as control character
let ch = match raw_line.chars().next() {
Some(c) => c,
None => continue, // empty line shouldn't happen, but handle gracefully
};

let ch = raw_line.chars().next().unwrap();
// Extract line content: skip first char, exclude trailing newline
let line_end = if raw_line.ends_with('\n') {
raw_line.len() - 1
} else {
raw_line.len()
};
let line = if line_end > 1 {
&raw_line[1..line_end]
Comment on lines +49 to +50
Copy link

Copilot AI Nov 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The line extraction logic has two issues with multi-byte UTF-8 characters:

  1. It uses hardcoded byte index 1 and comparison line_end > 1, assuming the first character is always single-byte ASCII
  2. If a multi-byte UTF-8 character is encountered as the control character, raw_line[1..line_end] will panic with a "byte index is not a char boundary" error

While the ASA format typically uses ASCII control characters, the code should handle this gracefully. The fix should:

  • Replace 1 with ch.len_utf8() in the slice
  • Replace line_end > 1 with line_end > ch.len_utf8()

Example: let line = if line_end > ch.len_utf8() { &raw_line[ch.len_utf8()..line_end] } else { "" };

Suggested change
let line = if line_end > 1 {
&raw_line[1..line_end]
let line = if line_end > ch.len_utf8() {
&raw_line[ch.len_utf8()..line_end]

Copilot uses AI. Check for mistakes.
} else {
"" // control char only, no content
};

// exclude first char, and trailing newline
let mut line_len = raw_line.len() - 1;
if raw_line.ends_with('\n') {
line_len -= 1;
}
let line = &raw_line[1..line_len];
// POSIX: '+' as first character in input is equivalent to space
let effective_ch = if first_line && ch == '+' { ' ' } else { ch };

match ch {
match effective_ch {
'+' => {
state.push(line);
// Overprint: return to column 1 of current line
print!("\r{}", line);
}
'0' => {
state.flush();
// Double-space: newline before content (blank line)
if !first_line {
println!();
}
println!();
state.push(line);
print!("{}", line);
}
'-' => {
state.flush();
// Triple-space (non-POSIX extension): two blank lines before
if !first_line {
println!();
}
println!();
println!();
state.push(line);
print!("{}", line);
}
'1' => {
state.flush();
state.formfeed();
state.push(line);
// New page: form-feed
if !first_line {
println!();
}
print!("\x0c{}", line);
}
_ => {
state.flush();
state.push(line);
// Space and other chars: normal single-spaced output
if !first_line {
println!();
}
print!("{}", line);
}
};

first_line = false;
had_output = true;
}

// Final newline if we had any output
if had_output {
println!();
}

Ok(())
Expand Down
164 changes: 164 additions & 0 deletions text/tests/asa/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//
// Copyright (c) 2024 Hemi Labs, Inc.
//
// This file is part of the posixutils-rs project covered under
// the MIT License. For the full license text, please see the LICENSE
// file in the root directory of this project.
// SPDX-License-Identifier: MIT
//

use plib::testing::{run_test, TestPlan};

// Builds a TestPlan for the `asa` command with no arguments, `test_data` as
// stdin, `expected_output` as the expected stdout, an empty expected stderr
// and an expected exit code of 0, then hands it to `run_test`.
fn asa_test(test_data: &str, expected_output: &str) {
    let plan = TestPlan {
        cmd: "asa".to_string(),
        args: Vec::new(),
        stdin_data: test_data.to_string(),
        expected_out: expected_output.to_string(),
        expected_err: String::new(),
        expected_exit_code: 0,
    };
    run_test(plan);
}

// Test empty input
#[test]
fn asa_empty() {
    // No input at all: nothing is expected on stdout.
    let (input, expected) = ("", "");
    asa_test(input, expected);
}

// Test basic space control character (normal single-spacing)
#[test]
fn asa_space_single_line() {
    // The leading space is consumed as the control character.
    let input = " hello\n";
    let expected = "hello\n";
    asa_test(input, expected);
}

#[test]
fn asa_space_multiple_lines() {
    // Three space-controlled lines come out single-spaced.
    let input = " line1\n line2\n line3\n";
    let expected = "line1\nline2\nline3\n";
    asa_test(input, expected);
}

// Test '0' control character (double-spacing - blank line before)
#[test]
fn asa_zero_first_line() {
    // '0' on the very first line: one blank line, then the content.
    let input = "0hello\n";
    let expected = "\nhello\n";
    asa_test(input, expected);
}

#[test]
fn asa_zero_second_line() {
    // '0' after a normal line: the previous line, a blank line, then the content.
    let input = " line1\n0line2\n";
    let expected = "line1\n\nline2\n";
    asa_test(input, expected);
}

#[test]
fn asa_zero_multiple() {
    // Two consecutive '0' lines: each one is preceded by a blank line.
    let input = "0first\n0second\n";
    let expected = "\nfirst\n\nsecond\n";
    asa_test(input, expected);
}

// Test '1' control character (form-feed/new page)
#[test]
fn asa_one_first_line() {
    // '1' on the very first line: a form-feed directly followed by the content.
    let input = "1page1\n";
    let expected = "\x0cpage1\n";
    asa_test(input, expected);
}

#[test]
fn asa_one_second_line() {
    // '1' after a normal line: the previous line is terminated, then a
    // form-feed and the new content follow.
    let input = " line1\n1page2\n";
    let expected = "line1\n\x0cpage2\n";
    asa_test(input, expected);
}

// Test '+' control character (overprint - carriage return)
#[test]
fn asa_plus_overprint() {
    // '+' overprints: a carriage return separates the lines instead of a newline.
    let input = " line1\n+over\n";
    let expected = "line1\rover\n";
    asa_test(input, expected);
}

#[test]
fn asa_plus_multiple_overprint() {
    // Several '+' lines in a row all land on the same logical output line.
    let input = " base\n+mid\n+top\n";
    let expected = "base\rmid\rtop\n";
    asa_test(input, expected);
}

#[test]
fn asa_plus_first_line() {
    // POSIX: '+' as first character in input is equivalent to space,
    // so no carriage return is expected here.
    let input = "+first\n";
    let expected = "first\n";
    asa_test(input, expected);
}

#[test]
fn asa_plus_first_line_then_normal() {
    // A leading '+' line followed by an ordinary space-controlled line.
    let input = "+first\n line2\n";
    let expected = "first\nline2\n";
    asa_test(input, expected);
}

// Test '-' control character (triple-spacing - non-POSIX extension)
#[test]
fn asa_dash_first_line() {
    // '-' on the very first line: two blank lines, then the content.
    let input = "-content\n";
    let expected = "\n\ncontent\n";
    asa_test(input, expected);
}

#[test]
fn asa_dash_second_line() {
    // '-' after a normal line: two blank lines separate the two contents.
    let input = " line1\n-line2\n";
    let expected = "line1\n\n\nline2\n";
    asa_test(input, expected);
}

// Test other/unknown control characters (treated as space)
#[test]
fn asa_other_char() {
    // An unrecognised control character ('X') is expected to behave like a
    // space: it is dropped and the line is single-spaced.
    let input = "Xhello\n";
    let expected = "hello\n";
    asa_test(input, expected);
}

#[test]
fn asa_digit_as_control() {
    // '2' has no special meaning, so it is expected to behave like a space.
    let input = "2hello\n";
    let expected = "hello\n";
    asa_test(input, expected);
}

// Test empty content (control char only)
#[test]
fn asa_space_empty_content() {
    // A line holding only the space control character yields an empty line.
    let input = " \n";
    let expected = "\n";
    asa_test(input, expected);
}

#[test]
fn asa_zero_empty_content() {
    // A line holding only '0': the blank line plus the (empty) content line.
    let input = "0\n";
    let expected = "\n\n";
    asa_test(input, expected);
}

// Test mixed control characters
#[test]
fn asa_mixed_controls() {
    // Space, '0', '1', '+' and space again, in that order, in one input.
    let input = " line1\n0double\n1newpage\n+over\n line2\n";
    let expected = "line1\n\ndouble\n\x0cnewpage\rover\nline2\n";
    asa_test(input, expected);
}

// Test content without trailing newline (EOF without newline)
#[test]
fn asa_no_trailing_newline() {
    // Input ends without a newline; the output is still newline-terminated.
    let input = " hello";
    let expected = "hello\n";
    asa_test(input, expected);
}

#[test]
fn asa_plus_no_trailing_newline() {
    // An overprint line that hits EOF without a newline.
    let input = " line1\n+over";
    let expected = "line1\rover\n";
    asa_test(input, expected);
}

// Test lines with only control character (no content, no newline)
#[test]
fn asa_control_only_no_newline() {
    // Input is a single space control character with nothing after it.
    let input = " ";
    let expected = "\n";
    asa_test(input, expected);
}

// Test complex FORTRAN-style output simulation
#[test]
fn asa_fortran_style_report() {
    // A small FORTRAN-style report: a page header ('1'), an empty line,
    // data lines, a double-spaced section heading ('0') and another data line.
    asa_test(
        "1REPORT TITLE\n \n DATA LINE 1\n DATA LINE 2\n0SECTION 2\n DATA LINE 3\n",
        "\x0cREPORT TITLE\n\nDATA LINE 1\nDATA LINE 2\n\nSECTION 2\nDATA LINE 3\n",
    );
}
1 change: 1 addition & 0 deletions text/tests/text-tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// SPDX-License-Identifier: MIT
//

mod asa;
mod comm;
mod csplit;
mod cut;
Expand Down