Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions awk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ libc.workspace = true
pest = "2.7"
pest_derive = "2.7"
lexical = { version = "6.1", features = ["format"] }
plib = { path = "../plib" }
rand = { version = "0.8", default-features = false, features = ["small_rng"] }

[dev-dependencies]
Expand Down
118 changes: 48 additions & 70 deletions awk/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,14 @@
// SPDX-License-Identifier: MIT
//

use plib::regex::{Match, Regex as PlibRegex, RegexFlags};
use std::ffi::CString;
use std::ptr;

fn regex_compilation_result(
status_integer: libc::c_int,
regex: &libc::regex_t,
) -> Result<(), String> {
if status_integer != 0 {
let mut error_buffer = vec![b'\0'; 128];
unsafe {
libc::regerror(
status_integer,
ptr::from_ref(regex),
error_buffer.as_mut_ptr() as *mut libc::c_char,
128,
)
};
let error = CString::from_vec_with_nul(error_buffer)
.expect("error message returned from `libc::regerror` is an invalid CString");
Err(error
.into_string()
.expect("error message from `libc::regerror' contains invalid utf-8"))
} else {
Ok(())
}
}

/// A regex wrapper that provides CString-compatible API for AWK.
/// Internally uses plib::regex for POSIX ERE support.
pub struct Regex {
raw_regex: libc::regex_t,
regex_string: CString,
inner: PlibRegex,
pattern_string: String,
}

#[cfg_attr(test, derive(Debug))]
Expand All @@ -46,95 +24,95 @@ pub struct RegexMatch {
pub end: usize,
}

impl From<Match> for RegexMatch {
fn from(m: Match) -> Self {
RegexMatch {
start: m.start,
end: m.end,
}
}
}

/// Iterator over regex matches in a string.
/// Owns the input CString to preserve lifetimes.
pub struct MatchIter<'re> {
string: CString,
// Store the string as owned String to avoid lifetime issues
string: String,
next_start: usize,
regex: &'re Regex,
}

impl Iterator for MatchIter<'_> {
type Item = RegexMatch;
fn next(&mut self) -> Option<Self::Item> {
if self.next_start >= self.string.as_bytes().len() {
return None;
}
let mut match_range = libc::regmatch_t {
rm_so: -1,
rm_eo: -1,
};
let exec_status = unsafe {
libc::regexec(
ptr::from_ref(&self.regex.raw_regex),
self.string.as_ptr().add(self.next_start),
1,
ptr::from_mut(&mut match_range),
0,
)
};
if exec_status == libc::REG_NOMATCH {
if self.next_start >= self.string.len() {
return None;
}

// Find match starting from current offset
let substring = &self.string[self.next_start..];
let m = self.regex.inner.find(substring)?;

let result = RegexMatch {
start: self.next_start + match_range.rm_so as usize,
end: self.next_start + match_range.rm_eo as usize,
start: self.next_start + m.start,
end: self.next_start + m.end,
};
self.next_start += match_range.rm_eo as usize;

// Move past this match for next iteration
// Ensure we make progress even on zero-width matches
self.next_start = if m.end > 0 {
self.next_start + m.end
} else {
self.next_start + 1
};

Some(result)
}
}

impl Regex {
pub fn new(regex: CString) -> Result<Self, String> {
let mut raw = unsafe { std::mem::zeroed::<libc::regex_t>() };
let compilation_status =
unsafe { libc::regcomp(ptr::from_mut(&mut raw), regex.as_ptr(), libc::REG_EXTENDED) };
regex_compilation_result(compilation_status, &raw)?;
let pattern = regex.to_str().map_err(|e| e.to_string())?;
let inner = PlibRegex::new(pattern, RegexFlags::ere()).map_err(|e| e.to_string())?;
Ok(Self {
raw_regex: raw,
regex_string: regex,
inner,
pattern_string: pattern.to_string(),
})
}

/// Returns an iterator over all match locations in the string.
/// Takes ownership of the CString.
pub fn match_locations(&self, string: CString) -> MatchIter {
let s = string.into_string().unwrap_or_default();
MatchIter {
next_start: 0,
regex: self,
string,
string: s,
}
}

pub fn matches(&self, string: &CString) -> bool {
let exec_status = unsafe {
libc::regexec(
ptr::from_ref(&self.raw_regex),
string.as_ptr(),
0,
ptr::null_mut(),
0,
)
};
exec_status != libc::REG_NOMATCH
let s = string.to_str().unwrap_or("");
self.inner.is_match(s)
}
}

impl Drop for Regex {
fn drop(&mut self) {
unsafe {
libc::regfree(ptr::from_mut(&mut self.raw_regex));
}
// plib::regex handles cleanup internally
}
}

#[cfg(test)]
impl core::fmt::Debug for Regex {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
writeln!(f, "/{}/", self.regex_string.to_str().unwrap())
writeln!(f, "/{}/", self.pattern_string)
}
}

impl PartialEq for Regex {
fn eq(&self, other: &Self) -> bool {
self.regex_string == other.regex_string
self.pattern_string == other.pattern_string
}
}

Expand Down
1 change: 1 addition & 0 deletions display/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ clap.workspace = true
clap.features = ["env"]
gettext-rs.workspace = true
libc.workspace = true
plib = { path = "../plib" }
termion = "4.0"
thiserror = "1.0"

Expand Down
70 changes: 18 additions & 52 deletions display/more.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,15 @@

use clap::Parser;
use gettextrs::{bind_textdomain_codeset, gettext, setlocale, textdomain, LocaleCategory};
use libc::{
getegid, getgid, getuid, regcomp, regex_t, regexec, setgid, setuid, REG_ICASE, REG_NOMATCH,
};
use libc::{getegid, getgid, getuid, setgid, setuid};
use plib::regex::{Regex, RegexFlags};
use std::collections::HashMap;
use std::ffi::CString;
use std::fs::File;
use std::io::{stdout, BufRead, BufReader, Cursor, Read, Seek, SeekFrom, Write};
use std::ops::{Not, Range};
use std::os::fd::AsRawFd;
use std::path::PathBuf;
use std::process::{exit, ExitStatus};
use std::ptr;
use std::str::FromStr;
use std::sync::mpsc::{channel, Receiver, TryRecvError};
use std::sync::Mutex;
Expand Down Expand Up @@ -942,8 +939,8 @@ struct SourceContext {
/// Current search pattern
current_pattern: String,
/// Last search settings
last_search: Option<(regex_t, bool, Direction)>,
/// Storage for marks that were set durring current [`Source`] processing
last_search: Option<(Regex, bool, Direction)>,
/// Storage for marks that were set during current [`Source`] processing
marked_positions: HashMap<char, usize>,
/// Flag that is [`true`] if the input file count is more than 1
is_many_files: bool,
Expand Down Expand Up @@ -1206,7 +1203,7 @@ impl SourceContext {
pub fn search(
&mut self,
count: Option<usize>,
pattern: regex_t,
pattern: Regex,
is_not: bool,
direction: Direction,
) -> Result<(), MoreError> {
Expand All @@ -1222,21 +1219,10 @@ impl SourceContext {
Direction::Backward => haystack + &last_string,
};
}
let c_input = CString::new(haystack)
.map_err(|_| MoreError::StringParse(self.current_source.name()))?;
let has_match = unsafe {
regexec(
&pattern as *const regex_t,
c_input.as_ptr(),
0,
ptr::null_mut(),
0,
)
};
let has_match = if is_not {
has_match == REG_NOMATCH
!pattern.is_match(&haystack)
} else {
has_match != REG_NOMATCH
pattern.is_match(&haystack)
};
if has_match {
let Some((rows, _)) = self.terminal_size else {
Expand Down Expand Up @@ -1291,7 +1277,7 @@ impl SourceContext {
} else {
direction.clone()
};
self.search(count, *pattern, *is_not, direction)
self.search(count, pattern.clone(), *is_not, direction)
} else {
Err(MoreError::SourceContext(
SourceContextError::MissingLastSearch,
Expand Down Expand Up @@ -1647,39 +1633,19 @@ impl Prompt {
}
}

/// Compiles [`pattern`] as [`regex_t`]
fn compile_regex(pattern: String, ignore_case: bool) -> Result<regex_t, MoreError> {
#[cfg(target_os = "macos")]
let mut pattern = pattern.replace("\\\\", "\\");
#[cfg(all(unix, not(target_os = "macos")))]
/// Compiles [`pattern`] as a POSIX BRE regex
fn compile_regex(pattern: String, ignore_case: bool) -> Result<Regex, MoreError> {
// Normalize backslash escapes
let pattern = pattern.replace("\\\\", "\\");
let mut cflags = 0;
if ignore_case {
cflags |= REG_ICASE;
}

// macOS version of [regcomp](regcomp) from `libc` provides additional check
// for empty regex. In this case, an error
// [REG_EMPTY](https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/regcomp.3.html)
// will be returned. Therefore, an empty pattern is replaced with ".*".
#[cfg(target_os = "macos")]
{
pattern = if pattern == "" {
String::from(".*")
} else {
pattern
};
}

let c_pattern =
CString::new(pattern.clone()).map_err(|_| MoreError::StringParse(pattern.clone()))?;
let mut regex = unsafe { std::mem::zeroed::<regex_t>() };

if unsafe { regcomp(&mut regex, c_pattern.as_ptr(), cflags) } == 0 {
Ok(regex)
let flags = if ignore_case {
RegexFlags::bre().ignore_case()
} else {
Err(MoreError::StringParse(pattern))
}
RegexFlags::bre()
};

// plib::regex handles macOS empty pattern workaround internally
Regex::new(&pattern, flags).map_err(|_| MoreError::StringParse(pattern))
}

/// More state
Expand Down
Loading