Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions crates/integration-tests/src/tests/run_ephemeral_ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,3 +398,62 @@ fn test_run_ephemeral_dns_resolution() -> Result<()> {
Ok(())
}
integration_test!(test_run_ephemeral_dns_resolution);

/// Test SSH timeout behavior when SSH is unavailable
///
/// Verifies that ephemeral run-ssh properly times out when SSH is masked.
/// Uses systemd.mask=sshd.service to disable SSH, triggering the timeout mechanism.
///
/// Note: This tests the ephemeral timeout (~240s), not the libvirt SSH timeout (~60s).
/// The libvirt SSH timeout (60s) is used by `bcvk libvirt ssh` and would require
/// creating a libvirt VM to test properly.
fn test_run_ephemeral_ssh_timeout() -> Result<()> {
eprintln!("Testing SSH timeout with masked sshd.service...");
eprintln!("This test takes ~240 seconds to complete...");

let start = Instant::now();

let output = run_bcvk(&[
"ephemeral",
"run-ssh",
"--label",
INTEGRATION_TEST_LABEL,
"--karg",
"systemd.mask=sshd.service",
&get_test_image(),
"--",
"echo",
"should not reach here",
])?;

let elapsed = start.elapsed();

// Command should fail (SSH timeout)
assert!(
!output.success(),
"Expected ephemeral run-ssh to fail with SSH timeout, but it succeeded"
);

// Verify the error message mentions timeout or readiness failure
assert!(
output.stderr.contains("Timeout waiting for readiness")
|| output.stderr.contains("timeout")
|| output.stderr.contains("failed after"),
"Expected error about timeout, got: {}",
output.stderr
);

// Verify timeout duration is approximately 240 seconds (±20s tolerance for CI variability)
let timeout_secs = elapsed.as_secs();
assert!(
timeout_secs >= 220 && timeout_secs <= 260,
"Expected timeout around 240 seconds, but got {} seconds. \
This suggests the ephemeral SSH timeout may not be working correctly.",
timeout_secs
);

eprintln!("✓ SSH timeout worked correctly ({}s)", timeout_secs);

Ok(())
}
integration_test!(test_run_ephemeral_ssh_timeout);
193 changes: 121 additions & 72 deletions crates/kit/src/libvirt/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ use std::io::Write;
use std::os::unix::fs::PermissionsExt as _;
use std::os::unix::process::CommandExt;
use std::process::Command;
use std::time::{Duration, Instant};
use tempfile;
use tracing::debug;

// SSH retry configuration
const SSH_RETRY_TIMEOUT_SECS: u64 = 60; // Total time to retry SSH connections
const SSH_POLL_DELAY_SECS: u64 = 1; // Delay between SSH attempts
const SSH_SERVER_ALIVE_INTERVAL: u32 = 60; // Server alive interval in seconds

/// Configuration options for SSH connection to libvirt domain
#[derive(Debug, Parser)]
pub struct LibvirtSshOpts {
Expand All @@ -36,7 +42,7 @@ pub struct LibvirtSshOpts {
pub strict_host_keys: bool,

/// SSH connection timeout in seconds
#[clap(long, default_value = "30")]
#[clap(long, default_value = "5")]
pub timeout: u32,

/// SSH log level
Expand Down Expand Up @@ -236,8 +242,39 @@ impl LibvirtSshOpts {
Ok(temp_key)
}

/// Execute SSH connection to domain
fn connect_ssh(&self, ssh_config: &DomainSshConfig) -> Result<()> {
/// Build SSH command with configured options
fn build_ssh_command(
&self,
ssh_config: &DomainSshConfig,
temp_key: &tempfile::NamedTempFile,
parsed_extra_options: Vec<(String, String)>,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nit functions should take slices not Vec

) -> Command {
let mut ssh_cmd = Command::new("ssh");
ssh_cmd
.arg("-i")
.arg(temp_key.path())
.arg("-p")
.arg(ssh_config.ssh_port.to_string());

let common_opts = crate::ssh::CommonSshOptions {
strict_host_keys: self.strict_host_keys,
connect_timeout: self.timeout,
server_alive_interval: SSH_SERVER_ALIVE_INTERVAL,
log_level: self.log_level.clone(),
extra_options: parsed_extra_options,
};
common_opts.apply_to_command(&mut ssh_cmd);
ssh_cmd.arg(format!("{}@127.0.0.1", self.user));

ssh_cmd
}

/// Execute SSH connection to domain with retries
fn connect_ssh(
&self,
_global_opts: &crate::libvirt::LibvirtOptions,
ssh_config: &DomainSshConfig,
) -> Result<()> {
debug!(
"Connecting to domain '{}' via SSH on port {} (user: {})",
self.domain_name, ssh_config.ssh_port, self.user
Expand All @@ -250,17 +287,7 @@ impl LibvirtSshOpts {
// Create temporary SSH key file
let temp_key = self.create_temp_ssh_key(ssh_config)?;

// Build SSH command
let mut ssh_cmd = Command::new("ssh");

// Add SSH key and port
ssh_cmd
.arg("-i")
.arg(temp_key.path())
.arg("-p")
.arg(ssh_config.ssh_port.to_string());

// Parse extra options from key=value format
// Parse extra options
let mut parsed_extra_options = Vec::new();
for option in &self.extra_options {
if let Some((key, value)) = option.split_once('=') {
Expand All @@ -273,76 +300,98 @@ impl LibvirtSshOpts {
}
}

// Apply common SSH options
let common_opts = crate::ssh::CommonSshOptions {
strict_host_keys: self.strict_host_keys,
connect_timeout: self.timeout,
server_alive_interval: 60,
log_level: self.log_level.clone(),
extra_options: parsed_extra_options,
};
common_opts.apply_to_command(&mut ssh_cmd);
let start_time = Instant::now();
let timeout = Duration::from_secs(SSH_RETRY_TIMEOUT_SECS);

// Target host
ssh_cmd.arg(format!("{}@127.0.0.1", self.user));
// First, do connectivity check with retries (for both interactive and command)
debug!("Testing SSH connectivity before session");

// Add command if specified - use the same argument escaping logic as container SSH
if !self.command.is_empty() {
ssh_cmd.arg("--");
if self.command.len() > 1 {
// Multiple arguments need proper shell escaping
let combined_command = crate::ssh::shell_escape_command(&self.command)
.map_err(|e| eyre!("Failed to escape shell command: {}", e))?;
debug!("Combined escaped command: {}", combined_command);
ssh_cmd.arg(combined_command);
} else {
// Single argument can be passed directly
ssh_cmd.args(&self.command);
// Create progress bar for user feedback (only shown in terminals)
let pb = crate::boot_progress::create_boot_progress_bar();
pb.set_message("Waiting for SSH to be ready...");

loop {
let mut test_cmd =
self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options.clone());
test_cmd.arg("--").arg("true"); // Simple test command

let output = test_cmd.output().context("Failed to spawn SSH command")?;

if output.status.success() {
debug!(
"SSH connectivity confirmed after {:.1}s",
start_time.elapsed().as_secs_f64()
);
pb.finish_and_clear();
break;
}
}

debug!("Executing SSH command: {:?}", ssh_cmd);
// Check if we've exceeded timeout
if start_time.elapsed() >= timeout {
pb.finish_and_clear();
if !self.suppress_output {
let stderr_str = String::from_utf8_lossy(&output.stderr);
eprint!("{}", stderr_str);
eprintln!(
"\nSSH connection failed after {:.1}s. To see VM console output, run: virsh console {}",
start_time.elapsed().as_secs_f64(),
self.domain_name
);
}
return Err(eyre!("SSH connection failed after timeout"));
}

// For commands (non-interactive SSH), capture output
// For interactive SSH (no command), exec to replace current process
if self.command.is_empty() {
// Interactive SSH - exec to replace the current process
// This provides the cleanest terminal experience
debug!("Executing interactive SSH session via exec");
std::thread::sleep(Duration::from_secs(SSH_POLL_DELAY_SECS));
}

// SSH is ready - now do the actual operation (oneshot)
if self.command.is_empty() {
// Interactive: exec directly
debug!("SSH ready, launching interactive session");
let mut ssh_cmd = self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options);
let error = ssh_cmd.exec();
// exec() only returns on error
return Err(eyre!("Failed to exec SSH command: {}", error));
}

// Command execution: single attempt since we already confirmed connectivity
debug!("SSH ready, executing command");
let mut ssh_cmd = self.build_ssh_command(ssh_config, &temp_key, parsed_extra_options);

// Add command
ssh_cmd.arg("--");
if self.command.len() > 1 {
let combined_command = crate::ssh::shell_escape_command(&self.command)
.map_err(|e| eyre!("Failed to escape shell command: {}", e))?;
ssh_cmd.arg(combined_command);
} else {
// Command execution - capture and forward output
let output = ssh_cmd
.output()
.map_err(|e| eyre!("Failed to execute SSH command: {}", e))?;
ssh_cmd.args(&self.command);
}

if !output.stdout.is_empty() {
if !self.suppress_output {
// Forward stdout to parent process
print!("{}", String::from_utf8_lossy(&output.stdout));
}
debug!("SSH stdout: {}", String::from_utf8_lossy(&output.stdout));
}
if !output.stderr.is_empty() {
if !self.suppress_output {
// Forward stderr to parent process
eprint!("{}", String::from_utf8_lossy(&output.stderr));
}
debug!("SSH stderr: {}", String::from_utf8_lossy(&output.stderr));
}
// Execute command
let output = ssh_cmd
.output()
.map_err(|e| eyre!("Failed to execute SSH command: {}", e))?;

if !output.status.success() {
return Err(eyre!(
"SSH connection failed with exit code: {}",
output.status.code().unwrap_or(-1)
));
if output.status.success() {
if !output.stdout.is_empty() && !self.suppress_output {
print!("{}", String::from_utf8_lossy(&output.stdout));
}
debug!(
"Command completed successfully after {:.1}s total",
start_time.elapsed().as_secs_f64()
);
return Ok(());
}

Ok(())
// Command failed
if !self.suppress_output {
let stderr_str = String::from_utf8_lossy(&output.stderr);
eprint!("{}", stderr_str);
}
Err(eyre!(
"SSH command failed with exit code: {:?}",
output.status.code()
))
}
}

Expand Down Expand Up @@ -377,8 +426,8 @@ pub fn run_ssh_impl(
// Extract SSH configuration from domain metadata
let ssh_config = opts.extract_ssh_config(global_opts)?;

// Connect via SSH
opts.connect_ssh(&ssh_config)?;
// Connect via SSH with retries
opts.connect_ssh(global_opts, &ssh_config)?;

Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions crates/kit/src/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ impl Default for CommonSshOptions {
fn default() -> Self {
Self {
strict_host_keys: false,
connect_timeout: 30,
connect_timeout: 1,
server_alive_interval: 60,
log_level: "ERROR".to_string(),
extra_options: vec![],
Expand Down Expand Up @@ -363,7 +363,7 @@ mod tests {
fn test_ssh_connection_options() {
// Test default options
let default_opts = SshConnectionOptions::default();
assert_eq!(default_opts.common.connect_timeout, 30);
assert_eq!(default_opts.common.connect_timeout, 1);
assert!(default_opts.allocate_tty);
assert_eq!(default_opts.common.log_level, "ERROR");
assert!(default_opts.common.extra_options.is_empty());
Expand Down