Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 72 additions & 21 deletions crates/audit/src/archiver.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use crate::metrics::Metrics;
use crate::reader::EventReader;
use crate::storage::EventWriter;
use crate::metrics::{
EventType, increment_events_processed, record_archive_event_duration, record_event_age,
record_kafka_commit_duration, record_kafka_read_duration,
};
use crate::reader::{EventReader, UserOpEventReader};
use crate::storage::{EventWriter, UserOpEventWriter};
use anyhow::Result;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tokio::time::sleep;
Expand All @@ -13,7 +16,6 @@ where
{
reader: R,
writer: W,
metrics: Metrics,
}

impl<R, W> KafkaAuditArchiver<R, W>
Expand All @@ -22,11 +24,7 @@ where
W: EventWriter + Clone + Send + 'static,
{
pub fn new(reader: R, writer: W) -> Self {
Self {
reader,
writer,
metrics: Metrics::default(),
}
Self { reader, writer }
}

pub async fn run(&mut self) -> Result<()> {
Expand All @@ -36,39 +34,32 @@ where
let read_start = Instant::now();
match self.reader.read_event().await {
Ok(event) => {
self.metrics
.kafka_read_duration
.record(read_start.elapsed().as_secs_f64());
record_kafka_read_duration(read_start.elapsed(), EventType::Bundle);

let now_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
let event_age_ms = now_ms.saturating_sub(event.timestamp);
self.metrics.event_age.record(event_age_ms as f64);
record_event_age(event_age_ms as f64, EventType::Bundle);

// TODO: the integration test breaks because Minio doesn't support etag
let writer = self.writer.clone();
let metrics = self.metrics.clone();
tokio::spawn(async move {
let archive_start = Instant::now();
if let Err(e) = writer.archive_event(event).await {
error!(error = %e, "Failed to write event");
} else {
metrics
.archive_event_duration
.record(archive_start.elapsed().as_secs_f64());
metrics.events_processed.increment(1);
record_archive_event_duration(archive_start.elapsed(), EventType::Bundle);
increment_events_processed(EventType::Bundle);
}
});

let commit_start = Instant::now();
if let Err(e) = self.reader.commit().await {
error!(error = %e, "Failed to commit message");
}
self.metrics
.kafka_commit_duration
.record(commit_start.elapsed().as_secs_f64());
record_kafka_commit_duration(commit_start.elapsed(), EventType::Bundle);
}
Err(e) => {
error!(error = %e, "Error reading events");
Expand All @@ -78,3 +69,63 @@ where
}
}
}

/// Archives UserOp events consumed from Kafka into durable storage.
///
/// Counterpart of `KafkaAuditArchiver` for UserOp events: `reader` pulls
/// events from Kafka, and a clone of `writer` persists each event on a
/// spawned task (hence the `Clone + Send + 'static` bounds on `W`).
pub struct KafkaUserOpAuditArchiver<R, W>
where
    R: UserOpEventReader,
    W: UserOpEventWriter + Clone + Send + 'static,
{
    reader: R,
    writer: W,
}

impl<R, W> KafkaUserOpAuditArchiver<R, W>
where
R: UserOpEventReader,
W: UserOpEventWriter + Clone + Send + 'static,
{
pub fn new(reader: R, writer: W) -> Self {
Self { reader, writer }
}

pub async fn run(&mut self) -> Result<()> {
info!("Starting Kafka UserOp archiver");

loop {
let read_start = Instant::now();
match self.reader.read_event().await {
Ok(event) => {
record_kafka_read_duration(read_start.elapsed(), EventType::UserOp);

let now_ms = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as i64;
let event_age_ms = now_ms.saturating_sub(event.timestamp);
record_event_age(event_age_ms as f64, EventType::UserOp);

let writer = self.writer.clone();
tokio::spawn(async move {
let archive_start = Instant::now();
if let Err(e) = writer.archive_userop_event(event).await {
error!(error = %e, "Failed to write UserOp event");
} else {
record_archive_event_duration(archive_start.elapsed(), EventType::UserOp);
increment_events_processed(EventType::UserOp);
}
});

let commit_start = Instant::now();
if let Err(e) = self.reader.commit().await {
error!(error = %e, "Failed to commit message");
}
record_kafka_commit_duration(commit_start.elapsed(), EventType::UserOp);
}
Err(e) => {
error!(error = %e, "Error reading UserOp events");
sleep(Duration::from_secs(1)).await;
}
}
}
}
}
53 changes: 39 additions & 14 deletions crates/audit/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,49 @@
use metrics::{Counter, Histogram};
use metrics_derive::Metrics;
use std::time::Duration;

#[derive(Metrics, Clone)]
#[metrics(scope = "tips_audit")]
pub struct Metrics {
#[metric(describe = "Duration of archive_event")]
pub archive_event_duration: Histogram,
/// Event type tag for metrics differentiation.
///
/// Used as the `"type"` label value on all audit metrics so bundle and
/// UserOp pipelines can be distinguished on the same metric name.
// Public enum previously derived only `Clone, Copy`; add `Debug` (public
// types should be debuggable), plus `PartialEq, Eq, Hash` so the tag can be
// compared and used as a map key. All additions are backward compatible.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EventType {
    Bundle,
    UserOp,
}

#[metric(describe = "Age of event when processed (now - event timestamp)")]
pub event_age: Histogram,
impl EventType {
pub fn as_str(&self) -> &'static str {
match self {
EventType::Bundle => "bundle",
EventType::UserOp => "userop",
}
}
}

#[metric(describe = "Duration of Kafka read_event")]
pub kafka_read_duration: Histogram,
/// Records how long a single archive_event call took, tagged by event type.
pub fn record_archive_event_duration(duration: Duration, event_type: EventType) {
    let seconds = duration.as_secs_f64();
    let histogram =
        metrics::histogram!("tips_audit_archive_event_duration", "type" => event_type.as_str());
    histogram.record(seconds);
}

/// Records the age of an event at processing time (now - event timestamp),
/// in milliseconds, tagged by event type.
pub fn record_event_age(age_ms: f64, event_type: EventType) {
    let histogram = metrics::histogram!("tips_audit_event_age", "type" => event_type.as_str());
    histogram.record(age_ms);
}

#[metric(describe = "Duration of Kafka commit")]
pub kafka_commit_duration: Histogram,
/// Records the duration of a Kafka read_event call, tagged by event type.
pub fn record_kafka_read_duration(duration: Duration, event_type: EventType) {
    let seconds = duration.as_secs_f64();
    let histogram =
        metrics::histogram!("tips_audit_kafka_read_duration", "type" => event_type.as_str());
    histogram.record(seconds);
}

/// Records the duration of a Kafka offset commit, tagged by event type.
pub fn record_kafka_commit_duration(duration: Duration, event_type: EventType) {
    let seconds = duration.as_secs_f64();
    let histogram =
        metrics::histogram!("tips_audit_kafka_commit_duration", "type" => event_type.as_str());
    histogram.record(seconds);
}

/// Bumps the processed-events counter by one, tagged by event type.
pub fn increment_events_processed(event_type: EventType) {
    let counter = metrics::counter!("tips_audit_events_processed", "type" => event_type.as_str());
    counter.increment(1);
}

#[derive(Metrics, Clone)]
#[metrics(scope = "tips_audit")]
pub struct Metrics {
#[metric(describe = "Duration of update_bundle_history")]
pub update_bundle_history_duration: Histogram,

Expand All @@ -28,9 +56,6 @@ pub struct Metrics {
#[metric(describe = "Duration of S3 put_object")]
pub s3_put_duration: Histogram,

#[metric(describe = "Total events processed")]
pub events_processed: Counter,

#[metric(describe = "Total S3 writes skipped due to dedup")]
pub s3_writes_skipped: Counter,
}
Loading