Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1062,12 +1062,15 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
unsigned long duration = now - start_time;
u64 latency_ns = jiffies_to_nsecs(duration);
unsigned int bucket = diskstat_latency_bucket(latency_ns);

part_stat_lock();
update_io_ticks(bdev, now, true);
part_stat_inc(bdev, ios[sgrp]);
part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_add(bdev, nsecs[sgrp], latency_ns);
part_stat_latency_record(bdev, sgrp, now, bucket);
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
}
Expand Down
6 changes: 4 additions & 2 deletions block/blk-flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,13 @@ static void blk_flush_restore_request(struct request *rq)
/*
 * Account a completed flush request against the disk's part0 device:
 * bump the flush I/O count, add the request latency to the flush time
 * total and record the latency in the flush latency histogram.
 */
static void blk_account_io_flush(struct request *rq)
{
	struct block_device *part = rq->q->disk->part0;
	/* Sample the clock once so the total and the histogram agree. */
	u64 latency_ns = blk_time_get_ns() - rq->start_time_ns;
	unsigned int bucket = diskstat_latency_bucket(latency_ns);

	part_stat_lock();
	part_stat_inc(part, ios[STAT_FLUSH]);
	/* Add the latency exactly once; a second add would double-count it. */
	part_stat_add(part, nsecs[STAT_FLUSH], latency_ns);
	part_stat_latency_record(part, STAT_FLUSH, jiffies, bucket);
	part_stat_unlock();
}

Expand Down
5 changes: 4 additions & 1 deletion block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1068,11 +1068,14 @@ static inline void blk_account_io_done(struct request *req, u64 now)
*/
if ((req->rq_flags & (RQF_IO_STAT|RQF_FLUSH_SEQ)) == RQF_IO_STAT) {
const int sgrp = op_stat_group(req_op(req));
u64 latency_ns = now - req->start_time_ns;
unsigned int bucket = diskstat_latency_bucket(latency_ns);

part_stat_lock();
update_io_ticks(req->part, jiffies, true);
part_stat_inc(req->part, ios[sgrp]);
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
part_stat_add(req->part, nsecs[sgrp], latency_ns);
part_stat_latency_record(req->part, sgrp, jiffies, bucket);
part_stat_local_dec(req->part,
in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
Expand Down
50 changes: 48 additions & 2 deletions block/genhd.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,23 +108,60 @@ static void part_stat_read_all(struct block_device *part,
struct disk_stats *stat)
{
int cpu;
u32 now_epoch = (u32)(jiffies / HZ);

memset(stat, 0, sizeof(struct disk_stats));
for_each_possible_cpu(cpu) {
struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
int group;
int slice;
int bucket;

for (group = 0; group < NR_STAT_GROUPS; group++) {
stat->nsecs[group] += ptr->nsecs[group];
stat->sectors[group] += ptr->sectors[group];
stat->ios[group] += ptr->ios[group];
stat->merges[group] += ptr->merges[group];

for (slice = 0; slice < NR_STAT_SLICES; slice++) {
u32 slice_epoch = READ_ONCE(ptr->latency_epoch[slice]);
s32 age = (s32)(now_epoch - slice_epoch);

if (age < 0 || age >= NR_STAT_SLICES)
continue;

for (bucket = 0; bucket < NR_STAT_BUCKETS; bucket++)
stat->latency[group][0][bucket] +=
ptr->latency[group][slice][bucket];
}
}

stat->io_ticks += ptr->io_ticks;
}
}

/*
 * Estimate the 99th-percentile latency, in microseconds, from a histogram
 * of per-bucket sample counts.
 *
 * Returns 0 for an empty histogram. Otherwise returns the representative
 * latency of the first bucket at which the running count reaches
 * total - total / 100.
 */
static u32 diskstat_p99_us(u32 buckets[NR_STAT_BUCKETS])
{
	u32 nr_samples = 0;
	u32 seen = 0;
	u32 threshold;
	int i;

	for (i = 0; i < NR_STAT_BUCKETS; i++)
		nr_samples += buckets[i];

	if (nr_samples == 0)
		return 0;

	/* Number of samples that must be covered to reach the percentile. */
	threshold = nr_samples - div_u64((u64)nr_samples, 100);

	for (i = 0; i < NR_STAT_BUCKETS; i++) {
		seen += buckets[i];
		if (seen < threshold)
			continue;
		return diskstat_latency_bucket_us(i);
	}

	/* Fallback: report the last bucket. */
	return diskstat_latency_bucket_us(NR_STAT_BUCKETS - 1);
}

static void bdev_count_inflight_rw(struct block_device *part,
unsigned int inflight[2], bool mq_driver)
{
Expand Down Expand Up @@ -1078,7 +1115,8 @@ ssize_t part_stat_show(struct device *dev,
"%8lu %8lu %8llu %8u "
"%8u %8u %8u "
"%8lu %8lu %8llu %8u "
"%8lu %8u"
"%8lu %8u "
"%8u %8u %8u %8u"
"\n",
stat.ios[STAT_READ],
stat.merges[STAT_READ],
Expand All @@ -1100,7 +1138,11 @@ ssize_t part_stat_show(struct device *dev,
(unsigned long long)stat.sectors[STAT_DISCARD],
(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
stat.ios[STAT_FLUSH],
(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC),
diskstat_p99_us(stat.latency[STAT_READ][0]),
diskstat_p99_us(stat.latency[STAT_WRITE][0]),
diskstat_p99_us(stat.latency[STAT_DISCARD][0]),
diskstat_p99_us(stat.latency[STAT_FLUSH][0]));
}

/*
Expand Down Expand Up @@ -1406,6 +1448,10 @@ static int diskstats_show(struct seq_file *seqf, void *v)
seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]);
seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
NSEC_PER_MSEC));
seq_put_decimal_ull(seqf, " ", diskstat_p99_us(stat.latency[STAT_READ][0]));
seq_put_decimal_ull(seqf, " ", diskstat_p99_us(stat.latency[STAT_WRITE][0]));
seq_put_decimal_ull(seqf, " ", diskstat_p99_us(stat.latency[STAT_DISCARD][0]));
seq_put_decimal_ull(seqf, " ", diskstat_p99_us(stat.latency[STAT_FLUSH][0]));
seq_putc(seqf, '\n');
}
rcu_read_unlock();
Expand Down
79 changes: 79 additions & 0 deletions include/linux/part_stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,28 @@
#include <linux/blkdev.h>
#include <asm/local.h>

/*
* Diskstats latency histogram:
* - Bucket upper bounds are power-of-two in usecs, starting at DISK_LAT_BASE_USEC.
* - The last bucket is a saturation bucket for latencies >= DISK_LAT_MAX_USEC.
*
* Latency is tracked in NR_STAT_SLICES 1-second slices and
* summed to compute a NR_STAT_SLICES-second P99 latency.
*/
#define NR_STAT_BUCKETS 21
#define NR_STAT_SLICES 5
#define DISK_LAT_BASE_USEC 8U
#define DISK_LAT_MAX_USEC (DISK_LAT_BASE_USEC << (NR_STAT_BUCKETS - 1))

/*
 * I/O statistics for a block device. Instances are accessed per CPU via
 * per_cpu_ptr(part->bd_stats, cpu) and summed across CPUs when reported.
 */
struct disk_stats {
/* Accumulated I/O latency per stat group, in nanoseconds. */
u64 nsecs[NR_STAT_GROUPS];
/* Sectors transferred per stat group. */
unsigned long sectors[NR_STAT_GROUPS];
/* Number of accounted I/Os per stat group. */
unsigned long ios[NR_STAT_GROUPS];
/* Merge count per stat group (presumably request merges; updated elsewhere). */
unsigned long merges[NR_STAT_GROUPS];
/* Maintained by update_io_ticks(). */
unsigned long io_ticks;
/* In-flight counters, indexed by op_is_write(op). */
local_t in_flight[2];
/* Second (jiffies / HZ) whose samples each latency slice currently holds. */
u32 latency_epoch[NR_STAT_SLICES];
/* Latency histogram counts, indexed [stat group][slice][bucket]. */
u32 latency[NR_STAT_GROUPS][NR_STAT_SLICES][NR_STAT_BUCKETS];
};

/*
Expand Down Expand Up @@ -81,4 +96,68 @@ static inline void part_stat_set_all(struct block_device *part, int value)

unsigned int bdev_count_inflight(struct block_device *part);

/*
 * Map an I/O latency in nanoseconds to a latency histogram bucket index.
 *
 * Latencies up to DISK_LAT_BASE_USEC land in bucket 0; latencies of
 * DISK_LAT_MAX_USEC or more land in the last bucket (NR_STAT_BUCKETS - 1).
 * In between, the index is fls64((latency_us - 1) / DISK_LAT_BASE_USEC),
 * clamped to the last bucket.
 */
static inline unsigned int diskstat_latency_bucket(u64 latency_ns)
{
	/*
	 * Use div_u64() instead of a plain '/' on a u64, matching the
	 * division below: open-coded 64-bit division may need a libgcc
	 * helper that is not available on 32-bit builds.
	 */
	u64 latency_us = div_u64(latency_ns, 1000);
	u64 scaled;

	if (latency_us <= DISK_LAT_BASE_USEC)
		return 0;

	if (latency_us >= DISK_LAT_MAX_USEC)
		return NR_STAT_BUCKETS - 1;

	/* Subtract 1 so an exact upper bound stays in its own bucket. */
	scaled = div_u64(latency_us - 1, DISK_LAT_BASE_USEC);
	return min_t(unsigned int, (unsigned int)fls64(scaled),
		     NR_STAT_BUCKETS - 1);
}

/*
 * Upper latency bound, in microseconds, of a histogram bucket. The last
 * bucket (and any index past it) reports DISK_LAT_MAX_USEC.
 */
static inline u32 diskstat_latency_bucket_upper_us(unsigned int bucket)
{
	const unsigned int last = NR_STAT_BUCKETS - 1;

	return bucket < last ? DISK_LAT_BASE_USEC << bucket : DISK_LAT_MAX_USEC;
}

/*
 * Representative latency, in microseconds, reported for a histogram bucket.
 *
 * The last bucket reports DISK_LAT_MAX_USEC. Every other bucket reports
 * three quarters of its upper bound, i.e. the midpoint between half the
 * upper bound and the upper bound itself.
 */
static inline u32 diskstat_latency_bucket_us(unsigned int bucket)
{
	u32 half;

	if (bucket >= NR_STAT_BUCKETS - 1)
		return DISK_LAT_MAX_USEC;

	half = diskstat_latency_bucket_upper_us(bucket) >> 1;
	return half + (half >> 1);
}

/*
 * Make sure the current CPU's latency slice @slice of @part belongs to
 * @epoch. If the slice is tagged with a different epoch, its counters for
 * every stat group are zeroed and the slice is re-tagged with @epoch.
 */
static inline void __part_stat_latency_prepare(struct block_device *part,
		u32 epoch, unsigned int slice)
{
	struct disk_stats *cpu_stats;
	u32 *slot_epoch;
	int grp;

	cpu_stats = per_cpu_ptr(part->bd_stats, smp_processor_id());
	slot_epoch = &cpu_stats->latency_epoch[slice];

	/* Common case: the slice already holds data for this epoch. */
	if (likely(*slot_epoch == epoch))
		return;

	for (grp = 0; grp < NR_STAT_GROUPS; grp++)
		memset(cpu_stats->latency[grp][slice], 0,
		       sizeof(cpu_stats->latency[grp][slice]));
	*slot_epoch = epoch;
}

/*
 * Record one latency sample for @part in the histogram.
 *
 * @sgrp:   stat group the sample belongs to
 * @now:    current time in jiffies; selects the one-second slice
 * @bucket: histogram bucket index for the sample
 */
static inline void part_stat_latency_record(struct block_device *part,
		int sgrp, unsigned long now, unsigned int bucket)
{
	const u32 second = now / HZ;
	const unsigned int idx = second % NR_STAT_SLICES;

	__part_stat_latency_prepare(part, second, idx);
	/* For a partition, also refresh the whole device's slice. */
	if (bdev_is_partition(part))
		__part_stat_latency_prepare(bdev_whole(part), second, idx);

	part_stat_inc(part, latency[sgrp][idx][bucket]);
}

#endif /* _LINUX_PART_STAT_H */