From d8776d9a274314909289da5ace8d2191d500c01b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:23 +0800 Subject: [PATCH 1/8] blk-wbt: factor out a helper wbt_set_lat() To move implementation details inside blk-wbt.c, prepare to fix possible deadlock to call wbt_init() while queue is frozen in the next patch. Reviewed-by: Ming Lei Reviewed-by: Nilay Shroff Signed-off-by: Yu Kuai Reviewed-by: Hannes Reinecke --- block/blk-sysfs.c | 39 ++---------------------------------- block/blk-wbt.c | 50 ++++++++++++++++++++++++++++++++++++++++++++--- block/blk-wbt.h | 7 ++----- 3 files changed, 51 insertions(+), 45 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e0a70d26972b..a580688c3ad5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -636,11 +636,8 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page) static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, size_t count) { - struct request_queue *q = disk->queue; - struct rq_qos *rqos; ssize_t ret; s64 val; - unsigned int memflags; ret = queue_var_store64(&val, page); if (ret < 0) @@ -648,40 +645,8 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page, if (val < -1) return -EINVAL; - /* - * Ensure that the queue is idled, in case the latency update - * ends up either enabling or disabling wbt completely. We can't - * have IO inflight if that happens. - */ - memflags = blk_mq_freeze_queue(q); - - rqos = wbt_rq_qos(q); - if (!rqos) { - ret = wbt_init(disk); - if (ret) - goto out; - } - - ret = count; - if (val == -1) - val = wbt_default_latency_nsec(q); - else if (val >= 0) - val *= 1000ULL; - - if (wbt_get_min_lat(q) == val) - goto out; - - blk_mq_quiesce_queue(q); - - mutex_lock(&disk->rqos_state_mutex); - wbt_set_min_lat(q, val); - mutex_unlock(&disk->rqos_state_mutex); - - blk_mq_unquiesce_queue(q); -out: - blk_mq_unfreeze_queue(q, memflags); - - return ret; + ret = wbt_set_lat(disk, val); + return ret ? ret : count; } QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 0974875f77bd..abc2190689bb 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -93,6 +93,8 @@ struct rq_wb { struct rq_depth rq_depth; }; +static int wbt_init(struct gendisk *disk); + static inline struct rq_wb *RQWB(struct rq_qos *rqos) { return container_of(rqos, struct rq_wb, rqos); @@ -506,7 +508,7 @@ u64 wbt_get_min_lat(struct request_queue *q) return RQWB(rqos)->min_lat_nsec; } -void wbt_set_min_lat(struct request_queue *q, u64 val) +static void wbt_set_min_lat(struct request_queue *q, u64 val) { struct rq_qos *rqos = wbt_rq_qos(q); if (!rqos) @@ -741,7 +743,7 @@ void wbt_init_enable_default(struct gendisk *disk) WARN_ON_ONCE(wbt_init(disk)); } -u64 wbt_default_latency_nsec(struct request_queue *q) +static u64 wbt_default_latency_nsec(struct request_queue *q) { /* * We default to 2msec for non-rotational storage, and 75msec @@ -902,7 +904,7 @@ static const struct rq_qos_ops wbt_rqos_ops = { #endif }; -int wbt_init(struct gendisk *disk) +static int wbt_init(struct gendisk *disk) { struct request_queue *q = disk->queue; struct rq_wb *rwb; @@ -949,3 +951,45 @@ int wbt_init(struct gendisk *disk) return ret; } + +int wbt_set_lat(struct gendisk *disk, s64 val) +{ + struct request_queue *q = disk->queue; + unsigned int memflags; + struct rq_qos *rqos; + int ret = 0; + + /* + * Ensure that the queue is idled, in case the latency update + * ends up either enabling or disabling wbt completely. We can't + * have IO inflight if that happens. + */ + memflags = blk_mq_freeze_queue(q); + + rqos = wbt_rq_qos(q); + if (!rqos) { + ret = wbt_init(disk); + if (ret) + goto out; + } + + if (val == -1) + val = wbt_default_latency_nsec(q); + else if (val >= 0) + val *= 1000ULL; + + if (wbt_get_min_lat(q) == val) + goto out; + + blk_mq_quiesce_queue(q); + + mutex_lock(&disk->rqos_state_mutex); + wbt_set_min_lat(q, val); + mutex_unlock(&disk->rqos_state_mutex); + + blk_mq_unquiesce_queue(q); +out: + blk_mq_unfreeze_queue(q, memflags); + + return ret; +} diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 925f22475738..6e39da17218b 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -4,16 +4,13 @@ #ifdef CONFIG_BLK_WBT -int wbt_init(struct gendisk *disk); void wbt_init_enable_default(struct gendisk *disk); void wbt_disable_default(struct gendisk *disk); void wbt_enable_default(struct gendisk *disk); u64 wbt_get_min_lat(struct request_queue *q); -void wbt_set_min_lat(struct request_queue *q, u64 val); -bool wbt_disabled(struct request_queue *); - -u64 wbt_default_latency_nsec(struct request_queue *); +bool wbt_disabled(struct request_queue *q); +int wbt_set_lat(struct gendisk *disk, s64 val); #else From c211137cae0801502d1517337355236645104c00 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:24 +0800 Subject: [PATCH 2/8] blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter If wbt is disabled by default and user configures wbt by sysfs, queue will be frozen first and then pcpu_alloc_mutex will be held in blk_stat_alloc_callback(). Fix this problem by allocating memory first before queue frozen. Signed-off-by: Yu Kuai Reviewed-by: Nilay Shroff Reviewed-by: Ming Lei --- block/blk-wbt.c | 108 ++++++++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index abc2190689bb..9bef71ec645d 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -93,7 +93,7 @@ struct rq_wb { struct rq_depth rq_depth; }; -static int wbt_init(struct gendisk *disk); +static int wbt_init(struct gendisk *disk, struct rq_wb *rwb); static inline struct rq_wb *RQWB(struct rq_qos *rqos) { @@ -698,6 +698,41 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq) } } +static int wbt_data_dir(const struct request *rq) +{ + const enum req_op op = req_op(rq); + + if (op == REQ_OP_READ) + return READ; + else if (op_is_write(op)) + return WRITE; + + /* don't account */ + return -1; +} + +static struct rq_wb *wbt_alloc(void) +{ + struct rq_wb *rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); + + if (!rwb) + return NULL; + + rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); + if (!rwb->cb) { + kfree(rwb); + return NULL; + } + + return rwb; +} + +static void wbt_free(struct rq_wb *rwb) +{ + blk_stat_free_callback(rwb->cb); + kfree(rwb); +} + /* * Enable wbt if defaults are configured that way */ @@ -739,8 +774,17 @@ EXPORT_SYMBOL_GPL(wbt_enable_default); void wbt_init_enable_default(struct gendisk *disk) { - if (__wbt_enable_default(disk)) - WARN_ON_ONCE(wbt_init(disk)); + struct rq_wb *rwb; + + if (!__wbt_enable_default(disk)) + return; + + rwb = wbt_alloc(); + if (WARN_ON_ONCE(!rwb)) + return; + + if (WARN_ON_ONCE(wbt_init(disk, rwb))) + wbt_free(rwb); } static u64 wbt_default_latency_nsec(struct request_queue *q) @@ -755,19 +799,6 @@ static u64 wbt_default_latency_nsec(struct request_queue *q) return 75000000ULL; } -static int wbt_data_dir(const struct request *rq) -{ - const enum req_op op = req_op(rq); - - if (op == REQ_OP_READ) - return READ; - else if (op_is_write(op)) - return WRITE; - - /* don't account */ - return -1; -} - static void wbt_queue_depth_changed(struct rq_qos *rqos) { RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue); @@ -779,8 +810,7 @@ static void wbt_exit(struct rq_qos *rqos) struct rq_wb *rwb = RQWB(rqos); blk_stat_remove_callback(rqos->disk->queue, rwb->cb); - blk_stat_free_callback(rwb->cb); - kfree(rwb); + wbt_free(rwb); } /* @@ -904,22 +934,11 @@ static const struct rq_qos_ops wbt_rqos_ops = { #endif }; -static int wbt_init(struct gendisk *disk) +static int wbt_init(struct gendisk *disk, struct rq_wb *rwb) { struct request_queue *q = disk->queue; - struct rq_wb *rwb; - int i; int ret; - - rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); - if (!rwb) - return -ENOMEM; - - rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); - if (!rwb->cb) { - kfree(rwb); - return -ENOMEM; - } + int i; for (i = 0; i < WBT_NUM_RWQ; i++) rq_wait_init(&rwb->rq_wait[i]); @@ -939,38 +958,38 @@ static int wbt_init(struct gendisk *disk) ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops); mutex_unlock(&q->rq_qos_mutex); if (ret) - goto err_free; + return ret; blk_stat_add_callback(q, rwb->cb); - return 0; - -err_free: - blk_stat_free_callback(rwb->cb); - kfree(rwb); - return ret; - } int wbt_set_lat(struct gendisk *disk, s64 val) { struct request_queue *q = disk->queue; + struct rq_qos *rqos = wbt_rq_qos(q); + struct rq_wb *rwb = NULL; unsigned int memflags; - struct rq_qos *rqos; int ret = 0; + if (!rqos) { + rwb = wbt_alloc(); + if (!rwb) + return -ENOMEM; + } + /* * Ensure that the queue is idled, in case the latency update * ends up either enabling or disabling wbt completely. We can't * have IO inflight if that happens. */ memflags = blk_mq_freeze_queue(q); - - rqos = wbt_rq_qos(q); if (!rqos) { - ret = wbt_init(disk); - if (ret) + ret = wbt_init(disk, rwb); + if (ret) { + wbt_free(rwb); goto out; + } } if (val == -1) @@ -990,6 +1009,5 @@ int wbt_set_lat(struct gendisk *disk, s64 val) blk_mq_unquiesce_queue(q); out: blk_mq_unfreeze_queue(q, memflags); - return ret; } From 2af72e9f1a6f26942cde4391bfbe811cc0994251 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:25 +0800 Subject: [PATCH 3/8] blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos There is already a helper blk_mq_debugfs_register_rqos() to register one rqos, however this helper is called synchronously when the rqos is created with queue frozen. Prepare to fix possible deadlock to create blk-mq debugfs entries while queue is still frozen. Reviewed-by: Ming Lei Reviewed-by: Nilay Shroff Signed-off-by: Yu Kuai Reviewed-by: Hannes Reinecke --- block/blk-mq-debugfs.c | 23 +++++++++++++++-------- block/blk-mq-debugfs.h | 5 +++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4896525b1c05..4fe164b6d648 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -631,14 +631,7 @@ void blk_mq_debugfs_register(struct request_queue *q) blk_mq_debugfs_register_hctx(q, hctx); } - if (q->rq_qos) { - struct rq_qos *rqos = q->rq_qos; - - while (rqos) { - blk_mq_debugfs_register_rqos(rqos); - rqos = rqos->next; - } - } + blk_mq_debugfs_register_rq_qos(q); } static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, @@ -769,6 +762,20 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } +void blk_mq_debugfs_register_rq_qos(struct request_queue *q) +{ + lockdep_assert_held(&q->debugfs_mutex); + + if (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + + while (rqos) { + blk_mq_debugfs_register_rqos(rqos); + rqos = rqos->next; + } + } +} + void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index c80e453e3014..54948a266889 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -33,6 +33,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_register_rq_qos(struct request_queue *q); void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); #else @@ -78,6 +79,10 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { } +static inline void blk_mq_debugfs_register_rq_qos(struct request_queue *q) +{ +} + static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } From 195c7842dc50c9d49440342b982d4df2b84a5361 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:26 +0800 Subject: [PATCH 4/8] blk-rq-qos: fix possible debugfs_mutex deadlock Currently rq-qos debugfs entries are created from rq_qos_add(), while rq_qos_add() can be called while queue is still frozen. This can deadlock because creating new entries can trigger fs reclaim. Fix this problem by delaying creating rq-qos debugfs entries after queue is unfrozen. - For wbt, 1) it can be initialized by default, fix it by calling new helper after wbt_init() from wbt_init_enable_default(); 2) it can be initialized by sysfs, fix it by calling new helper after queue is unfrozen from wbt_set_lat(). - For iocost and iolatency, they can only be initialized by blkcg configuration, however, they don't have debugfs entries for now, hence they are not handled yet. Signed-off-by: Yu Kuai Reviewed-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke --- block/blk-rq-qos.c | 7 ------- block/blk-wbt.c | 13 ++++++++++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 654478dfbc20..d7ce99ce2e80 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -347,13 +347,6 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); - - if (rqos->ops->debugfs_attrs) { - mutex_lock(&q->debugfs_mutex); - blk_mq_debugfs_register_rqos(rqos); - mutex_unlock(&q->debugfs_mutex); - } - return 0; ebusy: blk_mq_unfreeze_queue(q, memflags); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 9bef71ec645d..de3528236545 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -774,6 +774,7 @@ EXPORT_SYMBOL_GPL(wbt_enable_default); void wbt_init_enable_default(struct gendisk *disk) { + struct request_queue *q = disk->queue; struct rq_wb *rwb; if (!__wbt_enable_default(disk)) @@ -783,8 +784,14 @@ void wbt_init_enable_default(struct gendisk *disk) if (WARN_ON_ONCE(!rwb)) return; - if (WARN_ON_ONCE(wbt_init(disk, rwb))) + if (WARN_ON_ONCE(wbt_init(disk, rwb))) { wbt_free(rwb); + return; + } + + mutex_lock(&q->debugfs_mutex); + blk_mq_debugfs_register_rq_qos(q); + mutex_unlock(&q->debugfs_mutex); } static u64 wbt_default_latency_nsec(struct request_queue *q) @@ -1009,5 +1016,9 @@ int wbt_set_lat(struct gendisk *disk, s64 val) blk_mq_unquiesce_queue(q); out: blk_mq_unfreeze_queue(q, memflags); + mutex_lock(&q->debugfs_mutex); + blk_mq_debugfs_register_rq_qos(q); + mutex_unlock(&q->debugfs_mutex); + return ret; } From 0f1778d8c7d02cb7d12588a4c707c4391e13d83d Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:27 +0800 Subject: [PATCH 5/8] blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static Because it's only used inside blk-mq-debugfs.c now. Reviewed-by: Nilay Shroff Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke --- block/blk-mq-debugfs.c | 2 +- block/blk-mq-debugfs.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4fe164b6d648..11f00a868541 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -744,7 +744,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) rqos->debugfs_dir = NULL; } -void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) +static void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; const char *dir_name = rq_qos_id_to_name(rqos->id); diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 54948a266889..d94daa66556b 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -34,7 +34,6 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rq_qos(struct request_queue *q); -void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); #else static inline void blk_mq_debugfs_register(struct request_queue *q) @@ -75,10 +74,6 @@ static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hc { } -static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) -{ -} - static inline void blk_mq_debugfs_register_rq_qos(struct request_queue *q) { } From f823413b51d280c0b688b231f4e20fc426c4e006 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:28 +0800 Subject: [PATCH 6/8] blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos() Because this helper is only used by iocost and iolatency, while they don't have debugfs entries. Signed-off-by: Yu Kuai Reviewed-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke --- block/blk-mq-debugfs.c | 10 ---------- block/blk-mq-debugfs.h | 4 ---- block/blk-rq-qos.c | 4 ---- 3 files changed, 18 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 11f00a868541..22c182b40bc3 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -734,16 +734,6 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id) return "unknown"; } -void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) -{ - lockdep_assert_held(&rqos->disk->queue->debugfs_mutex); - - if (!rqos->disk->queue->debugfs_dir) - return; - debugfs_remove_recursive(rqos->debugfs_dir); - rqos->debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index d94daa66556b..49bb1aaa83dc 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -34,7 +34,6 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_register_rq_qos(struct request_queue *q); -void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { @@ -78,9 +77,6 @@ static inline void blk_mq_debugfs_register_rq_qos(struct request_queue *q) { } -static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) -{ -} #endif #if defined(CONFIG_BLK_DEV_ZONED) && defined(CONFIG_BLK_DEBUG_FS) diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index d7ce99ce2e80..85cf74402a09 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -371,8 +371,4 @@ void rq_qos_del(struct rq_qos *rqos) if (!q->rq_qos) blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); - - mutex_lock(&q->debugfs_mutex); - blk_mq_debugfs_unregister_rqos(rqos); - mutex_unlock(&q->debugfs_mutex); } From 59aae5737d0aac6b453658a4798389626504d890 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:29 +0800 Subject: [PATCH 7/8] blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs() In blk_mq_update_nr_hw_queues(), debugfs_mutex is not held while creating debugfs entries for hctxs. Hence add debugfs_mutex there, it's safe because queue is not frozen. Signed-off-by: Yu Kuai Reviewed-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke --- block/blk-mq-debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 22c182b40bc3..5c7cadf51a88 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -679,8 +679,10 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; + mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) From aef3ed7d93bfd7edae660e65799b071bfbc3dbbb Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 9 Jan 2026 14:52:30 +0800 Subject: [PATCH 8/8] blk-mq-debugfs: warn about possible deadlock Creating new debugfs entries can trigger fs reclaim, hence we can't do this with queue frozen, meanwhile, other locks that can be held while queue is frozen should not be held as well. Signed-off-by: Yu Kuai Reviewed-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke --- block/blk-mq-debugfs.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 5c7cadf51a88..faeaa1fc86a7 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -608,9 +608,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {}, }; -static void debugfs_create_files(struct dentry *parent, void *data, +static void debugfs_create_files(struct request_queue *q, struct dentry *parent, + void *data, const struct blk_mq_debugfs_attr *attr) { + lockdep_assert_held(&q->debugfs_mutex); + /* + * Creating new debugfs entries with queue freezed has the risk of + * deadlock. + */ + WARN_ON_ONCE(q->mq_freeze_depth != 0); + /* + * debugfs_mutex should not be nested under other locks that can be + * grabbed while queue is frozen. + */ + lockdep_assert_not_held(&q->elevator_lock); + lockdep_assert_not_held(&q->rq_qos_mutex); + if (IS_ERR_OR_NULL(parent)) return; @@ -624,7 +638,7 @@ void blk_mq_debugfs_register(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); + debugfs_create_files(q, q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); queue_for_each_hw_ctx(q, hctx, i) { if (!hctx->debugfs_dir) @@ -643,7 +657,8 @@ static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, snprintf(name, sizeof(name), "cpu%u", ctx->cpu); ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir); - debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs); + debugfs_create_files(hctx->queue, ctx_dir, ctx, + blk_mq_debugfs_ctx_attrs); } void blk_mq_debugfs_register_hctx(struct request_queue *q, @@ -659,7 +674,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); - debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs); + debugfs_create_files(q, hctx->debugfs_dir, hctx, + blk_mq_debugfs_hctx_attrs); hctx_for_each_ctx(hctx, ctx, i) blk_mq_debugfs_register_ctx(hctx, ctx); @@ -712,7 +728,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir); - debugfs_create_files(q->sched_debugfs_dir, q, e->queue_debugfs_attrs); + debugfs_create_files(q, q->sched_debugfs_dir, q, e->queue_debugfs_attrs); } void blk_mq_debugfs_unregister_sched(struct request_queue *q) @@ -751,7 +767,8 @@ static void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) q->debugfs_dir); rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir); - debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); + debugfs_create_files(q, rqos->debugfs_dir, rqos, + rqos->ops->debugfs_attrs); } void blk_mq_debugfs_register_rq_qos(struct request_queue *q) @@ -788,7 +805,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, hctx->sched_debugfs_dir = debugfs_create_dir("sched", hctx->debugfs_dir); - debugfs_create_files(hctx->sched_debugfs_dir, hctx, + debugfs_create_files(q, hctx->sched_debugfs_dir, hctx, e->hctx_debugfs_attrs); }