| 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef BLK_MQ_H |
| 3 | #define BLK_MQ_H |
| 4 | |
| 5 | #include <linux/blkdev.h> |
| 6 | #include <linux/sbitmap.h> |
| 7 | #include <linux/lockdep.h> |
| 8 | #include <linux/scatterlist.h> |
| 9 | #include <linux/prefetch.h> |
| 10 | #include <linux/srcu.h> |
| 11 | #include <linux/rw_hint.h> |
| 12 | #include <linux/rwsem.h> |
| 13 | |
| 14 | struct blk_mq_tags; |
| 15 | struct blk_flush_queue; |
| 16 | |
| 17 | #define BLKDEV_MIN_RQ 4 |
| 18 | #define BLKDEV_DEFAULT_RQ 128 |
| 19 | |
| 20 | enum rq_end_io_ret { |
| 21 | RQ_END_IO_NONE, |
| 22 | RQ_END_IO_FREE, |
| 23 | }; |
| 24 | |
| 25 | typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); |
| 26 | |
| 27 | /* request flags */ |
| 29 | typedef __u32 __bitwise req_flags_t; |
| 30 | |
| 31 | /* Keep rqf_name[] in sync with the definitions below */ |
| 32 | enum rqf_flags { |
| 33 | /* drive already may have started this one */ |
| 34 | __RQF_STARTED, |
| 35 | /* request for flush sequence */ |
| 36 | __RQF_FLUSH_SEQ, |
| 37 | /* merge of different types, fail separately */ |
| 38 | __RQF_MIXED_MERGE, |
| 39 | /* don't call prep for this one */ |
| 40 | __RQF_DONTPREP, |
| 41 | /* use hctx->sched_tags */ |
| 42 | __RQF_SCHED_TAGS, |
| 43 | /* use an I/O scheduler for this request */ |
| 44 | __RQF_USE_SCHED, |
| 45 | /* vaguely specified driver internal error. Ignored by block layer */ |
| 46 | __RQF_FAILED, |
| 47 | /* don't warn about errors */ |
| 48 | __RQF_QUIET, |
| 49 | /* account into disk and partition IO statistics */ |
| 50 | __RQF_IO_STAT, |
| 51 | /* runtime pm request */ |
| 52 | __RQF_PM, |
| 53 | /* on IO scheduler merge hash */ |
| 54 | __RQF_HASHED, |
| 55 | /* track IO completion time */ |
| 56 | __RQF_STATS, |
| 57 | /* Look at ->special_vec for the actual data payload instead of the |
| 58 | bio chain. */ |
| 59 | __RQF_SPECIAL_PAYLOAD, |
| 60 | /* request completion needs to be signaled to zone write plugging. */ |
| 61 | __RQF_ZONE_WRITE_PLUGGING, |
| 62 | /* ->timeout has been called, don't expire again */ |
| 63 | __RQF_TIMED_OUT, |
| 64 | __RQF_RESV, |
| 65 | __RQF_BITS |
| 66 | }; |
| 67 | |
| 68 | #define RQF_STARTED ((__force req_flags_t)(1 << __RQF_STARTED)) |
| 69 | #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << __RQF_FLUSH_SEQ)) |
| 70 | #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << __RQF_MIXED_MERGE)) |
| 71 | #define RQF_DONTPREP ((__force req_flags_t)(1 << __RQF_DONTPREP)) |
| 72 | #define RQF_SCHED_TAGS ((__force req_flags_t)(1 << __RQF_SCHED_TAGS)) |
| 73 | #define RQF_USE_SCHED ((__force req_flags_t)(1 << __RQF_USE_SCHED)) |
| 74 | #define RQF_FAILED ((__force req_flags_t)(1 << __RQF_FAILED)) |
| 75 | #define RQF_QUIET ((__force req_flags_t)(1 << __RQF_QUIET)) |
| 76 | #define RQF_IO_STAT ((__force req_flags_t)(1 << __RQF_IO_STAT)) |
| 77 | #define RQF_PM ((__force req_flags_t)(1 << __RQF_PM)) |
| 78 | #define RQF_HASHED ((__force req_flags_t)(1 << __RQF_HASHED)) |
| 79 | #define RQF_STATS ((__force req_flags_t)(1 << __RQF_STATS)) |
| 80 | #define RQF_SPECIAL_PAYLOAD \ |
| 81 | ((__force req_flags_t)(1 << __RQF_SPECIAL_PAYLOAD)) |
| 82 | #define RQF_ZONE_WRITE_PLUGGING \ |
| 83 | ((__force req_flags_t)(1 << __RQF_ZONE_WRITE_PLUGGING)) |
| 84 | #define RQF_TIMED_OUT ((__force req_flags_t)(1 << __RQF_TIMED_OUT)) |
| 85 | #define RQF_RESV ((__force req_flags_t)(1 << __RQF_RESV)) |
| 86 | |
| 87 | /* flags that prevent us from merging requests: */ |
| 88 | #define RQF_NOMERGE_FLAGS \ |
| 89 | (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) |
| 90 | |
| 91 | enum mq_rq_state { |
| 92 | MQ_RQ_IDLE = 0, |
| 93 | MQ_RQ_IN_FLIGHT = 1, |
| 94 | MQ_RQ_COMPLETE = 2, |
| 95 | }; |
| 96 | |
| 97 | /* |
| 98 | * Try to put the fields that are referenced together in the same cacheline. |
| 99 | * |
| 100 | * If you modify this structure, make sure to update blk_rq_init() and |
| 101 | * especially blk_mq_rq_ctx_init() to take care of the added fields. |
| 102 | */ |
| 103 | struct request { |
| 104 | struct request_queue *q; |
| 105 | struct blk_mq_ctx *mq_ctx; |
| 106 | struct blk_mq_hw_ctx *mq_hctx; |
| 107 | |
| 108 | blk_opf_t cmd_flags; /* op and common flags */ |
| 109 | req_flags_t rq_flags; |
| 110 | |
| 111 | int tag; |
| 112 | int internal_tag; |
| 113 | |
| 114 | unsigned int timeout; |
| 115 | |
| 116 | /* the following two fields are internal, NEVER access directly */ |
| 117 | unsigned int __data_len; /* total data len */ |
| 118 | sector_t __sector; /* sector cursor */ |
| 119 | |
| 120 | struct bio *bio; |
| 121 | struct bio *biotail; |
| 122 | |
| 123 | union { |
| 124 | struct list_head queuelist; |
| 125 | struct request *rq_next; |
| 126 | }; |
| 127 | |
| 128 | struct block_device *part; |
| 129 | #ifdef CONFIG_BLK_RQ_ALLOC_TIME |
| 130 | /* Time that the first bio started allocating this request. */ |
| 131 | u64 alloc_time_ns; |
| 132 | #endif |
| 133 | /* Time that this request was allocated for this IO. */ |
| 134 | u64 start_time_ns; |
| 135 | /* Time that I/O was submitted to the device. */ |
| 136 | u64 io_start_time_ns; |
| 137 | |
| 138 | #ifdef CONFIG_BLK_WBT |
| 139 | unsigned short wbt_flags; |
| 140 | #endif |
| 141 | /* |
| 142 | * rq sectors used for blk stats. It has the same value as |
| 143 | * blk_rq_sectors(rq), except that it is never zeroed |
| 144 | * by completion. |
| 145 | */ |
| 146 | unsigned short stats_sectors; |
| 147 | |
| 148 | /* |
| 149 | * Number of scatter-gather DMA addr+len pairs after |
| 150 | * physical address coalescing is performed. |
| 151 | */ |
| 152 | unsigned short nr_phys_segments; |
| 153 | unsigned short nr_integrity_segments; |
| 154 | |
| 155 | /* |
| 156 | * The lowest set bit for address gaps between physical segments. This |
| 157 | * provides information necessary for DMA optimization opportunities, |
| 158 | * like for testing if the segments can be coalesced against the |
| 159 | * device's iommu granule. |
| 160 | */ |
| 161 | unsigned char phys_gap_bit; |
| 162 | |
| 163 | #ifdef CONFIG_BLK_INLINE_ENCRYPTION |
| 164 | struct bio_crypt_ctx *crypt_ctx; |
| 165 | struct blk_crypto_keyslot *crypt_keyslot; |
| 166 | #endif |
| 167 | |
| 168 | enum mq_rq_state state; |
| 169 | atomic_t ref; |
| 170 | |
| 171 | unsigned long deadline; |
| 172 | |
| 173 | /* |
| 174 | * The hash is used inside the scheduler, and killed once the |
| 175 | * request reaches the dispatch list. The ipi_list is only used |
| 176 | * to queue the request for softirq completion, which is long |
| 177 | * after the request has been unhashed (and even removed from |
| 178 | * the dispatch list). |
| 179 | */ |
| 180 | union { |
| 181 | struct hlist_node hash; /* merge hash */ |
| 182 | struct llist_node ipi_list; |
| 183 | }; |
| 184 | |
| 185 | /* |
| 186 | * The rb_node is only used inside the io scheduler, requests |
| 187 | * are pruned when moved to the dispatch queue. special_vec must |
| 188 | * only be used if RQF_SPECIAL_PAYLOAD is set, and those cannot be |
| 189 | * inserted into an IO scheduler. |
| 190 | */ |
| 191 | union { |
| 192 | struct rb_node rb_node; /* sort/lookup */ |
| 193 | struct bio_vec special_vec; |
| 194 | }; |
| 195 | |
| 196 | /* |
| 197 | * Three pointers are available for the IO schedulers, if they need |
| 198 | * more they have to dynamically allocate it. |
| 199 | */ |
| 200 | struct { |
| 201 | struct io_cq *icq; |
| 202 | void *priv[2]; |
| 203 | } elv; |
| 204 | |
| 205 | struct { |
| 206 | unsigned int seq; |
| 207 | rq_end_io_fn *saved_end_io; |
| 208 | } flush; |
| 209 | |
| 210 | u64 fifo_time; |
| 211 | |
| 212 | /* |
| 213 | * completion callback. |
| 214 | */ |
| 215 | rq_end_io_fn *end_io; |
| 216 | void *end_io_data; |
| 217 | }; |
| 218 | |
| 219 | /* |
| 220 | * Returns a mask with all bits starting at req->phys_gap_bit set to 1. |
| 221 | */ |
| 222 | static inline unsigned long req_phys_gap_mask(const struct request *req) |
| 223 | { |
| 224 | return ~(((1 << req->phys_gap_bit) >> 1) - 1); |
| 225 | } |
| 226 | |
| 227 | static inline enum req_op req_op(const struct request *req) |
| 228 | { |
| 229 | return req->cmd_flags & REQ_OP_MASK; |
| 230 | } |
| 231 | |
| 232 | static inline bool blk_rq_is_passthrough(struct request *rq) |
| 233 | { |
| 234 | return blk_op_is_passthrough(rq->cmd_flags); |
| 235 | } |
| 236 | |
| 237 | static inline unsigned short req_get_ioprio(struct request *req) |
| 238 | { |
| 239 | if (req->bio) |
| 240 | return req->bio->bi_ioprio; |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) |
| 245 | |
| 246 | #define rq_dma_dir(rq) \ |
| 247 | (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) |
| 248 | |
| 249 | static inline int rq_list_empty(const struct rq_list *rl) |
| 250 | { |
| 251 | return rl->head == NULL; |
| 252 | } |
| 253 | |
| 254 | static inline void rq_list_init(struct rq_list *rl) |
| 255 | { |
| 256 | rl->head = NULL; |
| 257 | rl->tail = NULL; |
| 258 | } |
| 259 | |
| 260 | static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq) |
| 261 | { |
| 262 | rq->rq_next = NULL; |
| 263 | if (rl->tail) |
| 264 | rl->tail->rq_next = rq; |
| 265 | else |
| 266 | rl->head = rq; |
| 267 | rl->tail = rq; |
| 268 | } |
| 269 | |
| 270 | static inline void rq_list_add_head(struct rq_list *rl, struct request *rq) |
| 271 | { |
| 272 | rq->rq_next = rl->head; |
| 273 | rl->head = rq; |
| 274 | if (!rl->tail) |
| 275 | rl->tail = rq; |
| 276 | } |
| 277 | |
| 278 | static inline struct request *rq_list_pop(struct rq_list *rl) |
| 279 | { |
| 280 | struct request *rq = rl->head; |
| 281 | |
| 282 | if (rq) { |
| 283 | rl->head = rl->head->rq_next; |
| 284 | if (!rl->head) |
| 285 | rl->tail = NULL; |
| 286 | rq->rq_next = NULL; |
| 287 | } |
| 288 | |
| 289 | return rq; |
| 290 | } |
| 291 | |
| 292 | static inline struct request *rq_list_peek(struct rq_list *rl) |
| 293 | { |
| 294 | return rl->head; |
| 295 | } |
| 296 | |
| 297 | #define rq_list_for_each(rl, pos) \ |
| 298 | for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next) |
| 299 | |
| 300 | #define rq_list_for_each_safe(rl, pos, nxt) \ |
| 301 | for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \ |
| 302 | pos; pos = nxt, nxt = pos ? pos->rq_next : NULL) |
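These helpers implement a simple singly linked request list, as used for plug lists and for the ->queue_rqs() batch path further below. A minimal usage sketch, assuming a hypothetical per-request submit helper mydrv_issue_one():

```c
/* Illustrative only: drain a driver-local rq_list, submitting one request at a time. */
static void mydrv_issue_list(struct rq_list *rl)
{
	struct request *rq;

	while ((rq = rq_list_pop(rl)) != NULL)
		mydrv_issue_one(rq);	/* hypothetical per-request submit */
}
```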
| 303 | |
| 304 | /** |
| 305 | * enum blk_eh_timer_return - How the timeout handler should proceed |
| 306 | * @BLK_EH_DONE: The block driver completed the command or will complete it at |
| 307 | * a later time. |
| 308 | * @BLK_EH_RESET_TIMER: Reset the request timer and continue waiting for the |
| 309 | * request to complete. |
| 310 | */ |
| 311 | enum blk_eh_timer_return { |
| 312 | BLK_EH_DONE, |
| 313 | BLK_EH_RESET_TIMER, |
| 314 | }; |
| 315 | |
| 316 | /** |
| 317 | * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware |
| 318 | * block device |
| 319 | */ |
| 320 | struct blk_mq_hw_ctx { |
| 321 | struct { |
| 322 | /** @lock: Protects the dispatch list. */ |
| 323 | spinlock_t lock; |
| 324 | /** |
| 325 | * @dispatch: Used for requests that are ready to be |
| 326 | * dispatched to the hardware but for some reason (e.g. lack of |
| 327 | * resources) could not be sent to the hardware. As soon as the |
| 328 | * driver can send new requests, requests in this list will |
| 329 | * be sent first for a fairer dispatch. |
| 330 | */ |
| 331 | struct list_head dispatch; |
| 332 | /** |
| 333 | * @state: BLK_MQ_S_* flags. Defines the state of the hw |
| 334 | * queue (active, scheduled to restart, stopped). |
| 335 | */ |
| 336 | unsigned long state; |
| 337 | } ____cacheline_aligned_in_smp; |
| 338 | |
| 339 | /** |
| 340 | * @run_work: Used for scheduling a hardware queue run at a later time. |
| 341 | */ |
| 342 | struct delayed_work run_work; |
| 343 | /** @cpumask: Map of available CPUs where this hctx can run. */ |
| 344 | cpumask_var_t cpumask; |
| 345 | /** |
| 346 | * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU |
| 347 | * selection from @cpumask. |
| 348 | */ |
| 349 | int next_cpu; |
| 350 | /** |
| 351 | * @next_cpu_batch: Counter of how many works left in the batch before |
| 352 | * changing to the next CPU. |
| 353 | */ |
| 354 | int next_cpu_batch; |
| 355 | |
| 356 | /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ |
| 357 | unsigned long flags; |
| 358 | |
| 359 | /** |
| 360 | * @sched_data: Pointer owned by the IO scheduler attached to a request |
| 361 | * queue. It's up to the IO scheduler how to use this pointer. |
| 362 | */ |
| 363 | void *sched_data; |
| 364 | /** |
| 365 | * @queue: Pointer to the request queue that owns this hardware context. |
| 366 | */ |
| 367 | struct request_queue *queue; |
| 368 | /** @fq: Queue of requests that need to perform a flush operation. */ |
| 369 | struct blk_flush_queue *fq; |
| 370 | |
| 371 | /** |
| 372 | * @driver_data: Pointer to data owned by the block driver that created |
| 373 | * this hctx |
| 374 | */ |
| 375 | void *driver_data; |
| 376 | |
| 377 | /** |
| 378 | * @ctx_map: Bitmap for each software queue. If bit is on, there is a |
| 379 | * pending request in that software queue. |
| 380 | */ |
| 381 | struct sbitmap ctx_map; |
| 382 | |
| 383 | /** |
| 384 | * @dispatch_from: Software queue to be used when no scheduler was |
| 385 | * selected. |
| 386 | */ |
| 387 | struct blk_mq_ctx *dispatch_from; |
| 388 | /** |
| 389 | * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to |
| 390 | * decide if the hw_queue is busy using Exponential Weighted Moving |
| 391 | * Average algorithm. |
| 392 | */ |
| 393 | unsigned int dispatch_busy; |
| 394 | |
| 395 | /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ |
| 396 | unsigned short type; |
| 397 | /** @nr_ctx: Number of software queues. */ |
| 398 | unsigned short nr_ctx; |
| 399 | /** @ctxs: Array of software queues. */ |
| 400 | struct blk_mq_ctx **ctxs; |
| 401 | |
| 402 | /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ |
| 403 | spinlock_t dispatch_wait_lock; |
| 404 | /** |
| 405 | * @dispatch_wait: Waitqueue to put requests when there is no tag |
| 406 | * available at the moment, to wait for another try in the future. |
| 407 | */ |
| 408 | wait_queue_entry_t dispatch_wait; |
| 409 | |
| 410 | /** |
| 411 | * @wait_index: Index of next available dispatch_wait queue to insert |
| 412 | * requests. |
| 413 | */ |
| 414 | atomic_t wait_index; |
| 415 | |
| 416 | /** |
| 417 | * @tags: Tags owned by the block driver. A tag in this set is only |
| 418 | * assigned when a request is dispatched from a hardware queue. |
| 419 | */ |
| 420 | struct blk_mq_tags *tags; |
| 421 | /** |
| 422 | * @sched_tags: Tags owned by I/O scheduler. If there is an I/O |
| 423 | * scheduler associated with a request queue, a tag is assigned when |
| 424 | * that request is allocated. Else, this member is not used. |
| 425 | */ |
| 426 | struct blk_mq_tags *sched_tags; |
| 427 | |
| 428 | /** @numa_node: NUMA node the storage adapter has been connected to. */ |
| 429 | unsigned int numa_node; |
| 430 | /** @queue_num: Index of this hardware queue. */ |
| 431 | unsigned int queue_num; |
| 432 | |
| 433 | /** |
| 434 | * @nr_active: Number of active requests. Only used when a tag set is |
| 435 | * shared across request queues. |
| 436 | */ |
| 437 | atomic_t nr_active; |
| 438 | |
| 439 | /** @cpuhp_online: List to store requests if a CPU is going to die */ |
| 440 | struct hlist_node cpuhp_online; |
| 441 | /** @cpuhp_dead: List to store requests if some CPU dies. */ |
| 442 | struct hlist_node cpuhp_dead; |
| 443 | /** @kobj: Kernel object for sysfs. */ |
| 444 | struct kobject kobj; |
| 445 | |
| 446 | #ifdef CONFIG_BLK_DEBUG_FS |
| 447 | /** |
| 448 | * @debugfs_dir: debugfs directory for this hardware queue. Named |
| 449 | * as cpu<cpu_number>. |
| 450 | */ |
| 451 | struct dentry *debugfs_dir; |
| 452 | /** @sched_debugfs_dir: debugfs directory for the scheduler. */ |
| 453 | struct dentry *sched_debugfs_dir; |
| 454 | #endif |
| 455 | |
| 456 | /** |
| 457 | * @hctx_list: if this hctx is not in use, this is an entry in |
| 458 | * q->unused_hctx_list. |
| 459 | */ |
| 460 | struct list_head hctx_list; |
| 461 | }; |
| 462 | |
| 463 | /** |
| 464 | * struct blk_mq_queue_map - Map software queues to hardware queues |
| 465 | * @mq_map: CPU ID to hardware queue index map. This is an array |
| 466 | * with nr_cpu_ids elements. Each element has a value in the range |
| 467 | * [@queue_offset, @queue_offset + @nr_queues). |
| 468 | * @nr_queues: Number of hardware queues to map CPU IDs onto. |
| 469 | * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe |
| 470 | * driver to map each hardware queue type (enum hctx_type) onto a distinct |
| 471 | * set of hardware queues. |
| 472 | */ |
| 473 | struct blk_mq_queue_map { |
| 474 | unsigned int *mq_map; |
| 475 | unsigned int nr_queues; |
| 476 | unsigned int queue_offset; |
| 477 | }; |
| 478 | |
| 479 | /** |
| 480 | * enum hctx_type - Type of hardware queue |
| 481 | * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. |
| 482 | * @HCTX_TYPE_READ: Just for READ I/O. |
| 483 | * @HCTX_TYPE_POLL: Polled I/O of any kind. |
| 484 | * @HCTX_MAX_TYPES: Number of types of hctx. |
| 485 | */ |
| 486 | enum hctx_type { |
| 487 | HCTX_TYPE_DEFAULT, |
| 488 | HCTX_TYPE_READ, |
| 489 | HCTX_TYPE_POLL, |
| 490 | |
| 491 | HCTX_MAX_TYPES, |
| 492 | }; |
| 493 | |
| 494 | /** |
| 495 | * struct blk_mq_tag_set - tag set that can be shared between request queues |
| 496 | * @ops: Pointers to functions that implement block driver behavior. |
| 497 | * @map: One or more ctx -> hctx mappings. One map exists for each |
| 498 | * hardware queue type (enum hctx_type) that the driver wishes |
| 499 | * to support. There are no restrictions on maps being of the |
| 500 | * same size, and it's perfectly legal to share maps between |
| 501 | * types. |
| 502 | * @nr_maps: Number of elements in the @map array. A number in the range |
| 503 | * [1, HCTX_MAX_TYPES]. |
| 504 | * @nr_hw_queues: Number of hardware queues supported by the block driver that |
| 505 | * owns this data structure. |
| 506 | * @queue_depth: Number of tags per hardware queue, reserved tags included. |
| 507 | * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag |
| 508 | * allocations. |
| 509 | * @cmd_size: Number of additional bytes to allocate per request. The block |
| 510 | * driver owns these additional bytes. |
| 511 | * @numa_node: NUMA node the storage adapter has been connected to. |
| 512 | * @timeout: Request processing timeout in jiffies. |
| 513 | * @flags: Zero or more BLK_MQ_F_* flags. |
| 514 | * @driver_data: Pointer to data owned by the block driver that created this |
| 515 | * tag set. |
| 516 | * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues |
| 517 | * elements. |
| 518 | * @shared_tags: |
| 519 | * Shared set of tags. Has @nr_hw_queues elements. If set, |
| 520 | * shared by all @tags. |
| 521 | * @tag_list_lock: Serializes tag_list accesses. |
| 522 | * @tag_list: List of the request queues that use this tag set. See also |
| 523 | * request_queue.tag_set_list. |
| 524 | * @srcu: Use as lock when type of the request queue is blocking |
| 525 | * (BLK_MQ_F_BLOCKING). |
| 526 | * @tags_srcu: SRCU used to defer freeing of tags page_list to prevent |
| 527 | * use-after-free when iterating tags. |
| 528 | * @update_nr_hwq_lock: |
| 529 | * Synchronize updating nr_hw_queues with add/del disk & |
| 530 | * switching elevator. |
| 531 | */ |
| 532 | struct blk_mq_tag_set { |
| 533 | const struct blk_mq_ops *ops; |
| 534 | struct blk_mq_queue_map map[HCTX_MAX_TYPES]; |
| 535 | unsigned int nr_maps; |
| 536 | unsigned int nr_hw_queues; |
| 537 | unsigned int queue_depth; |
| 538 | unsigned int reserved_tags; |
| 539 | unsigned int cmd_size; |
| 540 | int numa_node; |
| 541 | unsigned int timeout; |
| 542 | unsigned int flags; |
| 543 | void *driver_data; |
| 544 | |
| 545 | struct blk_mq_tags **tags; |
| 546 | |
| 547 | struct blk_mq_tags *shared_tags; |
| 548 | |
| 549 | struct mutex tag_list_lock; |
| 550 | struct list_head tag_list; |
| 551 | struct srcu_struct *srcu; |
| 552 | struct srcu_struct tags_srcu; |
| 553 | |
| 554 | struct rw_semaphore update_nr_hwq_lock; |
| 555 | }; |
| 556 | |
| 557 | /** |
| 558 | * struct blk_mq_queue_data - Data about a request inserted in a queue |
| 559 | * |
| 560 | * @rq: Request pointer. |
| 561 | * @last: If it is the last request in the queue. |
| 562 | */ |
| 563 | struct blk_mq_queue_data { |
| 564 | struct request *rq; |
| 565 | bool last; |
| 566 | }; |
| 567 | |
| 568 | typedef bool (busy_tag_iter_fn)(struct request *, void *); |
| 569 | |
| 570 | /** |
| 571 | * struct blk_mq_ops - Callback functions that implement block driver |
| 572 | * behaviour. |
| 573 | */ |
| 574 | struct blk_mq_ops { |
| 575 | /** |
| 576 | * @queue_rq: Queue a new request from block IO. |
| 577 | */ |
| 578 | blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *, |
| 579 | const struct blk_mq_queue_data *); |
| 580 | |
| 581 | /** |
| 582 | * @commit_rqs: If a driver uses bd->last to judge when to submit |
| 583 | * requests to hardware, it must define this function. In case of errors |
| 584 | * that make us stop issuing further requests, this hook serves the |
| 585 | * purpose of kicking the hardware (which the last request otherwise |
| 586 | * would have done). |
| 587 | */ |
| 588 | void (*commit_rqs)(struct blk_mq_hw_ctx *); |
| 589 | |
| 590 | /** |
| 591 | * @queue_rqs: Queue a list of new requests. Driver is guaranteed |
| 592 | * that each request belongs to the same queue. If the driver doesn't |
| 593 | * empty the @rqlist completely, then the rest will be queued |
| 594 | * individually by the block layer upon return. |
| 595 | */ |
| 596 | void (*queue_rqs)(struct rq_list *rqlist); |
| 597 | |
| 598 | /** |
| 599 | * @get_budget: Reserve a budget before queueing a request. Once .queue_rq |
| 600 | * has run, it is the driver's responsibility to release the |
| 601 | * reserved budget. The failure case of .get_budget also has to |
| 602 | * be handled to avoid I/O deadlock. |
| 603 | */ |
| 604 | int (*get_budget)(struct request_queue *); |
| 605 | |
| 606 | /** |
| 607 | * @put_budget: Release the reserved budget. |
| 608 | */ |
| 609 | void (*put_budget)(struct request_queue *, int); |
| 610 | |
| 611 | /** |
| 612 | * @set_rq_budget_token: store rq's budget token |
| 613 | */ |
| 614 | void (*set_rq_budget_token)(struct request *, int); |
| 615 | /** |
| 616 | * @get_rq_budget_token: retrieve rq's budget token |
| 617 | */ |
| 618 | int (*get_rq_budget_token)(struct request *); |
| 619 | |
| 620 | /** |
| 621 | * @timeout: Called on request timeout. |
| 622 | */ |
| 623 | enum blk_eh_timer_return (*timeout)(struct request *); |
| 624 | |
| 625 | /** |
| 626 | * @poll: Called to poll for completion of a specific tag. |
| 627 | */ |
| 628 | int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); |
| 629 | |
| 630 | /** |
| 631 | * @complete: Mark the request as complete. |
| 632 | */ |
| 633 | void (*complete)(struct request *); |
| 634 | |
| 635 | /** |
| 636 | * @init_hctx: Called when the block layer side of a hardware queue has |
| 637 | * been set up, allowing the driver to allocate/init matching |
| 638 | * structures. |
| 639 | */ |
| 640 | int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int); |
| 641 | /** |
| 642 | * @exit_hctx: Ditto for exit/teardown. |
| 643 | */ |
| 644 | void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); |
| 645 | |
| 646 | /** |
| 647 | * @init_request: Called for every command allocated by the block layer |
| 648 | * to allow the driver to set up driver specific data. |
| 649 | * |
| 650 | * A tag greater than or equal to queue_depth is used for setting up |
| 651 | * the flush request. |
| 652 | */ |
| 653 | int (*init_request)(struct blk_mq_tag_set *set, struct request *, |
| 654 | unsigned int, unsigned int); |
| 655 | /** |
| 656 | * @exit_request: Ditto for exit/teardown. |
| 657 | */ |
| 658 | void (*exit_request)(struct blk_mq_tag_set *set, struct request *, |
| 659 | unsigned int); |
| 660 | |
| 661 | /** |
| 662 | * @cleanup_rq: Called before freeing a request which hasn't completed |
| 663 | * yet, usually to free the driver private data. |
| 664 | */ |
| 665 | void (*cleanup_rq)(struct request *); |
| 666 | |
| 667 | /** |
| 668 | * @busy: If set, returns whether or not this queue currently is busy. |
| 669 | */ |
| 670 | bool (*busy)(struct request_queue *); |
| 671 | |
| 672 | /** |
| 673 | * @map_queues: This allows drivers to specify their own queue mapping by |
| 674 | * overriding the setup-time function that builds the mq_map. |
| 675 | */ |
| 676 | void (*map_queues)(struct blk_mq_tag_set *set); |
| 677 | |
| 678 | #ifdef CONFIG_BLK_DEBUG_FS |
| 679 | /** |
| 680 | * @show_rq: Used by the debugfs implementation to show driver-specific |
| 681 | * information about a request. |
| 682 | */ |
| 683 | void (*show_rq)(struct seq_file *m, struct request *rq); |
| 684 | #endif |
| 685 | }; |
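As a rough sketch (not taken from any real driver), a minimal blk_mq_ops only needs ->queue_rq(); all other callbacks are optional. The mydrv_* names below are hypothetical:

```c
static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);
	/* translate @rq and hand it to the hardware here ... */
	return BLK_STS_OK;
}

static const struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq	= mydrv_queue_rq,
};
```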
| 686 | |
| 687 | /* Keep hctx_flag_name[] in sync with the definitions below */ |
| 688 | enum { |
| 689 | BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, |
| 690 | /* |
| 691 | * Set when this device requires underlying blk-mq device for |
| 692 | * completing IO: |
| 693 | */ |
| 694 | BLK_MQ_F_STACKING = 1 << 2, |
| 695 | BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, |
| 696 | BLK_MQ_F_BLOCKING = 1 << 4, |
| 697 | |
| 698 | /* |
| 699 | * Alloc tags on a round-robin base instead of the first available one. |
| 700 | */ |
| 701 | BLK_MQ_F_TAG_RR = 1 << 5, |
| 702 | |
| 703 | /* |
| 704 | * Select 'none' during queue registration in case of a single hwq |
| 705 | * or shared hwqs instead of 'mq-deadline'. |
| 706 | */ |
| 707 | BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, |
| 708 | |
| 709 | BLK_MQ_F_MAX = 1 << 7, |
| 710 | }; |
| 711 | |
| 712 | #define BLK_MQ_MAX_DEPTH (10240) |
| 713 | #define BLK_MQ_NO_HCTX_IDX (-1U) |
| 714 | |
| 715 | enum { |
| 716 | /* Keep hctx_state_name[] in sync with the definitions below */ |
| 717 | BLK_MQ_S_STOPPED, |
| 718 | BLK_MQ_S_TAG_ACTIVE, |
| 719 | BLK_MQ_S_SCHED_RESTART, |
| 720 | /* hw queue is inactive after all its CPUs become offline */ |
| 721 | BLK_MQ_S_INACTIVE, |
| 722 | BLK_MQ_S_MAX |
| 723 | }; |
| 724 | |
| 725 | struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, |
| 726 | struct queue_limits *lim, void *queuedata, |
| 727 | struct lock_class_key *lkclass); |
| 728 | #define blk_mq_alloc_disk(set, lim, queuedata) \ |
| 729 | ({ \ |
| 730 | static struct lock_class_key __key; \ |
| 731 | \ |
| 732 | __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ |
| 733 | }) |
| 734 | struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, |
| 735 | struct lock_class_key *lkclass); |
| 736 | struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, |
| 737 | struct queue_limits *lim, void *queuedata); |
| 738 | int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, |
| 739 | struct request_queue *q); |
| 740 | void blk_mq_destroy_queue(struct request_queue *); |
| 741 | |
| 742 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); |
| 743 | int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, |
| 744 | const struct blk_mq_ops *ops, unsigned int queue_depth, |
| 745 | unsigned int set_flags); |
| 746 | void blk_mq_free_tag_set(struct blk_mq_tag_set *set); |
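A hedged sketch of the usual probe-time sequence, assuming the hypothetical mydrv_mq_ops and a per-command struct mydrv_cmd; error handling is reduced to the essentials:

```c
static int mydrv_init_disk(struct mydrv_dev *dev)
{
	struct blk_mq_tag_set *set = &dev->tag_set;
	struct gendisk *disk;
	int ret;

	memset(set, 0, sizeof(*set));
	set->ops = &mydrv_mq_ops;
	set->nr_hw_queues = 1;
	set->queue_depth = 64;
	set->numa_node = NUMA_NO_NODE;
	set->cmd_size = sizeof(struct mydrv_cmd);	/* per-request PDU */

	ret = blk_mq_alloc_tag_set(set);
	if (ret)
		return ret;

	disk = blk_mq_alloc_disk(set, NULL, dev);	/* NULL: default queue limits */
	if (IS_ERR(disk)) {
		blk_mq_free_tag_set(set);
		return PTR_ERR(disk);
	}
	dev->disk = disk;
	return 0;
}
```

The matching teardown would typically delete and put the gendisk first and only then call blk_mq_free_tag_set().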
| 747 | |
| 748 | void blk_mq_free_request(struct request *rq); |
| 749 | int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, |
| 750 | unsigned int poll_flags); |
| 751 | |
| 752 | bool blk_mq_queue_inflight(struct request_queue *q); |
| 753 | |
| 754 | enum { |
| 755 | /* return when out of requests */ |
| 756 | BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), |
| 757 | /* allocate from reserved pool */ |
| 758 | BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), |
| 759 | /* set RQF_PM */ |
| 760 | BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), |
| 761 | }; |
| 762 | |
| 763 | struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, |
| 764 | blk_mq_req_flags_t flags); |
| 765 | struct request *blk_mq_alloc_request_hctx(struct request_queue *q, |
| 766 | blk_opf_t opf, blk_mq_req_flags_t flags, |
| 767 | unsigned int hctx_idx); |
| 768 | |
| 769 | /* |
| 770 | * Tag address space map. |
| 771 | */ |
| 772 | struct blk_mq_tags { |
| 773 | unsigned int nr_tags; |
| 774 | unsigned int nr_reserved_tags; |
| 775 | unsigned int active_queues; |
| 776 | |
| 777 | struct sbitmap_queue bitmap_tags; |
| 778 | struct sbitmap_queue breserved_tags; |
| 779 | |
| 780 | struct request **rqs; |
| 781 | struct request **static_rqs; |
| 782 | struct list_head page_list; |
| 783 | |
| 784 | /* |
| 785 | * used to clear request reference in rqs[] before freeing one |
| 786 | * request pool |
| 787 | */ |
| 788 | spinlock_t lock; |
| 789 | struct rcu_head rcu_head; |
| 790 | }; |
| 791 | |
| 792 | static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, |
| 793 | unsigned int tag) |
| 794 | { |
| 795 | if (tag < tags->nr_tags) { |
| 796 | prefetch(tags->rqs[tag]); |
| 797 | return tags->rqs[tag]; |
| 798 | } |
| 799 | |
| 800 | return NULL; |
| 801 | } |
| 802 | |
| 803 | enum { |
| 804 | BLK_MQ_UNIQUE_TAG_BITS = 16, |
| 805 | BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1, |
| 806 | }; |
| 807 | |
| 808 | u32 blk_mq_unique_tag(struct request *rq); |
| 809 | |
| 810 | static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag) |
| 811 | { |
| 812 | return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS; |
| 813 | } |
| 814 | |
| 815 | static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) |
| 816 | { |
| 817 | return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; |
| 818 | } |
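For example, a driver can recover the hardware queue index and the per-queue tag from the combined value (SCSI LLDs use a similar scheme); purely illustrative:

```c
/* Illustrative only: split the unique tag back into (hwq, tag). */
static void mydrv_trace_rq(struct request *rq)
{
	u32 unique = blk_mq_unique_tag(rq);

	pr_debug("mydrv: hwq %u tag %u\n",
		 blk_mq_unique_tag_to_hwq(unique),
		 blk_mq_unique_tag_to_tag(unique));
}
```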
| 819 | |
| 820 | /** |
| 821 | * blk_mq_rq_state() - read the current MQ_RQ_* state of a request |
| 822 | * @rq: target request. |
| 823 | */ |
| 824 | static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) |
| 825 | { |
| 826 | return READ_ONCE(rq->state); |
| 827 | } |
| 828 | |
| 829 | static inline int blk_mq_request_started(struct request *rq) |
| 830 | { |
| 831 | return blk_mq_rq_state(rq) != MQ_RQ_IDLE; |
| 832 | } |
| 833 | |
| 834 | static inline int blk_mq_request_completed(struct request *rq) |
| 835 | { |
| 836 | return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; |
| 837 | } |
| 838 | |
| 839 | /* |
| 841 | * Set the state to complete when completing a request from inside ->queue_rq. |
| 842 | * This is used by drivers that want to ensure special complete actions that |
| 843 | * need access to the request are called on failure, e.g. by nvme for |
| 844 | * multipathing. |
| 845 | */ |
| 846 | static inline void blk_mq_set_request_complete(struct request *rq) |
| 847 | { |
| 848 | WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); |
| 849 | } |
| 850 | |
| 851 | /* |
| 852 | * Complete the request directly instead of deferring it to softirq or |
| 853 | * completing it on another CPU. Useful in preemptible, non-interrupt context. |
| 854 | */ |
| 855 | static inline void blk_mq_complete_request_direct(struct request *rq, |
| 856 | void (*complete)(struct request *rq)) |
| 857 | { |
| 858 | WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); |
| 859 | complete(rq); |
| 860 | } |
| 861 | |
| 862 | void blk_mq_start_request(struct request *rq); |
| 863 | void blk_mq_end_request(struct request *rq, blk_status_t error); |
| 864 | void __blk_mq_end_request(struct request *rq, blk_status_t error); |
| 865 | void blk_mq_end_request_batch(struct io_comp_batch *ib); |
| 866 | |
| 867 | /* |
| 868 | * Only need start/end time stamping if we have iostat or |
| 869 | * blk stats enabled, or using an IO scheduler. |
| 870 | */ |
| 871 | static inline bool blk_mq_need_time_stamp(struct request *rq) |
| 872 | { |
| 873 | return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); |
| 874 | } |
| 875 | |
| 876 | static inline bool blk_mq_is_reserved_rq(struct request *rq) |
| 877 | { |
| 878 | return rq->rq_flags & RQF_RESV; |
| 879 | } |
| 880 | |
| 881 | /** |
| 882 | * blk_mq_add_to_batch() - add a request to the completion batch |
| 883 | * @req: The request to add to batch |
| 884 | * @iob: The batch to add the request to |
| 885 | * @is_error: Specify true if the request failed with an error |
| 886 | * @complete: The completion handler for the request |
| 887 | * |
| 888 | * Batched completions only work when there is no I/O error and no special |
| 889 | * ->end_io handler. |
| 890 | * |
| 891 | * Return: true when the request was added to the batch, otherwise false |
| 892 | */ |
| 893 | static inline bool blk_mq_add_to_batch(struct request *req, |
| 894 | struct io_comp_batch *iob, bool is_error, |
| 895 | void (*complete)(struct io_comp_batch *)) |
| 896 | { |
| 897 | /* |
| 898 | * Check various conditions that exclude batch processing: |
| 899 | * 1) No batch container |
| 900 | * 2) Has scheduler data attached |
| 901 | * 3) Not a passthrough request and end_io set |
| 902 | * 4) Not a passthrough request and failed with an error |
| 903 | */ |
| 904 | if (!iob) |
| 905 | return false; |
| 906 | if (req->rq_flags & RQF_SCHED_TAGS) |
| 907 | return false; |
| 908 | if (!blk_rq_is_passthrough(req)) { |
| 909 | if (req->end_io) |
| 910 | return false; |
| 911 | if (is_error) |
| 912 | return false; |
| 913 | } |
| 914 | |
| 915 | if (!iob->complete) |
| 916 | iob->complete = complete; |
| 917 | else if (iob->complete != complete) |
| 918 | return false; |
| 919 | iob->need_ts |= blk_mq_need_time_stamp(req); |
| 920 | rq_list_add_tail(&iob->req_list, req); |
| 921 | return true; |
| 922 | } |
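A hedged sketch of how a polled completion path typically uses this, modelled on the pattern NVMe follows; mydrv_unmap() and the status handling are hypothetical:

```c
static void mydrv_complete_batch(struct io_comp_batch *iob)
{
	struct request *rq;

	rq_list_for_each(&iob->req_list, rq)
		mydrv_unmap(rq);		/* hypothetical per-request teardown */
	blk_mq_end_request_batch(iob);
}

static void mydrv_complete_one(struct request *rq, struct io_comp_batch *iob,
			       blk_status_t status)
{
	/* fall back to the regular completion path when batching isn't possible */
	if (!blk_mq_add_to_batch(rq, iob, status != BLK_STS_OK,
				 mydrv_complete_batch))
		blk_mq_complete_request(rq);
}
```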
| 923 | |
| 924 | void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); |
| 925 | void blk_mq_kick_requeue_list(struct request_queue *q); |
| 926 | void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); |
| 927 | void blk_mq_complete_request(struct request *rq); |
| 928 | bool blk_mq_complete_request_remote(struct request *rq); |
| 929 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); |
| 930 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); |
| 931 | void blk_mq_stop_hw_queues(struct request_queue *q); |
| 932 | void blk_mq_start_hw_queues(struct request_queue *q); |
| 933 | void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); |
| 934 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); |
| 935 | void blk_mq_quiesce_queue(struct request_queue *q); |
| 936 | void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set); |
| 937 | void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); |
| 938 | void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); |
| 939 | void blk_mq_unquiesce_queue(struct request_queue *q); |
| 940 | void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); |
| 941 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); |
| 942 | void blk_mq_run_hw_queues(struct request_queue *q, bool async); |
| 943 | void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); |
| 944 | void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, |
| 945 | busy_tag_iter_fn *fn, void *priv); |
| 946 | void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); |
| 947 | void blk_mq_freeze_queue_nomemsave(struct request_queue *q); |
| 948 | void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); |
| 949 | static inline unsigned int __must_check |
| 950 | blk_mq_freeze_queue(struct request_queue *q) |
| 951 | { |
| 952 | unsigned int memflags = memalloc_noio_save(); |
| 953 | |
| 954 | blk_mq_freeze_queue_nomemsave(q); |
| 955 | return memflags; |
| 956 | } |
| 957 | static inline void |
| 958 | blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) |
| 959 | { |
| 960 | blk_mq_unfreeze_queue_nomemrestore(q); |
| 961 | memalloc_noio_restore(memflags); |
| 962 | } |
| 963 | void blk_freeze_queue_start(struct request_queue *q); |
| 964 | void blk_mq_freeze_queue_wait(struct request_queue *q); |
| 965 | int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, |
| 966 | unsigned long timeout); |
| 967 | void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); |
| 968 | void blk_freeze_queue_start_non_owner(struct request_queue *q); |
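Typical usage of the freeze pair defined above, as a minimal sketch: freeze to drain in-flight requests and block new I/O, apply the change, then unfreeze with the returned memalloc flags:

```c
static void mydrv_apply_change(struct request_queue *q)
{
	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);
	/* no requests are in flight here; update driver/queue state safely */
	blk_mq_unfreeze_queue(q, memflags);
}
```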
| 969 | |
| 970 | unsigned int blk_mq_num_possible_queues(unsigned int max_queues); |
| 971 | unsigned int blk_mq_num_online_queues(unsigned int max_queues); |
| 972 | void blk_mq_map_queues(struct blk_mq_queue_map *qmap); |
| 973 | void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, |
| 974 | struct device *dev, unsigned int offset); |
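A ->map_queues() implementation with no hardware-specific affinity information can simply fall back to blk_mq_map_queues(); a sketch, assuming a single default map:

```c
static void mydrv_map_queues(struct blk_mq_tag_set *set)
{
	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
}
```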
| 975 | void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); |
| 976 | |
| 977 | void blk_mq_quiesce_queue_nowait(struct request_queue *q); |
| 978 | |
| 979 | unsigned int blk_mq_rq_cpu(struct request *rq); |
| 980 | |
| 981 | bool __blk_should_fake_timeout(struct request_queue *q); |
| 982 | static inline bool blk_should_fake_timeout(struct request_queue *q) |
| 983 | { |
| 984 | if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) && |
| 985 | test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) |
| 986 | return __blk_should_fake_timeout(q); |
| 987 | return false; |
| 988 | } |
| 989 | |
| 990 | /** |
| 991 | * blk_mq_rq_from_pdu - cast a PDU to a request |
| 992 | * @pdu: the PDU (Protocol Data Unit) to be cast |
| 993 | * |
| 994 | * Return: request |
| 995 | * |
| 996 | * Driver command data is immediately after the request. So subtract request |
| 997 | * size to get back to the original request. |
| 998 | */ |
| 999 | static inline struct request *blk_mq_rq_from_pdu(void *pdu) |
| 1000 | { |
| 1001 | return pdu - sizeof(struct request); |
| 1002 | } |
| 1003 | |
| 1004 | /** |
| 1005 | * blk_mq_rq_to_pdu - cast a request to a PDU |
| 1006 | * @rq: the request to be cast |
| 1007 | * |
| 1008 | * Return: pointer to the PDU |
| 1009 | * |
| 1010 | * Driver command data is immediately after the request. So add request to get |
| 1011 | * the PDU. |
| 1012 | */ |
| 1013 | static inline void *blk_mq_rq_to_pdu(struct request *rq) |
| 1014 | { |
| 1015 | return rq + 1; |
| 1016 | } |
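With cmd_size set in the tag set, the PDU lives directly behind each request, so the two conversions above are just pointer arithmetic. A hedged sketch with a hypothetical struct mydrv_cmd:

```c
struct mydrv_cmd {
	__le64 lba;
	__le32 nr_sectors;
};

static void mydrv_prep_cmd(struct request *rq)
{
	struct mydrv_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->lba = cpu_to_le64(blk_rq_pos(rq));
	cmd->nr_sectors = cpu_to_le32(blk_rq_sectors(rq));
}

static void mydrv_complete_cmd(struct mydrv_cmd *cmd, blk_status_t status)
{
	blk_mq_end_request(blk_mq_rq_from_pdu(cmd), status);
}
```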
| 1017 | |
| 1018 | static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id) |
| 1019 | { |
| 1020 | struct blk_mq_hw_ctx *hctx; |
| 1021 | |
| 1022 | rcu_read_lock(); |
| 1023 | hctx = rcu_dereference(q->queue_hw_ctx)[id]; |
| 1024 | rcu_read_unlock(); |
| 1025 | |
| 1026 | return hctx; |
| 1027 | } |
| 1028 | |
| 1029 | #define queue_for_each_hw_ctx(q, hctx, i) \ |
| 1030 | for ((i) = 0; (i) < (q)->nr_hw_queues && \ |
| 1031 | ({ hctx = queue_hctx((q), i); 1; }); (i)++) |
| 1032 | |
| 1033 | #define hctx_for_each_ctx(hctx, ctx, i) \ |
| 1034 | for ((i) = 0; (i) < (hctx)->nr_ctx && \ |
| 1035 | ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) |
| 1036 | |
| 1037 | static inline void blk_mq_cleanup_rq(struct request *rq) |
| 1038 | { |
| 1039 | if (rq->q->mq_ops->cleanup_rq) |
| 1040 | rq->q->mq_ops->cleanup_rq(rq); |
| 1041 | } |
| 1042 | |
| 1043 | void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, |
| 1044 | struct lock_class_key *key); |
| 1045 | |
| 1046 | static inline bool rq_is_sync(struct request *rq) |
| 1047 | { |
| 1048 | return op_is_sync(rq->cmd_flags); |
| 1049 | } |
| 1050 | |
| 1051 | void blk_rq_init(struct request_queue *q, struct request *rq); |
| 1052 | int blk_rq_prep_clone(struct request *rq, struct request *rq_src, |
| 1053 | struct bio_set *bs, gfp_t gfp_mask, |
| 1054 | int (*bio_ctr)(struct bio *, struct bio *, void *), void *data); |
| 1055 | void blk_rq_unprep_clone(struct request *rq); |
| 1056 | blk_status_t blk_insert_cloned_request(struct request *rq); |
| 1057 | |
| 1058 | struct rq_map_data { |
| 1059 | struct page **pages; |
| 1060 | unsigned long offset; |
| 1061 | unsigned short page_order; |
| 1062 | unsigned short nr_entries; |
| 1063 | bool null_mapped; |
| 1064 | bool from_user; |
| 1065 | }; |
| 1066 | |
| 1067 | int blk_rq_map_user(struct request_queue *, struct request *, |
| 1068 | struct rq_map_data *, void __user *, unsigned long, gfp_t); |
| 1069 | int blk_rq_map_user_io(struct request *, struct rq_map_data *, |
| 1070 | void __user *, unsigned long, gfp_t, bool, int, bool, int); |
| 1071 | int blk_rq_map_user_iov(struct request_queue *, struct request *, |
| 1072 | struct rq_map_data *, const struct iov_iter *, gfp_t); |
| 1073 | int blk_rq_unmap_user(struct bio *); |
| 1074 | int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len, |
| 1075 | gfp_t gfp); |
| 1076 | int blk_rq_append_bio(struct request *rq, struct bio *bio); |
| 1077 | void blk_execute_rq_nowait(struct request *rq, bool at_head); |
| 1078 | blk_status_t blk_execute_rq(struct request *rq, bool at_head); |
| 1079 | bool blk_rq_is_poll(struct request *rq); |
| 1080 | |
| 1081 | struct req_iterator { |
| 1082 | struct bvec_iter iter; |
| 1083 | struct bio *bio; |
| 1084 | }; |
| 1085 | |
| 1086 | #define __rq_for_each_bio(_bio, rq) \ |
| 1087 | if ((rq->bio)) \ |
| 1088 | for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) |
| 1089 | |
| 1090 | #define rq_for_each_segment(bvl, _rq, _iter) \ |
| 1091 | __rq_for_each_bio(_iter.bio, _rq) \ |
| 1092 | bio_for_each_segment(bvl, _iter.bio, _iter.iter) |
| 1093 | |
| 1094 | #define rq_for_each_bvec(bvl, _rq, _iter) \ |
| 1095 | __rq_for_each_bio(_iter.bio, _rq) \ |
| 1096 | bio_for_each_bvec(bvl, _iter.bio, _iter.iter) |
| 1097 | |
| 1098 | #define rq_iter_last(bvec, _iter) \ |
| 1099 | (_iter.bio->bi_next == NULL && \ |
| 1100 | bio_iter_last(bvec, _iter.iter)) |
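For instance, summing the data bytes of a request by walking its segments (for a data-carrying request this matches blk_rq_bytes()); purely illustrative:

```c
static unsigned int mydrv_count_bytes(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec bv;
	unsigned int bytes = 0;

	rq_for_each_segment(bv, rq, iter)
		bytes += bv.bv_len;

	return bytes;
}
```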
| 1101 | |
| 1102 | /* |
| 1103 | * blk_rq_pos() : the current sector |
| 1104 | * blk_rq_bytes() : bytes left in the entire request |
| 1105 | * blk_rq_cur_bytes() : bytes left in the current segment |
| 1106 | * blk_rq_sectors() : sectors left in the entire request |
| 1107 | * blk_rq_cur_sectors() : sectors left in the current segment |
| 1108 | * blk_rq_stats_sectors() : sectors of the entire request used for stats |
| 1109 | */ |
| 1110 | static inline sector_t blk_rq_pos(const struct request *rq) |
| 1111 | { |
| 1112 | return rq->__sector; |
| 1113 | } |
| 1114 | |
| 1115 | static inline unsigned int blk_rq_bytes(const struct request *rq) |
| 1116 | { |
| 1117 | return rq->__data_len; |
| 1118 | } |
| 1119 | |
| 1120 | static inline int blk_rq_cur_bytes(const struct request *rq) |
| 1121 | { |
| 1122 | if (!rq->bio) |
| 1123 | return 0; |
| 1124 | if (!bio_has_data(rq->bio)) /* dataless requests such as discard */ |
| 1125 | return rq->bio->bi_iter.bi_size; |
| 1126 | return bio_iovec(rq->bio).bv_len; |
| 1127 | } |
| 1128 | |
| 1129 | static inline unsigned int blk_rq_sectors(const struct request *rq) |
| 1130 | { |
| 1131 | return blk_rq_bytes(rq) >> SECTOR_SHIFT; |
| 1132 | } |
| 1133 | |
| 1134 | static inline unsigned int blk_rq_cur_sectors(const struct request *rq) |
| 1135 | { |
| 1136 | return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; |
| 1137 | } |
| 1138 | |
| 1139 | static inline unsigned int blk_rq_stats_sectors(const struct request *rq) |
| 1140 | { |
| 1141 | return rq->stats_sectors; |
| 1142 | } |
| 1143 | |
| 1144 | /* |
| 1145 | * Some commands like WRITE SAME have a payload or data transfer size which |
| 1146 | * is different from the size of the request. Any driver that supports such |
| 1147 | * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to |
| 1148 | * calculate the data transfer size. |
| 1149 | */ |
| 1150 | static inline unsigned int blk_rq_payload_bytes(struct request *rq) |
| 1151 | { |
| 1152 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) |
| 1153 | return rq->special_vec.bv_len; |
| 1154 | return blk_rq_bytes(rq); |
| 1155 | } |
| 1156 | |
| 1157 | /* |
| 1158 | * Return the first full biovec in the request. The caller needs to check that |
| 1159 | * the request has at least one bvec before calling this helper. |
| 1160 | */ |
| 1161 | static inline struct bio_vec req_bvec(struct request *rq) |
| 1162 | { |
| 1163 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) |
| 1164 | return rq->special_vec; |
| 1165 | return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); |
| 1166 | } |
| 1167 | |
| 1168 | static inline unsigned int blk_rq_count_bios(struct request *rq) |
| 1169 | { |
| 1170 | unsigned int nr_bios = 0; |
| 1171 | struct bio *bio; |
| 1172 | |
| 1173 | __rq_for_each_bio(bio, rq) |
| 1174 | nr_bios++; |
| 1175 | |
| 1176 | return nr_bios; |
| 1177 | } |
| 1178 | |
| 1179 | void blk_steal_bios(struct bio_list *list, struct request *rq); |
| 1180 | |
| 1181 | /* |
| 1182 | * Request completion related functions. |
| 1183 | * |
| 1184 | * blk_update_request() completes given number of bytes and updates |
| 1185 | * the request without completing it. |
| 1186 | */ |
| 1187 | bool blk_update_request(struct request *rq, blk_status_t error, |
| 1188 | unsigned int nr_bytes); |
| 1189 | void blk_abort_request(struct request *); |
| 1190 | |
| 1191 | /* |
| 1192 | * Number of physical segments as sent to the device. |
| 1193 | * |
| 1194 | * Normally this is the number of discontiguous data segments sent by the |
| 1195 | * submitter. But for a data-less command like discard we might have no |
| 1196 | * actual data segments submitted, and the driver might have to add its |
| 1197 | * own special payload. In that case we still return 1 here so that this |
| 1198 | * special payload will be mapped. |
| 1199 | */ |
| 1200 | static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) |
| 1201 | { |
| 1202 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) |
| 1203 | return 1; |
| 1204 | return rq->nr_phys_segments; |
| 1205 | } |
| 1206 | |
| 1207 | /* |
| 1208 | * Number of discard segments (or ranges) the driver needs to fill in. |
| 1209 | * Each discard bio merged into a request is counted as one segment. |
| 1210 | */ |
| 1211 | static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) |
| 1212 | { |
| 1213 | return max_t(unsigned short, rq->nr_phys_segments, 1); |
| 1214 | } |
| 1215 | |
| 1216 | /** |
| 1217 | * blk_rq_nr_bvec - return number of bvecs in a request |
| 1218 | * @rq: request to calculate bvecs for |
| 1219 | * |
| 1220 | * Returns the number of bvecs. |
| 1221 | */ |
| 1222 | static inline unsigned int blk_rq_nr_bvec(struct request *rq) |
| 1223 | { |
| 1224 | struct req_iterator rq_iter; |
| 1225 | struct bio_vec bv; |
| 1226 | unsigned int nr_bvec = 0; |
| 1227 | |
| 1228 | rq_for_each_bvec(bv, rq, rq_iter) |
| 1229 | nr_bvec++; |
| 1230 | |
| 1231 | return nr_bvec; |
| 1232 | } |
| 1233 | |
| 1234 | int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, |
| 1235 | struct scatterlist **last_sg); |
| 1236 | static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist) |
| 1237 | { |
| 1238 | struct scatterlist *last_sg = NULL; |
| 1239 | |
| 1240 | return __blk_rq_map_sg(rq, sglist, &last_sg); |
| 1241 | } |
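A hedged sketch of how a DMA-capable driver might use this from ->queue_rq(); dev->dma_dev and the sglist sizing are hypothetical, and <linux/dma-mapping.h> would be needed for dma_map_sg():

```c
static int mydrv_map_data(struct mydrv_dev *dev, struct request *rq,
			  struct scatterlist *sgl)
{
	int nents;

	sg_init_table(sgl, blk_rq_nr_phys_segments(rq));
	nents = blk_rq_map_sg(rq, sgl);
	if (nents <= 0)
		return -EIO;

	/* dma_map_sg() returns 0 on mapping failure */
	return dma_map_sg(dev->dma_dev, sgl, nents, rq_dma_dir(rq));
}
```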
| 1242 | void blk_dump_rq_flags(struct request *, char *); |
| 1243 | |
| 1244 | #endif /* BLK_MQ_H */ |
| 1245 | |