#include <linux/device.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/anon_inodes.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/poll.h>

#include <linux/io_uring/cmd.h>
#include <linux/io_uring_types.h>
#include <uapi/linux/io_uring/mock_file.h>

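/*
 * Tracks one delayed read/write request: the kiocb to complete, the hrtimer
 * that fires after the configured delay, and the result to report.
 */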
struct io_mock_iocb {
	struct kiocb *iocb;
	struct hrtimer timer;
	int res;
};

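/*
 * Per-file state for a mock file created via IORING_MOCK_MGR_CMD_CREATE:
 * the advertised file size, an artificial read/write completion delay, and
 * a wait queue head registered with poll_wait() when polling is enabled.
 */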
struct io_mock_file {
	size_t size;
	u64 rw_delay_ns;
	bool pollable;
	struct wait_queue_head poll_wq;
};

#define IO_VALID_COPY_CMD_FLAGS IORING_MOCK_COPY_FROM

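/*
 * Bounce data between a registered-buffer iterator and a plain user pointer
 * one page at a time. Returns the number of bytes copied, or -ENOMEM if the
 * bounce page cannot be allocated.
 */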
static int io_copy_regbuf(struct iov_iter *reg_iter, void __user *ubuf)
{
	size_t ret, copied = 0;
	size_t buflen = PAGE_SIZE;
	void *tmp_buf;

	tmp_buf = kzalloc(buflen, GFP_KERNEL);
	if (!tmp_buf)
		return -ENOMEM;

	while (iov_iter_count(reg_iter)) {
		size_t len = min(iov_iter_count(reg_iter), buflen);

		if (iov_iter_rw(reg_iter) == ITER_SOURCE) {
			ret = copy_from_iter(tmp_buf, len, reg_iter);
			if (ret <= 0)
				break;
			if (copy_to_user(ubuf, tmp_buf, ret))
				break;
		} else {
			if (copy_from_user(tmp_buf, ubuf, len))
				break;
			ret = copy_to_iter(tmp_buf, len, reg_iter);
			if (ret <= 0)
				break;
		}
		ubuf += ret;
		copied += ret;
	}

	kfree(tmp_buf);
	return copied;
}

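/*
 * IORING_MOCK_CMD_COPY_REGBUF: import the request's registered buffer as an
 * iov_iter and copy between it and the user address in sqe->addr3. The
 * IORING_MOCK_COPY_FROM flag (passed in sqe->file_index) selects the copy
 * direction.
 */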
static int io_cmd_copy_regbuf(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	const struct io_uring_sqe *sqe = cmd->sqe;
	const struct iovec __user *iovec;
	unsigned flags, iovec_len;
	struct iov_iter iter;
	void __user *ubuf;
	int dir, ret;

	ubuf = u64_to_user_ptr(READ_ONCE(sqe->addr3));
	iovec = u64_to_user_ptr(READ_ONCE(sqe->addr));
	iovec_len = READ_ONCE(sqe->len);
	flags = READ_ONCE(sqe->file_index);

	if (unlikely(sqe->ioprio || sqe->__pad1))
		return -EINVAL;
	if (flags & ~IO_VALID_COPY_CMD_FLAGS)
		return -EINVAL;

	dir = (flags & IORING_MOCK_COPY_FROM) ? ITER_SOURCE : ITER_DEST;
	ret = io_uring_cmd_import_fixed_vec(cmd, iovec, iovec_len, dir, &iter,
					    issue_flags);
	if (ret)
		return ret;
	ret = io_copy_regbuf(&iter, ubuf);
	return ret ? ret : -EFAULT;
}

static int io_mock_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	switch (cmd->cmd_op) {
	case IORING_MOCK_CMD_COPY_REGBUF:
		return io_cmd_copy_regbuf(cmd, issue_flags);
	}
	return -EOPNOTSUPP;
}

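/*
 * Hrtimer callback for delayed I/O: complete the parked kiocb with the
 * stored result and free the tracking structure.
 */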
static enum hrtimer_restart io_mock_rw_timer_expired(struct hrtimer *timer)
{
	struct io_mock_iocb *mio = container_of(timer, struct io_mock_iocb, timer);
	struct kiocb *iocb = mio->iocb;

	WRITE_ONCE(iocb->private, NULL);
	iocb->ki_complete(iocb, mio->res);
	kfree(mio);
	return HRTIMER_NORESTART;
}

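/*
 * Queue an asynchronous completion: arm an hrtimer for the file's configured
 * delay and return -EIOCBQUEUED so the caller treats the request as pending.
 */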
static ssize_t io_mock_delay_rw(struct kiocb *iocb, size_t len)
{
	struct io_mock_file *mf = iocb->ki_filp->private_data;
	struct io_mock_iocb *mio;

	mio = kzalloc(sizeof(*mio), GFP_KERNEL);
	if (!mio)
		return -ENOMEM;

	mio->iocb = iocb;
	mio->res = len;
	hrtimer_setup(&mio->timer, io_mock_rw_timer_expired,
		      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer_start(&mio->timer, ns_to_ktime(mf->rw_delay_ns),
		      HRTIMER_MODE_REL);
	return -EIOCBQUEUED;
}

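/*
 * Reads return zeroes within the mock file size. If a delay is configured
 * and the full length was zero-filled, completion is deferred to the hrtimer.
 */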
static ssize_t io_mock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct io_mock_file *mf = iocb->ki_filp->private_data;
	size_t len = iov_iter_count(to);
	size_t nr_zeroed;

	if (iocb->ki_pos + len > mf->size)
		return -EINVAL;
	nr_zeroed = iov_iter_zero(len, to);
	if (!mf->rw_delay_ns || nr_zeroed != len)
		return nr_zeroed;

	return io_mock_delay_rw(iocb, len);
}

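/*
 * Writes discard the data: the iterator is advanced without copying. As with
 * reads, completion is deferred when a delay is configured.
 */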
static ssize_t io_mock_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct io_mock_file *mf = iocb->ki_filp->private_data;
	size_t len = iov_iter_count(from);

	if (iocb->ki_pos + len > mf->size)
		return -EINVAL;
	if (!mf->rw_delay_ns) {
		iov_iter_advance(from, len);
		return len;
	}

	return io_mock_delay_rw(iocb, len);
}

static loff_t io_mock_llseek(struct file *file, loff_t offset, int whence)
{
	struct io_mock_file *mf = file->private_data;

	return fixed_size_llseek(file, offset, whence, mf->size);
}

static __poll_t io_mock_poll(struct file *file, struct poll_table_struct *pt)
{
	struct io_mock_file *mf = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &mf->poll_wq, pt);

	mask |= EPOLLOUT | EPOLLWRNORM;
	mask |= EPOLLIN | EPOLLRDNORM;
	return mask;
}

static int io_mock_release(struct inode *inode, struct file *file)
{
	struct io_mock_file *mf = file->private_data;

	kfree(mf);
	return 0;
}

static const struct file_operations io_mock_fops = {
	.owner = THIS_MODULE,
	.release = io_mock_release,
	.uring_cmd = io_mock_cmd,
	.read_iter = io_mock_read_iter,
	.write_iter = io_mock_write_iter,
	.llseek = io_mock_llseek,
};

static const struct file_operations io_mock_poll_fops = {
	.owner = THIS_MODULE,
	.release = io_mock_release,
	.uring_cmd = io_mock_cmd,
	.read_iter = io_mock_read_iter,
	.write_iter = io_mock_write_iter,
	.llseek = io_mock_llseek,
	.poll = io_mock_poll,
};

#define IO_VALID_CREATE_FLAGS (IORING_MOCK_CREATE_F_SUPPORT_NOWAIT | \
			       IORING_MOCK_CREATE_F_POLL)

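/*
 * IORING_MOCK_MGR_CMD_CREATE: validate the user-supplied io_uring_mock_create
 * argument, allocate the backing io_mock_file, create an anonymous inode file
 * for it, and return the new fd to userspace via the same structure.
 */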
static int io_create_mock_file(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	const struct file_operations *fops = &io_mock_fops;
	const struct io_uring_sqe *sqe = cmd->sqe;
	struct io_uring_mock_create mc, __user *uarg;
	struct file *file;
	struct io_mock_file *mf __free(kfree) = NULL;
	size_t uarg_size;

	/*
	 * It's a testing only driver that allows exercising edge cases
	 * that wouldn't be possible to hit otherwise.
	 */
	add_taint(TAINT_TEST, LOCKDEP_STILL_OK);

	uarg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	uarg_size = READ_ONCE(sqe->len);

	if (sqe->ioprio || sqe->__pad1 || sqe->addr3 || sqe->file_index)
		return -EINVAL;
	if (uarg_size != sizeof(mc))
		return -EINVAL;

	memset(&mc, 0, sizeof(mc));
	if (copy_from_user(&mc, uarg, uarg_size))
		return -EFAULT;
	if (!mem_is_zero(mc.__resv, sizeof(mc.__resv)))
		return -EINVAL;
	if (mc.flags & ~IO_VALID_CREATE_FLAGS)
		return -EINVAL;
	if (mc.file_size > SZ_1G)
		return -EINVAL;
	if (mc.rw_delay_ns > NSEC_PER_SEC)
		return -EINVAL;

	mf = kzalloc(sizeof(*mf), GFP_KERNEL_ACCOUNT);
	if (!mf)
		return -ENOMEM;

	init_waitqueue_head(&mf->poll_wq);
	mf->size = mc.file_size;
	mf->rw_delay_ns = mc.rw_delay_ns;
	if (mc.flags & IORING_MOCK_CREATE_F_POLL) {
		fops = &io_mock_poll_fops;
		mf->pollable = true;
	}

	FD_PREPARE(fdf, O_RDWR | O_CLOEXEC,
		   anon_inode_create_getfile("[io_uring_mock]", fops, mf,
					     O_RDWR | O_CLOEXEC, NULL));
	if (fdf.err)
		return fdf.err;

	retain_and_null_ptr(mf);
	file = fd_prepare_file(fdf);
	file->f_mode |= FMODE_READ | FMODE_CAN_READ | FMODE_WRITE |
			FMODE_CAN_WRITE | FMODE_LSEEK;
	if (mc.flags & IORING_MOCK_CREATE_F_SUPPORT_NOWAIT)
		file->f_mode |= FMODE_NOWAIT;

	mc.out_fd = fd_prepare_fd(fdf);
	if (copy_to_user(uarg, &mc, uarg_size))
		return -EFAULT;

	fd_publish(fdf);
	return 0;
}

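/*
 * IORING_MOCK_MGR_CMD_PROBE: the caller must pass a fully zeroed
 * io_uring_mock_probe structure and receives IORING_MOCK_FEAT_END back in
 * ->features.
 */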
static int io_probe_mock(struct io_uring_cmd *cmd)
{
	const struct io_uring_sqe *sqe = cmd->sqe;
	struct io_uring_mock_probe mp, __user *uarg;
	size_t uarg_size;

	uarg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	uarg_size = READ_ONCE(sqe->len);

	if (sqe->ioprio || sqe->__pad1 || sqe->addr3 || sqe->file_index ||
	    uarg_size != sizeof(mp))
		return -EINVAL;

	memset(&mp, 0, sizeof(mp));
	if (copy_from_user(&mp, uarg, uarg_size))
		return -EFAULT;
	if (!mem_is_zero(&mp, sizeof(mp)))
		return -EINVAL;

	mp.features = IORING_MOCK_FEAT_END;

	if (copy_to_user(uarg, &mp, uarg_size))
		return -EFAULT;
	return 0;
}

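/*
 * uring_cmd handler for the io_uring_mock manager misc device. Restricted to
 * CAP_SYS_ADMIN since this is a test-only interface.
 */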
static int iou_mock_mgr_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd->cmd_op) {
	case IORING_MOCK_MGR_CMD_PROBE:
		return io_probe_mock(cmd);
	case IORING_MOCK_MGR_CMD_CREATE:
		return io_create_mock_file(cmd, issue_flags);
	}
	return -EOPNOTSUPP;
}

static const struct file_operations iou_mock_dev_fops = {
	.owner = THIS_MODULE,
	.uring_cmd = iou_mock_mgr_cmd,
};

static struct miscdevice iou_mock_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "io_uring_mock",
	.fops = &iou_mock_dev_fops,
};

static int __init io_mock_init(void)
{
	int ret;

	ret = misc_register(&iou_mock_miscdev);
	if (ret < 0) {
		pr_err("Could not initialize io_uring mock device\n");
		return ret;
	}
	return 0;
}

static void __exit io_mock_exit(void)
{
	misc_deregister(&iou_mock_miscdev);
}

module_init(io_mock_init);
module_exit(io_mock_exit);

MODULE_AUTHOR("Pavel Begunkov <asml.silence@gmail.com>");
MODULE_DESCRIPTION("io_uring mock file");
MODULE_LICENSE("GPL");
| 351 | |