From d0a6bf47dd6cd2a9ed17dbdc32dd34a6ba0f5b5f Mon Sep 17 00:00:00 2001
From: Babis Chalios <bchalios@amazon.es>
Date: Tue, 2 Dec 2025 20:11:44 +0000
Subject: [PATCH 2/4] ptp: vmclock: support device notifications

Add optional support for device notifications in VMClock. When
supported, the hypervisor will send a device notification every time it
updates seq_count to a new even value.

Moreover, add support for poll() in VMClock as a means to propagate this
notification to user space. poll() returns a POLLIN event to listeners
every time seq_count changes to a value different from the one last seen
(at open() or at the last read()/pread()). When poll() returns a POLLIN
event, listeners must call read() to observe what has changed and to
update their view of seq_count. In other words, once poll() has
returned, every subsequent call to poll() will return immediately with a
POLLIN event until the listener calls read().
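
For illustration, here is a minimal user-space sketch of the intended
poll()/read() loop. It assumes the miscdevice shows up as /dev/vmclock0
and that <linux/vmclock-abi.h> is installed; both names are assumptions
for the example, not something this patch guarantees, and error handling
is reduced to bare exits.

  #include <fcntl.h>
  #include <poll.h>
  #include <stdio.h>
  #include <unistd.h>

  #include <linux/vmclock-abi.h>

  int main(void)
  {
      struct vmclock_abi clk;
      struct pollfd pfd = { .events = POLLIN };

      /* Assumed device node name; use whatever the miscdevice registers. */
      pfd.fd = open("/dev/vmclock0", O_RDONLY);
      if (pfd.fd < 0)
          return 1;

      for (;;) {
          /* Sleeps until the hypervisor bumps seq_count. */
          if (poll(&pfd, 1, -1) < 0)
              return 1;
          /* POLLHUP means the device offers no notifications. */
          if (pfd.revents & POLLHUP)
              return 1;
          if (pfd.revents & POLLIN) {
              /*
               * read() copies out a consistent snapshot and records the
               * seq_count we saw, so poll() blocks again afterwards.
               */
              if (pread(pfd.fd, &clk, sizeof(clk), 0) < 0)
                  return 1;
              printf("seq_count is now %llu\n",
                     (unsigned long long)clk.seq_count);
          }
      }
  }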

The device advertises support for the notification mechanism by setting
the VMCLOCK_FLAG_NOTIFICATION_PRESENT flag in the vmclock_abi flags
field. If the flag is not set, the driver won't set up the ACPI
notification handler and poll() will always return POLLHUP immediately.
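
For completeness, a sketch of how a listener could detect support up
front through the existing mmap() interface instead of probing for
POLLHUP. The read-only, page-sized mapping mirrors what the driver's
mmap handler allows; the helper name and the fd parameter are purely
illustrative.

  #include <stdbool.h>
  #include <sys/mman.h>
  #include <unistd.h>

  #include <linux/vmclock-abi.h>

  /* 'fd' is an open descriptor for the vmclock device node. */
  static bool vmclock_has_notifications(int fd)
  {
      long page = sysconf(_SC_PAGESIZE);
      const struct vmclock_abi *clk;
      bool supported;

      clk = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);
      if (clk == MAP_FAILED)
          return false;

      supported = clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT;
      munmap((void *)clk, page);
      return supported;
  }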

Signed-off-by: Babis Chalios <bchalios@amazon.es>
---
 drivers/ptp/ptp_vmclock.c        | 130 ++++++++++++++++++++++++++++---
 include/uapi/linux/vmclock-abi.h |   5 ++
 2 files changed, 126 insertions(+), 9 deletions(-)

diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c
index 1ce69eada4b2..4673915c43e7 100644
--- a/drivers/ptp/ptp_vmclock.c
+++ b/drivers/ptp/ptp_vmclock.c
@@ -5,6 +5,9 @@
  * Copyright © 2024 Amazon.com, Inc. or its affiliates.
  */
 
+#include <linux/poll.h>
+#include <linux/types.h>
+#include <linux/wait.h>
 #include <linux/acpi.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -37,6 +40,7 @@ struct vmclock_state {
         struct resource res;
         struct vmclock_abi *clk;
         struct miscdevice miscdev;
+        wait_queue_head_t disrupt_wait;
         struct ptp_clock_info ptp_clock_info;
         struct ptp_clock *ptp_clock;
         enum clocksource_ids cs_id, sys_cs_id;
@@ -311,10 +315,15 @@ static const struct ptp_clock_info ptp_vmclock_info = {
         .getcrosststamp = ptp_vmclock_getcrosststamp,
 };
 
+struct vmclock_file_state {
+        struct vmclock_state *st;
+        atomic_t seq;
+};
+
 static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
 {
-        struct vmclock_state *st = container_of(fp->private_data,
-                                                struct vmclock_state, miscdev);
+        struct vmclock_file_state *fst = fp->private_data;
+        struct vmclock_state *st = fst->st;
 
         if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ)
                 return -EROFS;
@@ -333,11 +342,12 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
 static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
                                     size_t count, loff_t *ppos)
 {
-        struct vmclock_state *st = container_of(fp->private_data,
-                                                struct vmclock_state, miscdev);
+        struct vmclock_file_state *fst = fp->private_data;
+        struct vmclock_state *st = fst->st;
+
         ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
         size_t max_count;
-        int32_t seq;
+        int32_t seq, old_seq;
 
         if (*ppos >= PAGE_SIZE)
                 return 0;
@@ -346,6 +356,7 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
         if (count > max_count)
                 count = max_count;
 
+        old_seq = atomic_read(&fst->seq);
         while (1) {
                 seq = st->clk->seq_count & ~1ULL;
                 virt_rmb();
@@ -354,8 +365,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
                         return -EFAULT;
 
                 virt_rmb();
-                if (seq == st->clk->seq_count)
-                        break;
+                if (seq == st->clk->seq_count) {
+                        /*
+                         * Either we updated fst->seq to seq (the value we just
+                         * observed) or another reader did (old_seq == seq).
+                         */
+                        if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) ||
+                            old_seq == seq) {
+                                break;
+                        }
+                }
 
                 if (ktime_after(ktime_get(), deadline))
                         return -ETIMEDOUT;
@@ -365,9 +384,57 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
         return count;
 }
 
+static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait)
+{
+        struct vmclock_file_state *fst = fp->private_data;
+        struct vmclock_state *st = fst->st;
+        uint32_t seq;
+
+        /*
+         * The hypervisor will not send us any notifications, so fail
+         * immediately to avoid having the caller sleep forever.
+         */
+        if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
+                return EPOLLHUP;
+
+        poll_wait(fp, &st->disrupt_wait, wait);
+
+        seq = st->clk->seq_count;
+        if (atomic_read(&fst->seq) != seq)
+                return EPOLLIN | EPOLLRDNORM;
+
+        return 0;
+}
+
+static int vmclock_miscdev_open(struct inode *inode, struct file *fp)
+{
+        struct vmclock_state *st = container_of(fp->private_data,
+                                                struct vmclock_state, miscdev);
+        struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL);
+
+        if (!fst)
+                return -ENOMEM;
+
+        fst->st = st;
+        atomic_set(&fst->seq, 0);
+
+        fp->private_data = fst;
+
+        return 0;
+}
+
+static int vmclock_miscdev_release(struct inode *inode, struct file *fp)
+{
+        kfree(fp->private_data);
+        return 0;
+}
+
 static const struct file_operations vmclock_miscdev_fops = {
-        .mmap = vmclock_miscdev_mmap,
-        .read = vmclock_miscdev_read,
+        .open = vmclock_miscdev_open,
+        .release = vmclock_miscdev_release,
+        .mmap = vmclock_miscdev_mmap,
+        .read = vmclock_miscdev_read,
+        .poll = vmclock_miscdev_poll,
 };
 
 /* module operations */
@@ -413,6 +480,44 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data
         return AE_ERROR;
 }
 
+static void
+vmclock_acpi_notification_handler(acpi_handle __always_unused handle,
+                                  u32 __always_unused event, void *dev)
+{
+        struct device *device = dev;
+        struct vmclock_state *st = dev_get_drvdata(device);
+
+        wake_up_interruptible(&st->disrupt_wait);
+}
+
+static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st)
+{
+        struct acpi_device *adev = ACPI_COMPANION(dev);
+        acpi_status status;
+
+        /*
+         * This should never happen as this function is only called when
+         * has_acpi_companion(dev) is true, but the logic is sufficiently
+         * complex that Coverity can't see the tautology.
+         */
+        if (!adev)
+                return -ENODEV;
+
+        /* The device does not support notifications. Nothing else to do. */
+        if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
+                return 0;
+
+        status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
+                                             vmclock_acpi_notification_handler,
+                                             dev);
+        if (ACPI_FAILURE(status)) {
+                dev_err(dev, "failed to install notification handler\n");
+                return -ENODEV;
+        }
+
+        return 0;
+}
+
 static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
 {
         struct acpi_device *adev = ACPI_COMPANION(dev);
@@ -495,6 +600,11 @@ static int vmclock_probe(struct platform_device *pdev)
                 goto out;
         }
 
+        init_waitqueue_head(&st->disrupt_wait);
+        ret = vmclock_setup_notification(dev, st);
+        if (ret)
+                goto out;
+
         /* If the structure is big enough, it can be mapped to userspace */
         if (st->clk->size >= PAGE_SIZE) {
                 st->miscdev.minor = MISC_DYNAMIC_MINOR;
@@ -544,6 +654,8 @@ static int vmclock_probe(struct platform_device *pdev)
                 goto out;
         }
 
+        dev_set_drvdata(dev, st);
+
         dev_info(dev, "%s: registered %s%s%s\n", st->name,
                  st->miscdev.minor ? "miscdev" : "",
                  (st->miscdev.minor && st->ptp_clock) ? ", " : "",
diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h
index 75deb6ae2b27..4b7cd2b8532c 100644
--- a/include/uapi/linux/vmclock-abi.h
+++ b/include/uapi/linux/vmclock-abi.h
@@ -125,6 +125,11 @@ struct vmclock_abi {
          * loaded from some save state (restored from a snapshot).
          */
 #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT     (1 << 8)
+        /*
+         * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send
+         * a notification every time it updates seq_count to a new even number.
+         */
+#define VMCLOCK_FLAG_NOTIFICATION_PRESENT       (1 << 9)
 
         uint8_t pad[2];
         uint8_t clock_status;
-- 
2.34.1