1 | /* $NetBSD: smbfs_kq.c,v 1.26 2013/10/17 21:04:44 christos Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 2003, 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jaromir Dolecek. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | |
32 | #include <sys/cdefs.h> |
33 | __KERNEL_RCSID(0, "$NetBSD: smbfs_kq.c,v 1.26 2013/10/17 21:04:44 christos Exp $" ); |
34 | |
35 | #include <sys/param.h> |
36 | #include <sys/systm.h> |
37 | #include <sys/namei.h> |
38 | #include <sys/kernel.h> |
39 | #include <sys/proc.h> |
40 | #include <sys/buf.h> |
41 | #include <sys/fcntl.h> |
42 | #include <sys/mount.h> |
43 | #include <sys/unistd.h> |
44 | #include <sys/vnode.h> |
45 | #include <sys/lockf.h> |
46 | #include <sys/kmem.h> |
47 | #include <sys/kthread.h> |
48 | #include <sys/file.h> |
49 | #include <sys/dirent.h> |
50 | #include <sys/mallocvar.h> |
51 | |
52 | #include <machine/limits.h> |
53 | |
54 | #include <uvm/uvm.h> |
55 | #include <uvm/uvm_extern.h> |
56 | |
57 | #include <netsmb/smb.h> |
58 | #include <netsmb/smb_conn.h> |
59 | #include <netsmb/smb_subr.h> |
60 | #include <netsmb/smb_rq.h> |
61 | |
62 | #include <fs/smbfs/smbfs.h> |
63 | #include <fs/smbfs/smbfs_node.h> |
64 | #include <fs/smbfs/smbfs_subr.h> |
65 | |
66 | #include <miscfs/genfs/genfs.h> |
67 | |
68 | /* |
69 | * The maximum of outstanding SMB requests is 65536, since the |
70 | * message id is 16bit. Don't consume all. If there is more |
71 | * than 30k directory notify requests, fall back to polling mode. |
72 | */ |
73 | #define DNOTIFY_MAX 30000 |
74 | |
/*
 * Per-vnode watch entry.  One kevq exists for each watched vnode,
 * shared (via usecount) by all knotes attached to that vnode.  An
 * entry lives on kevlist for lookup, and on exactly one of kplist
 * (polling mode) or kdnlist (NT directory change notify mode).
 */
struct kevq {
	SLIST_ENTRY(kevq)	kev_link;	/* link on kevlist */
	SLIST_ENTRY(kevq)	k_link;		/* link on poll/dn list */

	struct vnode		*vp;		/* the watched vnode */
	u_int			usecount;	/* number of knotes sharing this entry */
	u_int			flags;
#define KEVQ_BUSY	0x01	/* currently being processed */
#define KEVQ_WANT	0x02	/* want to change this entry */
#define KEVQ_DNOT	0x04	/* kevent using NT directory change notify */
	struct timespec		omtime;	/* old modification time */
	struct timespec		octime;	/* old change time */
	nlink_t			onlink;	/* old number of references to file */
	struct smb_rq		*rq;	/* request structure */
};

/* The single poller/notify handler thread; NULL when not running. */
static struct lwp *smbkql;
/* Credentials the handler thread uses for SMB requests. */
static struct smb_cred smbkq_scred;

/* Initialized lazily on first smbfs_kqfilter() call. */
static kmutex_t smbkq_lock;
/* guard access to k*evlist */
static SLIST_HEAD(, kevq) kevlist = SLIST_HEAD_INITIALIZER(kevlist);
static SLIST_HEAD(, kevq) kplist = SLIST_HEAD_INITIALIZER(kplist);
static SLIST_HEAD(, kevq) kdnlist = SLIST_HEAD_INITIALIZER(kdnlist);

static int dnot_num = 0;	/* number of active dir notifications */
static u_int32_t kevs;		/* total number of attached kevents */

static void smbfskq_dirnotify(void *);
104 | |
105 | /* |
106 | * This routine periodically checks server for change |
107 | * of any of the watched files every SMBFS_MINATTRTIME/2 seconds. |
108 | * Only changes in size, modification time, change time and nlinks |
109 | * are being checked, everything else is ignored. |
110 | * Directory events are watched via NT DIRECTORY CHANGE NOTIFY |
111 | * if the server supports it. |
112 | * |
113 | * The routine only calls VOP_GETATTR() when it's likely it would get |
114 | * some new data, i.e. when the vnode expires from attrcache. This |
115 | * should give same result as periodically running stat(2) from userland, |
116 | * while keeping CPU/network usage low, and still provide proper kevent |
117 | * semantics. |
118 | * The poller thread is created when first vnode is added to watch list, |
119 | * and exits when the watch list is empty. The overhead of thread creation |
120 | * isn't really important, neither speed of attach and detach of knote. |
121 | */ |
122 | /* ARGSUSED */ |
123 | static void |
124 | smbfs_kqpoll(void *arg) |
125 | { |
126 | struct kevq *ke; |
127 | struct vattr attr; |
128 | int error = 0; |
129 | struct lwp *l; |
130 | u_quad_t osize; |
131 | int needwake; |
132 | |
133 | l = curlwp; |
134 | |
135 | mutex_enter(&smbkq_lock); |
136 | for(;;) { |
137 | /* check all entries on poll list for changes */ |
138 | SLIST_FOREACH(ke, &kplist, k_link) { |
139 | /* skip if still in attrcache */ |
140 | if (smbfs_attr_cachelookup(ke->vp, &attr) != ENOENT) |
141 | continue; |
142 | |
143 | /* |
144 | * Mark entry busy, release lock and check |
145 | * for changes. |
146 | */ |
147 | ke->flags |= KEVQ_BUSY; |
148 | mutex_exit(&smbkq_lock); |
149 | |
150 | /* save v_size, smbfs_getattr() updates it */ |
151 | osize = ke->vp->v_size; |
152 | |
153 | vn_lock(ke->vp, LK_SHARED | LK_RETRY); |
154 | error = VOP_GETATTR(ke->vp, &attr, l->l_cred); |
155 | VOP_UNLOCK(ke->vp); |
156 | if (error) { |
157 | /* relock and proceed with next */ |
158 | mutex_enter(&smbkq_lock); |
159 | continue; |
160 | } |
161 | |
162 | /* following is a bit fragile, but about best |
163 | * we can get */ |
164 | if (ke->vp->v_type != VDIR && attr.va_size != osize) { |
165 | int extended = (attr.va_size > osize); |
166 | VN_KNOTE(ke->vp, NOTE_WRITE |
167 | | (extended ? NOTE_EXTEND : 0)); |
168 | ke->omtime = attr.va_mtime; |
169 | } else if (attr.va_mtime.tv_sec != ke->omtime.tv_sec |
170 | || attr.va_mtime.tv_nsec != ke->omtime.tv_nsec) { |
171 | VN_KNOTE(ke->vp, NOTE_WRITE); |
172 | ke->omtime = attr.va_mtime; |
173 | } |
174 | |
175 | if (attr.va_ctime.tv_sec != ke->octime.tv_sec |
176 | || attr.va_ctime.tv_nsec != ke->octime.tv_nsec) { |
177 | VN_KNOTE(ke->vp, NOTE_ATTRIB); |
178 | ke->octime = attr.va_ctime; |
179 | } |
180 | |
181 | if (attr.va_nlink != ke->onlink) { |
182 | VN_KNOTE(ke->vp, NOTE_LINK); |
183 | ke->onlink = attr.va_nlink; |
184 | } |
185 | |
186 | mutex_enter(&smbkq_lock); |
187 | ke->flags &= ~KEVQ_BUSY; |
188 | if (ke->flags & KEVQ_WANT) { |
189 | ke->flags &= ~KEVQ_WANT; |
190 | wakeup(ke); |
191 | } |
192 | } |
193 | |
194 | /* Exit if there are no more kevents to watch for */ |
195 | if (kevs == 0) { |
196 | smbkql = NULL; |
197 | break; |
198 | } |
199 | |
200 | /* only wake periodically if poll list is nonempty */ |
201 | needwake = !SLIST_EMPTY(&kplist); |
202 | |
203 | /* wait a while before checking for changes again */ |
204 | if (SLIST_EMPTY(&kdnlist)) { |
205 | error = mtsleep(smbkql, PSOCK, "smbkqidl" , |
206 | needwake ? (SMBFS_ATTRTIMO * hz / 2) : 0, |
207 | &smbkq_lock); |
208 | } |
209 | |
210 | if (!error) { |
211 | /* woken up, check if any pending notifications */ |
212 | while (!SLIST_EMPTY(&kdnlist)) { |
213 | int s, hint; |
214 | |
215 | s = splnet(); |
216 | ke = SLIST_FIRST(&kdnlist); |
217 | SLIST_REMOVE_HEAD(&kdnlist, k_link); |
218 | SLIST_NEXT(ke, k_link) = NULL; |
219 | splx(s); |
220 | |
221 | /* drop lock while processing */ |
222 | mutex_exit(&smbkq_lock); |
223 | |
224 | /* |
225 | * Skip fetch if not yet setup. |
226 | */ |
227 | if (__predict_false(ke->rq == NULL)) |
228 | goto notifyrq; |
229 | |
230 | error = smbfs_smb_nt_dirnotify_fetch(ke->rq, |
231 | &hint); |
232 | ke->rq = NULL; /* rq deallocated by now */ |
233 | if (error) { |
234 | /* |
235 | * if there is error, switch to |
236 | * polling for this one |
237 | */ |
238 | ke->flags &= KEVQ_DNOT; |
239 | SLIST_INSERT_HEAD(&kplist, ke, k_link); |
240 | continue; |
241 | } |
242 | |
243 | VN_KNOTE(ke->vp, hint); |
244 | |
245 | notifyrq: |
246 | /* reissue the notify request */ |
247 | (void) smbfs_smb_nt_dirnotify_setup( |
248 | VTOSMB(ke->vp), |
249 | &ke->rq, &smbkq_scred, |
250 | smbfskq_dirnotify, ke); |
251 | |
252 | /* reacquire the lock */ |
253 | mutex_enter(&smbkq_lock); |
254 | } |
255 | } |
256 | } |
257 | mutex_exit(&smbkq_lock); |
258 | |
259 | kthread_exit(0); |
260 | } |
261 | |
/*
 * Completion callback for a pending NT DIRECTORY CHANGE NOTIFY
 * request; 'arg' is the struct kevq the request was issued for.
 * Queues the entry on kdnlist and wakes the handler thread, which
 * fetches the notification result in smbfs_kqpoll().
 *
 * NOTE(review): runs without smbkq_lock held; the consumer in
 * smbfs_kqpoll() dequeues under splnet(), which presumably matches
 * the interrupt level this callback runs at — confirm.
 */
static void
smbfskq_dirnotify(void *arg)
{
	struct kevq *ke = arg;

	/* a non-NULL next pointer means the entry is already queued */
	if (SLIST_NEXT(ke, k_link)) {
		/* already on notify list */
		return;
	}

	SLIST_INSERT_HEAD(&kdnlist, ke, k_link);
	wakeup(smbkql);
}
275 | |
/*
 * Detach a knote from its vnode.  Unhooks the knote from the vnode's
 * klist, then drops one reference on the shared kevq entry, freeing
 * the entry (and cancelling any outstanding dirnotify request) when
 * the last reference goes away.
 */
static void
filt_smbfsdetach(struct knote *kn)
{
	struct kevq *ke = (struct kevq *)kn->kn_hook;
	struct vnode *vp = ke->vp;
	struct smb_rq *rq = NULL;

	/* unhook the knote from the vnode's knote list */
	mutex_enter(vp->v_interlock);
	SLIST_REMOVE(&ke->vp->v_klist, kn, knote, kn_selnext);
	mutex_exit(vp->v_interlock);

	/* Remove the vnode from watch list */
	mutex_enter(&smbkq_lock);

	/* the handler does something to it, wait */
	while (ke->flags & KEVQ_BUSY) {
		ke->flags |= KEVQ_WANT;
		mtsleep(ke, PSOCK, "smbkqdw", 0, &smbkq_lock);
	}

	if (ke->usecount > 1) {
		/* keep, other kevents need this */
		ke->usecount--;
	} else {
		/* last user, g/c */
		if (ke->flags & KEVQ_DNOT) {
			dnot_num--;
			/* remember the pending request; cancelled below,
			 * after smbkq_lock has been dropped */
			rq = ke->rq;

			/* If on dirnotify list, remove */
			if (SLIST_NEXT(ke, k_link))
				SLIST_REMOVE(&kdnlist, ke, kevq, k_link);
		} else
			SLIST_REMOVE(&kplist, ke, kevq, k_link);
		SLIST_REMOVE(&kevlist, ke, kevq, kev_link);
		kmem_free(ke, sizeof(*ke));
	}
	kevs--;

	mutex_exit(&smbkq_lock);

	/* If there was request still pending, cancel it now */
	if (rq) {
		smb_iod_removerq(rq);

		/*
		 * Explicitly cancel the request, so that server can
		 * free directory change notify resources.
		 */
		smbfs_smb_ntcancel(SSTOCP(rq->sr_share), rq->sr_mid,
		    &smbkq_scred);

		/* Free */
		smb_rq_done(rq);
	}
}
332 | |
333 | static int |
334 | filt_smbfsread(struct knote *kn, long hint) |
335 | { |
336 | struct kevq *ke = (struct kevq *)kn->kn_hook; |
337 | struct vnode *vp = ke->vp; |
338 | int rv; |
339 | |
340 | if (hint == NOTE_REVOKE) { |
341 | /* |
342 | * filesystem is gone, so set the EOF flag and schedule |
343 | * the knote for deletion. |
344 | */ |
345 | KASSERT(mutex_owned(vp->v_interlock)); |
346 | kn->kn_flags |= (EV_EOF | EV_ONESHOT); |
347 | return (1); |
348 | } |
349 | |
350 | /* There is no size info for directories */ |
351 | if (hint == 0) { |
352 | mutex_enter(vp->v_interlock); |
353 | } else { |
354 | KASSERT(mutex_owned(vp->v_interlock)); |
355 | } |
356 | if (vp->v_type == VDIR) { |
357 | /* |
358 | * This is kind of hackish, since we need to |
359 | * set the flag when we are called with the hint |
360 | * to make confirming call from kern_event.c |
361 | * succeed too, but need to unset it afterwards |
362 | * so that the directory wouldn't stay flagged |
363 | * as changed. |
364 | * XXX perhaps just fail for directories? |
365 | */ |
366 | if (hint & NOTE_WRITE) { |
367 | kn->kn_fflags |= NOTE_WRITE; |
368 | rv = (1 * sizeof(struct dirent)); |
369 | } else if (hint == 0 && (kn->kn_fflags & NOTE_WRITE)) { |
370 | kn->kn_fflags &= ~NOTE_WRITE; |
371 | rv = (1 * sizeof(struct dirent)); |
372 | } else |
373 | rv = 0; |
374 | } else { |
375 | kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset; |
376 | rv = (kn->kn_data != 0); |
377 | } |
378 | if (hint == 0) { |
379 | mutex_enter(vp->v_interlock); |
380 | } |
381 | |
382 | return rv; |
383 | } |
384 | |
/*
 * EVFILT_VNODE event check.  Accumulates poster-supplied hints into
 * kn_fflags (masked by what the caller subscribed to in kn_sfflags)
 * and fires while any accumulated flag is set.  NOTE_REVOKE
 * additionally marks the knote EOF.
 */
static int
filt_smbfsvnode(struct knote *kn, long hint)
{
	struct kevq *ke = (struct kevq *)kn->kn_hook;
	struct vnode *vp = ke->vp;

	switch (hint) {
	case NOTE_REVOKE:
		KASSERT(mutex_owned(vp->v_interlock));
		kn->kn_flags |= EV_EOF;
		if ((kn->kn_sfflags & hint) != 0)
			kn->kn_fflags |= hint;
		return (1);
	case 0:
		/*
		 * Called from kern_event.c without the interlock.
		 * Take and drop it so the kn_fflags read below is
		 * ordered after any update made by an event poster
		 * holding the lock.
		 */
		mutex_enter(vp->v_interlock);
		mutex_exit(vp->v_interlock);
		break;
	default:
		/* event poster holds the interlock and passes a hint */
		KASSERT(mutex_owned(vp->v_interlock));
		if ((kn->kn_sfflags & hint) != 0)
			kn->kn_fflags |= hint;
		break;
	}

	return (kn->kn_fflags != 0);
}
411 | |
/*
 * Filter operation tables for EVFILT_READ and EVFILT_VNODE.
 * The leading 1 marks the filter as fd-attached (f_isfd); there is
 * no separate attach routine — smbfs_kqfilter() does the setup.
 */
static const struct filterops smbfsread_filtops =
	{ 1, NULL, filt_smbfsdetach, filt_smbfsread };
static const struct filterops smbfsvnode_filtops =
	{ 1, NULL, filt_smbfsdetach, filt_smbfsvnode };
416 | |
417 | int |
418 | smbfs_kqfilter(void *v) |
419 | { |
420 | struct vop_kqfilter_args /* { |
421 | struct vnode *a_vp; |
422 | struct knote *a_kn; |
423 | } */ *ap = v; |
424 | struct vnode *vp = ap->a_vp; |
425 | struct knote *kn = ap->a_kn; |
426 | struct kevq *ke, *ken; |
427 | int error = 0; |
428 | struct vattr attr; |
429 | struct lwp *l = curlwp; /* XXX */ |
430 | int dnot; |
431 | struct smb_vc *vcp = SSTOVC(VTOSMB(vp)->n_mount->sm_share); |
432 | static bool again; |
433 | |
434 | switch (kn->kn_filter) { |
435 | case EVFILT_READ: |
436 | kn->kn_fop = &smbfsread_filtops; |
437 | break; |
438 | case EVFILT_VNODE: |
439 | kn->kn_fop = &smbfsvnode_filtops; |
440 | break; |
441 | default: |
442 | return (EINVAL); |
443 | } |
444 | |
445 | /* Find out if we can use directory change notify for this file */ |
446 | dnot = (vp->v_type == VDIR |
447 | && (SMB_CAPS(vcp) & SMB_CAP_NT_SMBS) |
448 | && dnot_num < DNOTIFY_MAX); |
449 | |
450 | /* |
451 | * Put the vnode to watched list. |
452 | */ |
453 | kevs++; |
454 | |
455 | /* |
456 | * Fetch current attributes. It's only needed when the vnode |
457 | * is not watched yet, but we need to do this without lock |
458 | * held. This is likely cheap due to attrcache, so do it now. |
459 | */ |
460 | memset(&attr, 0, sizeof(attr)); |
461 | vn_lock(vp, LK_SHARED | LK_RETRY); |
462 | (void) VOP_GETATTR(vp, &attr, l->l_cred); |
463 | VOP_UNLOCK(vp); |
464 | |
465 | /* ensure the handler is running */ |
466 | /* XXX this is unreliable. */ |
467 | if (!again) { |
468 | mutex_init(&smbkq_lock, MUTEX_DEFAULT, IPL_NONE); |
469 | } |
470 | if (!smbkql) { |
471 | /* XXX very fishy */ |
472 | error = kthread_create(PRI_NONE, 0, NULL, smbfs_kqpoll, |
473 | NULL, &smbkql, "smbkq" ); |
474 | smb_makescred(&smbkq_scred, smbkql, smbkql->l_cred); |
475 | if (error) { |
476 | kevs--; |
477 | return (error); |
478 | } |
479 | } |
480 | |
481 | /* |
482 | * Allocate new kev. It's more probable it will be needed, |
483 | * and the malloc is cheaper than scanning possibly |
484 | * large kevlist list second time after malloc. |
485 | */ |
486 | ken = kmem_alloc(sizeof(*ken), KM_SLEEP); |
487 | |
488 | /* Check the list and insert new entry */ |
489 | mutex_enter(&smbkq_lock); |
490 | SLIST_FOREACH(ke, &kevlist, kev_link) { |
491 | if (ke->vp == vp) |
492 | break; |
493 | } |
494 | |
495 | if (ke) { |
496 | /* already watched, so just bump usecount */ |
497 | ke->usecount++; |
498 | kmem_free(ken, sizeof(*ken)); |
499 | } else { |
500 | /* need a new one */ |
501 | memset(ken, 0, sizeof(*ken)); |
502 | ke = ken; |
503 | ke->vp = vp; |
504 | ke->usecount = 1; |
505 | ke->flags = (dnot) ? KEVQ_DNOT : 0; |
506 | ke->omtime = attr.va_mtime; |
507 | ke->octime = attr.va_ctime; |
508 | ke->onlink = attr.va_nlink; |
509 | |
510 | if (dnot) { |
511 | int s; |
512 | |
513 | /* |
514 | * Add kevent to list of 'need attend' kevnets. |
515 | * The handler will pick it up and setup request |
516 | * appropriately. |
517 | */ |
518 | s = splnet(); |
519 | SLIST_INSERT_HEAD(&kdnlist, ke, k_link); |
520 | splx(s); |
521 | dnot_num++; |
522 | } else { |
523 | /* add to poll list */ |
524 | SLIST_INSERT_HEAD(&kplist, ke, k_link); |
525 | } |
526 | |
527 | SLIST_INSERT_HEAD(&kevlist, ke, kev_link); |
528 | |
529 | /* kick the handler */ |
530 | wakeup(smbkql); |
531 | } |
532 | |
533 | mutex_enter(vp->v_interlock); |
534 | SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext); |
535 | kn->kn_hook = ke; |
536 | mutex_exit(vp->v_interlock); |
537 | |
538 | mutex_exit(&smbkq_lock); |
539 | |
540 | return (0); |
541 | } |
542 | |