1/* $NetBSD: smbfs_kq.c,v 1.26 2013/10/17 21:04:44 christos Exp $ */
2
3/*-
4 * Copyright (c) 2003, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: smbfs_kq.c,v 1.26 2013/10/17 21:04:44 christos Exp $");
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/namei.h>
38#include <sys/kernel.h>
39#include <sys/proc.h>
40#include <sys/buf.h>
41#include <sys/fcntl.h>
42#include <sys/mount.h>
43#include <sys/unistd.h>
44#include <sys/vnode.h>
45#include <sys/lockf.h>
46#include <sys/kmem.h>
47#include <sys/kthread.h>
48#include <sys/file.h>
49#include <sys/dirent.h>
50#include <sys/mallocvar.h>
51
52#include <machine/limits.h>
53
54#include <uvm/uvm.h>
55#include <uvm/uvm_extern.h>
56
57#include <netsmb/smb.h>
58#include <netsmb/smb_conn.h>
59#include <netsmb/smb_subr.h>
60#include <netsmb/smb_rq.h>
61
62#include <fs/smbfs/smbfs.h>
63#include <fs/smbfs/smbfs_node.h>
64#include <fs/smbfs/smbfs_subr.h>
65
66#include <miscfs/genfs/genfs.h>
67
68/*
69 * The maximum of outstanding SMB requests is 65536, since the
70 * message id is 16bit. Don't consume all. If there is more
71 * than 30k directory notify requests, fall back to polling mode.
72 */
73#define DNOTIFY_MAX 30000
74
/*
 * Per-watched-vnode bookkeeping. One kevq exists per vnode with active
 * knotes; multiple knotes on the same vnode share it via 'usecount'.
 * All fields are protected by smbkq_lock except while KEVQ_BUSY is set.
 */
struct kevq {
	SLIST_ENTRY(kevq) kev_link;	/* link on kevlist (all watched vnodes) */
	SLIST_ENTRY(kevq) k_link;	/* link on poll (kplist) or dirnotify (kdnlist) list */

	struct vnode *vp;		/* the watched vnode */
	u_int usecount;			/* number of knotes sharing this entry */
	u_int flags;
#define KEVQ_BUSY	0x01	/* currently being processed */
#define KEVQ_WANT	0x02	/* want to change this entry */
#define KEVQ_DNOT	0x04	/* kevent using NT directory change notify */
	struct timespec omtime;	/* old modification time */
	struct timespec octime;	/* old change time */
	nlink_t onlink;		/* old number of references to file */
	struct smb_rq *rq;	/* request structure */
};
90
static struct lwp *smbkql;		/* the kevent handler thread; NULL when idle */
static struct smb_cred smbkq_scred;	/* credentials used for dirnotify requests */

/* guards kevlist, kplist, kdnlist and kevq contents (see struct kevq) */
static kmutex_t smbkq_lock;
static SLIST_HEAD(, kevq) kevlist = SLIST_HEAD_INITIALIZER(kevlist);
static SLIST_HEAD(, kevq) kplist = SLIST_HEAD_INITIALIZER(kplist);
static SLIST_HEAD(, kevq) kdnlist = SLIST_HEAD_INITIALIZER(kdnlist);

static int dnot_num = 0;		/* number of active dir notifications */
static u_int32_t kevs;			/* total number of active knotes */

static void smbfskq_dirnotify(void *);
104
105/*
106 * This routine periodically checks server for change
107 * of any of the watched files every SMBFS_MINATTRTIME/2 seconds.
108 * Only changes in size, modification time, change time and nlinks
109 * are being checked, everything else is ignored.
110 * Directory events are watched via NT DIRECTORY CHANGE NOTIFY
111 * if the server supports it.
112 *
113 * The routine only calls VOP_GETATTR() when it's likely it would get
114 * some new data, i.e. when the vnode expires from attrcache. This
115 * should give same result as periodically running stat(2) from userland,
116 * while keeping CPU/network usage low, and still provide proper kevent
117 * semantics.
118 * The poller thread is created when first vnode is added to watch list,
119 * and exits when the watch list is empty. The overhead of thread creation
120 * isn't really important, neither speed of attach and detach of knote.
121 */
122/* ARGSUSED */
123static void
124smbfs_kqpoll(void *arg)
125{
126 struct kevq *ke;
127 struct vattr attr;
128 int error = 0;
129 struct lwp *l;
130 u_quad_t osize;
131 int needwake;
132
133 l = curlwp;
134
135 mutex_enter(&smbkq_lock);
136 for(;;) {
137 /* check all entries on poll list for changes */
138 SLIST_FOREACH(ke, &kplist, k_link) {
139 /* skip if still in attrcache */
140 if (smbfs_attr_cachelookup(ke->vp, &attr) != ENOENT)
141 continue;
142
143 /*
144 * Mark entry busy, release lock and check
145 * for changes.
146 */
147 ke->flags |= KEVQ_BUSY;
148 mutex_exit(&smbkq_lock);
149
150 /* save v_size, smbfs_getattr() updates it */
151 osize = ke->vp->v_size;
152
153 vn_lock(ke->vp, LK_SHARED | LK_RETRY);
154 error = VOP_GETATTR(ke->vp, &attr, l->l_cred);
155 VOP_UNLOCK(ke->vp);
156 if (error) {
157 /* relock and proceed with next */
158 mutex_enter(&smbkq_lock);
159 continue;
160 }
161
162 /* following is a bit fragile, but about best
163 * we can get */
164 if (ke->vp->v_type != VDIR && attr.va_size != osize) {
165 int extended = (attr.va_size > osize);
166 VN_KNOTE(ke->vp, NOTE_WRITE
167 | (extended ? NOTE_EXTEND : 0));
168 ke->omtime = attr.va_mtime;
169 } else if (attr.va_mtime.tv_sec != ke->omtime.tv_sec
170 || attr.va_mtime.tv_nsec != ke->omtime.tv_nsec) {
171 VN_KNOTE(ke->vp, NOTE_WRITE);
172 ke->omtime = attr.va_mtime;
173 }
174
175 if (attr.va_ctime.tv_sec != ke->octime.tv_sec
176 || attr.va_ctime.tv_nsec != ke->octime.tv_nsec) {
177 VN_KNOTE(ke->vp, NOTE_ATTRIB);
178 ke->octime = attr.va_ctime;
179 }
180
181 if (attr.va_nlink != ke->onlink) {
182 VN_KNOTE(ke->vp, NOTE_LINK);
183 ke->onlink = attr.va_nlink;
184 }
185
186 mutex_enter(&smbkq_lock);
187 ke->flags &= ~KEVQ_BUSY;
188 if (ke->flags & KEVQ_WANT) {
189 ke->flags &= ~KEVQ_WANT;
190 wakeup(ke);
191 }
192 }
193
194 /* Exit if there are no more kevents to watch for */
195 if (kevs == 0) {
196 smbkql = NULL;
197 break;
198 }
199
200 /* only wake periodically if poll list is nonempty */
201 needwake = !SLIST_EMPTY(&kplist);
202
203 /* wait a while before checking for changes again */
204 if (SLIST_EMPTY(&kdnlist)) {
205 error = mtsleep(smbkql, PSOCK, "smbkqidl",
206 needwake ? (SMBFS_ATTRTIMO * hz / 2) : 0,
207 &smbkq_lock);
208 }
209
210 if (!error) {
211 /* woken up, check if any pending notifications */
212 while (!SLIST_EMPTY(&kdnlist)) {
213 int s, hint;
214
215 s = splnet();
216 ke = SLIST_FIRST(&kdnlist);
217 SLIST_REMOVE_HEAD(&kdnlist, k_link);
218 SLIST_NEXT(ke, k_link) = NULL;
219 splx(s);
220
221 /* drop lock while processing */
222 mutex_exit(&smbkq_lock);
223
224 /*
225 * Skip fetch if not yet setup.
226 */
227 if (__predict_false(ke->rq == NULL))
228 goto notifyrq;
229
230 error = smbfs_smb_nt_dirnotify_fetch(ke->rq,
231 &hint);
232 ke->rq = NULL; /* rq deallocated by now */
233 if (error) {
234 /*
235 * if there is error, switch to
236 * polling for this one
237 */
238 ke->flags &= KEVQ_DNOT;
239 SLIST_INSERT_HEAD(&kplist, ke, k_link);
240 continue;
241 }
242
243 VN_KNOTE(ke->vp, hint);
244
245 notifyrq:
246 /* reissue the notify request */
247 (void) smbfs_smb_nt_dirnotify_setup(
248 VTOSMB(ke->vp),
249 &ke->rq, &smbkq_scred,
250 smbfskq_dirnotify, ke);
251
252 /* reacquire the lock */
253 mutex_enter(&smbkq_lock);
254 }
255 }
256 }
257 mutex_exit(&smbkq_lock);
258
259 kthread_exit(0);
260}
261
262static void
263smbfskq_dirnotify(void *arg)
264{
265 struct kevq *ke = arg;
266
267 if (SLIST_NEXT(ke, k_link)) {
268 /* already on notify list */
269 return;
270 }
271
272 SLIST_INSERT_HEAD(&kdnlist, ke, k_link);
273 wakeup(smbkql);
274}
275
/*
 * Detach a knote from its vnode: unhook it from v_klist, drop the
 * kevq reference and, when this was the last knote, garbage-collect
 * the kevq entry and cancel any outstanding dirnotify request.
 */
static void
filt_smbfsdetach(struct knote *kn)
{
	struct kevq *ke = (struct kevq *)kn->kn_hook;
	struct vnode *vp = ke->vp;
	struct smb_rq *rq = NULL;

	/* unhook the knote from the vnode's knote list */
	mutex_enter(vp->v_interlock);
	SLIST_REMOVE(&ke->vp->v_klist, kn, knote, kn_selnext);
	mutex_exit(vp->v_interlock);

	/* Remove the vnode from watch list */
	mutex_enter(&smbkq_lock);

	/* the handler does something to it, wait */
	while (ke->flags & KEVQ_BUSY) {
		ke->flags |= KEVQ_WANT;
		mtsleep(ke, PSOCK, "smbkqdw", 0, &smbkq_lock);
	}

	if (ke->usecount > 1) {
		/* keep, other kevents need this */
		ke->usecount--;
	} else {
		/* last user, g/c */
		if (ke->flags & KEVQ_DNOT) {
			dnot_num--;
			/* remember rq; cancelled below after dropping lock */
			rq = ke->rq;

			/* If on dirnotify list, remove */
			if (SLIST_NEXT(ke, k_link))
				SLIST_REMOVE(&kdnlist, ke, kevq, k_link);
		} else
			SLIST_REMOVE(&kplist, ke, kevq, k_link);
		SLIST_REMOVE(&kevlist, ke, kevq, kev_link);
		kmem_free(ke, sizeof(*ke));
	}
	kevs--;

	mutex_exit(&smbkq_lock);

	/* If there was request still pending, cancel it now */
	if (rq) {
		smb_iod_removerq(rq);

		/*
		 * Explicitly cancel the request, so that server can
		 * free directory change notify resources.
		 */
		smbfs_smb_ntcancel(SSTOCP(rq->sr_share), rq->sr_mid,
		    &smbkq_scred);

		/* Free */
		smb_rq_done(rq);
	}
}
332
333static int
334filt_smbfsread(struct knote *kn, long hint)
335{
336 struct kevq *ke = (struct kevq *)kn->kn_hook;
337 struct vnode *vp = ke->vp;
338 int rv;
339
340 if (hint == NOTE_REVOKE) {
341 /*
342 * filesystem is gone, so set the EOF flag and schedule
343 * the knote for deletion.
344 */
345 KASSERT(mutex_owned(vp->v_interlock));
346 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
347 return (1);
348 }
349
350 /* There is no size info for directories */
351 if (hint == 0) {
352 mutex_enter(vp->v_interlock);
353 } else {
354 KASSERT(mutex_owned(vp->v_interlock));
355 }
356 if (vp->v_type == VDIR) {
357 /*
358 * This is kind of hackish, since we need to
359 * set the flag when we are called with the hint
360 * to make confirming call from kern_event.c
361 * succeed too, but need to unset it afterwards
362 * so that the directory wouldn't stay flagged
363 * as changed.
364 * XXX perhaps just fail for directories?
365 */
366 if (hint & NOTE_WRITE) {
367 kn->kn_fflags |= NOTE_WRITE;
368 rv = (1 * sizeof(struct dirent));
369 } else if (hint == 0 && (kn->kn_fflags & NOTE_WRITE)) {
370 kn->kn_fflags &= ~NOTE_WRITE;
371 rv = (1 * sizeof(struct dirent));
372 } else
373 rv = 0;
374 } else {
375 kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
376 rv = (kn->kn_data != 0);
377 }
378 if (hint == 0) {
379 mutex_enter(vp->v_interlock);
380 }
381
382 return rv;
383}
384
/*
 * EVFILT_VNODE filter: accumulate requested vnode event flags into
 * kn_fflags and report whether any are pending.
 *
 * Lock protocol mirrors filt_smbfsread(): interlock is held by the
 * caller when hint != 0; for hint == 0 we briefly take it ourselves
 * (enter/exit pair acts as a synchronization point with writers).
 */
static int
filt_smbfsvnode(struct knote *kn, long hint)
{
	struct kevq *ke = (struct kevq *)kn->kn_hook;
	struct vnode *vp = ke->vp;

	switch (hint) {
	case NOTE_REVOKE:
		/* vnode revoked: signal EOF and record the event if asked */
		KASSERT(mutex_owned(vp->v_interlock));
		kn->kn_flags |= EV_EOF;
		if ((kn->kn_sfflags & hint) != 0)
			kn->kn_fflags |= hint;
		return (1);
	case 0:
		mutex_enter(vp->v_interlock);
		mutex_exit(vp->v_interlock);
		break;
	default:
		/* record the event if the watcher subscribed to it */
		KASSERT(mutex_owned(vp->v_interlock));
		if ((kn->kn_sfflags & hint) != 0)
			kn->kn_fflags |= hint;
		break;
	}

	return (kn->kn_fflags != 0);
}
411
/* filter tables; attach is done in smbfs_kqfilter(), so f_attach is NULL */
static const struct filterops smbfsread_filtops =
	{ 1, NULL, filt_smbfsdetach, filt_smbfsread };
static const struct filterops smbfsvnode_filtops =
	{ 1, NULL, filt_smbfsdetach, filt_smbfsvnode };
416
417int
418smbfs_kqfilter(void *v)
419{
420 struct vop_kqfilter_args /* {
421 struct vnode *a_vp;
422 struct knote *a_kn;
423 } */ *ap = v;
424 struct vnode *vp = ap->a_vp;
425 struct knote *kn = ap->a_kn;
426 struct kevq *ke, *ken;
427 int error = 0;
428 struct vattr attr;
429 struct lwp *l = curlwp; /* XXX */
430 int dnot;
431 struct smb_vc *vcp = SSTOVC(VTOSMB(vp)->n_mount->sm_share);
432 static bool again;
433
434 switch (kn->kn_filter) {
435 case EVFILT_READ:
436 kn->kn_fop = &smbfsread_filtops;
437 break;
438 case EVFILT_VNODE:
439 kn->kn_fop = &smbfsvnode_filtops;
440 break;
441 default:
442 return (EINVAL);
443 }
444
445 /* Find out if we can use directory change notify for this file */
446 dnot = (vp->v_type == VDIR
447 && (SMB_CAPS(vcp) & SMB_CAP_NT_SMBS)
448 && dnot_num < DNOTIFY_MAX);
449
450 /*
451 * Put the vnode to watched list.
452 */
453 kevs++;
454
455 /*
456 * Fetch current attributes. It's only needed when the vnode
457 * is not watched yet, but we need to do this without lock
458 * held. This is likely cheap due to attrcache, so do it now.
459 */
460 memset(&attr, 0, sizeof(attr));
461 vn_lock(vp, LK_SHARED | LK_RETRY);
462 (void) VOP_GETATTR(vp, &attr, l->l_cred);
463 VOP_UNLOCK(vp);
464
465 /* ensure the handler is running */
466 /* XXX this is unreliable. */
467 if (!again) {
468 mutex_init(&smbkq_lock, MUTEX_DEFAULT, IPL_NONE);
469 }
470 if (!smbkql) {
471 /* XXX very fishy */
472 error = kthread_create(PRI_NONE, 0, NULL, smbfs_kqpoll,
473 NULL, &smbkql, "smbkq");
474 smb_makescred(&smbkq_scred, smbkql, smbkql->l_cred);
475 if (error) {
476 kevs--;
477 return (error);
478 }
479 }
480
481 /*
482 * Allocate new kev. It's more probable it will be needed,
483 * and the malloc is cheaper than scanning possibly
484 * large kevlist list second time after malloc.
485 */
486 ken = kmem_alloc(sizeof(*ken), KM_SLEEP);
487
488 /* Check the list and insert new entry */
489 mutex_enter(&smbkq_lock);
490 SLIST_FOREACH(ke, &kevlist, kev_link) {
491 if (ke->vp == vp)
492 break;
493 }
494
495 if (ke) {
496 /* already watched, so just bump usecount */
497 ke->usecount++;
498 kmem_free(ken, sizeof(*ken));
499 } else {
500 /* need a new one */
501 memset(ken, 0, sizeof(*ken));
502 ke = ken;
503 ke->vp = vp;
504 ke->usecount = 1;
505 ke->flags = (dnot) ? KEVQ_DNOT : 0;
506 ke->omtime = attr.va_mtime;
507 ke->octime = attr.va_ctime;
508 ke->onlink = attr.va_nlink;
509
510 if (dnot) {
511 int s;
512
513 /*
514 * Add kevent to list of 'need attend' kevnets.
515 * The handler will pick it up and setup request
516 * appropriately.
517 */
518 s = splnet();
519 SLIST_INSERT_HEAD(&kdnlist, ke, k_link);
520 splx(s);
521 dnot_num++;
522 } else {
523 /* add to poll list */
524 SLIST_INSERT_HEAD(&kplist, ke, k_link);
525 }
526
527 SLIST_INSERT_HEAD(&kevlist, ke, kev_link);
528
529 /* kick the handler */
530 wakeup(smbkql);
531 }
532
533 mutex_enter(vp->v_interlock);
534 SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
535 kn->kn_hook = ke;
536 mutex_exit(vp->v_interlock);
537
538 mutex_exit(&smbkq_lock);
539
540 return (0);
541}
542