[PATCH] afs: Fix file locking
Jeff Layton
jlayton at redhat.com
Wed Nov 15 03:48:02 PST 2017
On Wed, 2017-11-15 at 10:30 +0000, David Howells wrote:
> Fix the AFS file locking whereby the use of the big kernel lock (which
> could be slept with) was replaced by a spinlock (which couldn't). The
> problem is that the AFS code was doing stuff inside the critical section
> that might call schedule(), so this is a broken transformation.
>
> Fix this by the following means:
>
> (1) Use a state machine with a proper state that can only be changed under
> the spinlock rather than using a collection of bit flags.
>
> (2) Cache the key used for the lock and the lock type in the afs_vnode
> struct so that the manager work function doesn't have to refer to a
> file_lock struct that's been dequeued. This makes signal handling
> safer.
>
> (4) Move the unlock from afs_do_unlk() to afs_fl_release_private() which
> means that unlock is achieved in other circumstances too.
>
> (5) Unlock the file on the server before taking the next conflicting lock.
>
> Also change:
>
> (1) Check the permits on a file before actually trying the lock.
>
> (2) fsync the file before effecting an explicit unlock operation. We
> don't fsync if the lock is erased otherwise as we might not be in a
> context where we can actually do that.
>
> Further fixes:
>
> (1) Fixed-fileserver address rotation is made to work. It's only used by
> the locking functions, so couldn't be tested before.
>
>
> Fixes: 1c8c601a8c0d ("locks: protect most of the file_lock handling with i_lock")
This was broken before then, actually.
Fixes: 72f98e72551f (locks: turn lock_flocks into a spinlock)
> Signed-off-by: David Howells <dhowells at redhat.com>
> cc: jlayton at redhat.com
> ---
>
> fs/afs/flock.c | 548 ++++++++++++++++++++++++++++----------------------
> fs/afs/internal.h | 23 +-
> fs/afs/rotate.c | 70 +++++-
> fs/afs/security.c | 4
> fs/afs/server_list.c | 2
> 5 files changed, 385 insertions(+), 262 deletions(-)
>
> diff --git a/fs/afs/flock.c b/fs/afs/flock.c
> index 7571a5dfd5a3..c40ba2fe3cbe 100644
> --- a/fs/afs/flock.c
> +++ b/fs/afs/flock.c
> @@ -170,7 +170,7 @@ void afs_lock_work(struct work_struct *work)
> {
> struct afs_vnode *vnode =
> container_of(work, struct afs_vnode, lock_work.work);
> - struct file_lock *fl;
> + struct file_lock *fl, *next;
> afs_lock_type_t type;
> struct key *key;
> int ret;
> @@ -179,117 +179,136 @@ void afs_lock_work(struct work_struct *work)
>
> spin_lock(&vnode->lock);
>
> - if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
> +again:
> + _debug("wstate %u for %p", vnode->lock_state, vnode);
> + switch (vnode->lock_state) {
> + case AFS_VNODE_LOCK_NEED_UNLOCK:
> _debug("unlock");
> + vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
> spin_unlock(&vnode->lock);
>
> /* attempt to release the server lock; if it fails, we just
> - * wait 5 minutes and it'll time out anyway */
> - ret = afs_release_lock(vnode, vnode->unlock_key);
> + * wait 5 minutes and it'll expire anyway */
> + ret = afs_release_lock(vnode, vnode->lock_key);
> if (ret < 0)
> printk(KERN_WARNING "AFS:"
> " Failed to release lock on {%x:%x} error %d\n",
> vnode->fid.vid, vnode->fid.vnode, ret);
>
> spin_lock(&vnode->lock);
> - key_put(vnode->unlock_key);
> - vnode->unlock_key = NULL;
> - clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
> - }
> + key_put(vnode->lock_key);
> + vnode->lock_key = NULL;
> + vnode->lock_state = AFS_VNODE_LOCK_NONE;
> +
> + if (list_empty(&vnode->pending_locks)) {
> + spin_unlock(&vnode->lock);
> + return;
> + }
> +
> + /* The new front of the queue now owns the state variables. */
> + next = list_entry(vnode->pending_locks.next,
> + struct file_lock, fl_u.afs.link);
> + vnode->lock_key = afs_file_key(next->fl_file);
> + vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
> + vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
> + goto again;
>
> - /* if we've got a lock, then it must be time to extend that lock as AFS
> - * locks time out after 5 minutes */
> - if (!list_empty(&vnode->granted_locks)) {
> + /* If we've already got a lock, then it must be time to extend that
> + * lock as AFS locks time out after 5 minutes.
> + */
> + case AFS_VNODE_LOCK_GRANTED:
> _debug("extend");
>
> - if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
> - BUG();
> - fl = list_entry(vnode->granted_locks.next,
> - struct file_lock, fl_u.afs.link);
> - key = key_get(afs_file_key(fl->fl_file));
> + ASSERT(!list_empty(&vnode->granted_locks));
> +
> + key = key_get(vnode->lock_key);
> + vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
> spin_unlock(&vnode->lock);
>
> - ret = afs_extend_lock(vnode, key);
> - clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
> + ret = afs_extend_lock(vnode, key); /* RPC */
> key_put(key);
> - switch (ret) {
> - case 0:
> +
> + if (ret < 0)
> + pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
> + vnode->fid.vid, vnode->fid.vnode, ret);
> +
> + spin_lock(&vnode->lock);
> +
> + if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
> + goto again;
> + vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
> +
> + if (ret == 0)
> afs_schedule_lock_extension(vnode);
> - break;
> - default:
> - /* ummm... we failed to extend the lock - retry
> - * extension shortly */
> - printk(KERN_WARNING "AFS:"
> - " Failed to extend lock on {%x:%x} error %d\n",
> - vnode->fid.vid, vnode->fid.vnode, ret);
> + else
> queue_delayed_work(afs_lock_manager, &vnode->lock_work,
> HZ * 10);
> - break;
> - }
> - _leave(" [extend]");
> + spin_unlock(&vnode->lock);
> + _leave(" [ext]");
> return;
> - }
>
> - /* if we don't have a granted lock, then we must've been called back by
> - * the server, and so if might be possible to get a lock we're
> - * currently waiting for */
> - if (!list_empty(&vnode->pending_locks)) {
> + /* If we don't have a granted lock, then we must've been called
> + * back by the server, and so if might be possible to get a
> + * lock we're currently waiting for.
> + */
> + case AFS_VNODE_LOCK_WAITING_FOR_CB:
> _debug("get");
>
> - if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
> - BUG();
> - fl = list_entry(vnode->pending_locks.next,
> - struct file_lock, fl_u.afs.link);
> - key = key_get(afs_file_key(fl->fl_file));
> - type = (fl->fl_type == F_RDLCK) ?
> - AFS_LOCK_READ : AFS_LOCK_WRITE;
> + key = key_get(vnode->lock_key);
> + type = vnode->lock_type;
> + vnode->lock_state = AFS_VNODE_LOCK_SETTING;
> spin_unlock(&vnode->lock);
>
> - ret = afs_set_lock(vnode, key, type);
> - clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
> + ret = afs_set_lock(vnode, key, type); /* RPC */
> + key_put(key);
> +
> + spin_lock(&vnode->lock);
> switch (ret) {
> case -EWOULDBLOCK:
> _debug("blocked");
> break;
> case 0:
> _debug("acquired");
> - if (type == AFS_LOCK_READ)
> - set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
> - else
> - set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
> - ret = AFS_LOCK_GRANTED;
> + vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
> + /* Fall through */
> default:
> - spin_lock(&vnode->lock);
> - /* the pending lock may have been withdrawn due to a
> - * signal */
> - if (list_entry(vnode->pending_locks.next,
> - struct file_lock, fl_u.afs.link) == fl) {
> - fl->fl_u.afs.state = ret;
> - if (ret == AFS_LOCK_GRANTED)
> - afs_grant_locks(vnode, fl);
> - else
> - list_del_init(&fl->fl_u.afs.link);
> - wake_up(&fl->fl_wait);
> - spin_unlock(&vnode->lock);
> - } else {
> + /* Pass the lock or the error onto the first locker in
> + * the list - if they're looking for this type of lock.
> + * If they're not, we assume that whoever asked for it
> + * took a signal.
> + */
> + if (list_empty(&vnode->pending_locks)) {
> _debug("withdrawn");
> - clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
> - clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
> - spin_unlock(&vnode->lock);
> - afs_release_lock(vnode, key);
> - if (!list_empty(&vnode->pending_locks))
> - afs_lock_may_be_available(vnode);
> + vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
> + goto again;
> }
> - break;
> +
> + fl = list_entry(vnode->pending_locks.next,
> + struct file_lock, fl_u.afs.link);
> + type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
> + if (vnode->lock_type != type) {
> + _debug("changed");
> + vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
> + goto again;
> + }
> +
> + fl->fl_u.afs.state = ret;
> + if (ret == 0)
> + afs_grant_locks(vnode, fl);
> + else
> + list_del_init(&fl->fl_u.afs.link);
> + wake_up(&fl->fl_wait);
> + spin_unlock(&vnode->lock);
> + _leave(" [granted]");
> + return;
> }
> - key_put(key);
> - _leave(" [pend]");
> +
> + default:
> + /* Looks like a lock request was withdrawn. */
> + spin_unlock(&vnode->lock);
> + _leave(" [no]");
> return;
> }
> -
> - /* looks like the lock request was withdrawn on a signal */
> - spin_unlock(&vnode->lock);
> - _leave(" [no locks]");
> }
>
> /*
> @@ -298,15 +317,105 @@ void afs_lock_work(struct work_struct *work)
> * AF_RXRPC
> * - the caller must hold the vnode lock
> */
> -static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
> +static void afs_defer_unlock(struct afs_vnode *vnode)
> {
> - cancel_delayed_work(&vnode->lock_work);
> - if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
> - !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
> - BUG();
> - if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
> - BUG();
> - vnode->unlock_key = key_get(key);
> + _enter("");
> +
> + if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
> + vnode->lock_state == AFS_VNODE_LOCK_EXTENDING) {
> + cancel_delayed_work(&vnode->lock_work);
> +
> + vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
> + afs_lock_may_be_available(vnode);
> + }
> +}
> +
> +/*
> + * Check that our view of the file metadata is up to date and check to see
> + * whether we think that we have a locking permit.
> + */
> +static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
> + afs_lock_type_t type, bool can_sleep)
> +{
> + afs_access_t access;
> + int ret;
> +
> + /* Make sure we've got a callback on this file and that our view of the
> + * data version is up to date.
> + */
> + ret = afs_validate(vnode, key);
> + if (ret < 0)
> + return ret;
> +
> + /* Check the permission set to see if we're actually going to be
> + * allowed to get a lock on this file.
> + */
> + ret = afs_check_permit(vnode, key, &access);
> + if (ret < 0)
> + return ret;
> +
> + /* At a rough estimation, you need LOCK, WRITE or INSERT perm to
> + * read-lock a file and WRITE or INSERT perm to write-lock a file.
> + *
> + * We can't rely on the server to do this for us since if we want to
> + * share a read lock that we already have, we won't go the server.
> + */
> + if (type == AFS_LOCK_READ) {
> + if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
> + return -EACCES;
> + if (vnode->status.lock_count == -1 && !can_sleep)
> + return -EAGAIN; /* Write locked */
> + } else {
> + if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
> + return -EACCES;
> + if (vnode->status.lock_count != 0 && !can_sleep)
> + return -EAGAIN; /* Locked */
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Remove the front runner from the pending queue.
> + * - The caller must hold vnode->lock.
> + */
> +static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
> +{
> + struct file_lock *next;
> +
> + _enter("");
> +
> + /* ->lock_type, ->lock_key and ->lock_state only belong to this
> + * file_lock if we're at the front of the pending queue or if we have
> + * the lock granted or if the lock_state is NEED_UNLOCK or UNLOCKING.
> + */
> + if (vnode->granted_locks.next == &fl->fl_u.afs.link &&
> + vnode->granted_locks.prev == &fl->fl_u.afs.link) {
> + list_del_init(&fl->fl_u.afs.link);
> + afs_defer_unlock(vnode);
> + return;
> + }
> +
> + if (!list_empty(&vnode->granted_locks) ||
> + vnode->pending_locks.next != &fl->fl_u.afs.link) {
> + list_del_init(&fl->fl_u.afs.link);
> + return;
> + }
> +
> + list_del_init(&fl->fl_u.afs.link);
> + key_put(vnode->lock_key);
> + vnode->lock_key = NULL;
> + vnode->lock_state = AFS_VNODE_LOCK_NONE;
> +
> + if (list_empty(&vnode->pending_locks))
> + return;
> +
> + /* The new front of the queue now owns the state variables. */
> + next = list_entry(vnode->pending_locks.next,
> + struct file_lock, fl_u.afs.link);
> + vnode->lock_key = afs_file_key(next->fl_file);
> + vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
> + vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
> afs_lock_may_be_available(vnode);
> }
>
> @@ -315,7 +424,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
> */
> static int afs_do_setlk(struct file *file, struct file_lock *fl)
> {
> - struct inode *inode = file_inode(file);
> + struct inode *inode = locks_inode(file);
> struct afs_vnode *vnode = AFS_FS_I(inode);
> afs_lock_type_t type;
> struct key *key = afs_file_key(file);
> @@ -333,165 +442,136 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
>
> type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
>
> - spin_lock(&inode->i_lock);
> -
> - /* make sure we've got a callback on this file and that our view of the
> - * data version is up to date */
> - ret = afs_validate(vnode, key);
> + ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
> if (ret < 0)
> - goto error;
> -
> - if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
> - ret = -EAGAIN;
> - goto error;
> - }
> + return ret;
>
> spin_lock(&vnode->lock);
>
> - /* if we've already got a readlock on the server then we can instantly
> + /* If we've already got a readlock on the server then we instantly
> * grant another readlock, irrespective of whether there are any
> - * pending writelocks */
> + * pending writelocks.
> + */
> if (type == AFS_LOCK_READ &&
> - vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
> + vnode->lock_state == AFS_VNODE_LOCK_GRANTED &&
> + vnode->lock_type == AFS_LOCK_READ) {
> _debug("instant readlock");
> - ASSERTCMP(vnode->flags &
> - ((1 << AFS_VNODE_LOCKING) |
> - (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
> ASSERT(!list_empty(&vnode->granted_locks));
> - goto sharing_existing_lock;
> + goto share_existing_lock;
> }
>
> - /* if there's no-one else with a lock on this vnode, then we need to
> - * ask the server for a lock */
> - if (list_empty(&vnode->pending_locks) &&
> - list_empty(&vnode->granted_locks)) {
> - _debug("not locked");
> - ASSERTCMP(vnode->flags &
> - ((1 << AFS_VNODE_LOCKING) |
> - (1 << AFS_VNODE_READLOCKED) |
> - (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
> - list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
> - set_bit(AFS_VNODE_LOCKING, &vnode->flags);
> - spin_unlock(&vnode->lock);
> + list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
>
> - ret = afs_set_lock(vnode, key, type);
> - clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
> - switch (ret) {
> - case 0:
> - _debug("acquired");
> - goto acquired_server_lock;
> - case -EWOULDBLOCK:
> - _debug("would block");
> - spin_lock(&vnode->lock);
> - ASSERT(list_empty(&vnode->granted_locks));
> - ASSERTCMP(vnode->pending_locks.next, ==,
> - &fl->fl_u.afs.link);
> - goto wait;
> - default:
> - spin_lock(&vnode->lock);
> - list_del_init(&fl->fl_u.afs.link);
> - spin_unlock(&vnode->lock);
> - goto error;
> - }
> - }
> + if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
> + goto need_to_wait;
>
> - /* otherwise, we need to wait for a local lock to become available */
> - _debug("wait local");
> - list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
> -wait:
> - if (!(fl->fl_flags & FL_SLEEP)) {
> - _debug("noblock");
> - ret = -EAGAIN;
> - goto abort_attempt;
> - }
> + /* We don't have a lock on this vnode and we aren't currently waiting
> + * for one either, so ask the server for a lock.
> + *
> + * Note that we need to be careful if we get interrupted by a signal
> + * after dispatching the request as we may still get the lock, even
> + * though we don't wait for the reply (it's not too bad a problem - the
> + * lock will expire in 10 mins anyway).
> + */
> + _debug("not locked");
> + vnode->lock_key = key_get(key);
> + vnode->lock_type = type;
> + vnode->lock_state = AFS_VNODE_LOCK_SETTING;
> spin_unlock(&vnode->lock);
>
> - /* now we need to sleep and wait for the lock manager thread to get the
> - * lock from the server */
> - _debug("sleep");
> - ret = wait_event_interruptible(fl->fl_wait,
> - fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
> - if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
> - ret = fl->fl_u.afs.state;
> - if (ret < 0)
> - goto error;
> - spin_lock(&vnode->lock);
> - goto given_lock;
> - }
> -
> - /* we were interrupted, but someone may still be in the throes of
> - * giving us the lock */
> - _debug("intr");
> - ASSERTCMP(ret, ==, -ERESTARTSYS);
> + ret = afs_set_lock(vnode, key, type); /* RPC */
>
> spin_lock(&vnode->lock);
> - if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
> - ret = fl->fl_u.afs.state;
> - if (ret < 0) {
> - spin_unlock(&vnode->lock);
> - goto error;
> - }
> - goto given_lock;
> - }
> + switch (ret) {
> + default:
> + goto abort_attempt;
>
> -abort_attempt:
> - /* we aren't going to get the lock, either because we're unwilling to
> - * wait, or because some signal happened */
> - _debug("abort");
> - if (list_empty(&vnode->granted_locks) &&
> - vnode->pending_locks.next == &fl->fl_u.afs.link) {
> - if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
> - /* kick the next pending lock into having a go */
> - list_del_init(&fl->fl_u.afs.link);
> - afs_lock_may_be_available(vnode);
> - }
> - } else {
> - list_del_init(&fl->fl_u.afs.link);
> + case -EWOULDBLOCK:
> + /* The server doesn't have a lock-waiting queue, so the client
> + * will have to retry. The server will break the outstanding
> + * callbacks on a file when a lock is released.
> + */
> + _debug("would block");
> + ASSERT(list_empty(&vnode->granted_locks));
> + ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
> + vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
> + goto need_to_wait;
> +
> + case 0:
> + _debug("acquired");
> + break;
> }
> - spin_unlock(&vnode->lock);
> - goto error;
>
> -acquired_server_lock:
> /* we've acquired a server lock, but it needs to be renewed after 5
> * mins */
> - spin_lock(&vnode->lock);
> + vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
> afs_schedule_lock_extension(vnode);
> - if (type == AFS_LOCK_READ)
> - set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
> - else
> - set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
> -sharing_existing_lock:
> +
> +share_existing_lock:
> /* the lock has been granted as far as we're concerned... */
> fl->fl_u.afs.state = AFS_LOCK_GRANTED;
> list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
> +
> given_lock:
> /* ... but we do still need to get the VFS's blessing */
> - ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
> - ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
> - (1 << AFS_VNODE_WRITELOCKED))) != 0);
> + spin_unlock(&vnode->lock);
> +
> ret = posix_lock_file(file, fl, NULL);
> if (ret < 0)
> goto vfs_rejected_lock;
> - spin_unlock(&vnode->lock);
>
> - /* again, make sure we've got a callback on this file and, again, make
> + /* Again, make sure we've got a callback on this file and, again, make
> * sure that our view of the data version is up to date (we ignore
> - * errors incurred here and deal with the consequences elsewhere) */
> + * errors incurred here and deal with the consequences elsewhere).
> + */
> afs_validate(vnode, key);
> + _leave(" = 0");
> + return 0;
>
> -error:
> - spin_unlock(&inode->i_lock);
> +need_to_wait:
> + /* We're going to have to wait. Either this client doesn't have a lock
> + * on the server yet and we need to wait for a callback to occur, or
> + * the client does have a lock on the server, but it belongs to some
> + * other process(es) and is incompatible with the lock we want.
> + */
> + ret = -EAGAIN;
> + if (fl->fl_flags & FL_SLEEP) {
> + spin_unlock(&vnode->lock);
> +
> + _debug("sleep");
> + ret = wait_event_interruptible(fl->fl_wait,
> + fl->fl_u.afs.state != AFS_LOCK_PENDING);
> +
> + spin_lock(&vnode->lock);
> + }
> +
> + if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
> + goto given_lock;
> + if (fl->fl_u.afs.state < 0)
> + ret = fl->fl_u.afs.state;
> +
> +abort_attempt:
> + /* we aren't going to get the lock, either because we're unwilling to
> + * wait, or because some signal happened */
> + _debug("abort");
> + afs_dequeue_lock(vnode, fl);
> +
> +error_unlock:
> + spin_unlock(&vnode->lock);
> _leave(" = %d", ret);
> return ret;
>
> vfs_rejected_lock:
> - /* the VFS rejected the lock we just obtained, so we have to discard
> - * what we just got */
> + /* The VFS rejected the lock we just obtained, so we have to discard
> + * what we just got. We defer this to the lock manager work item to
> + * deal with.
> + */
> _debug("vfs refused %d", ret);
> + spin_lock(&vnode->lock);
> list_del_init(&fl->fl_u.afs.link);
> if (list_empty(&vnode->granted_locks))
> - afs_defer_unlock(vnode, key);
> - goto abort_attempt;
> + afs_defer_unlock(vnode);
> + goto error_unlock;
> }
>
> /*
> @@ -499,34 +579,21 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
> */
> static int afs_do_unlk(struct file *file, struct file_lock *fl)
> {
> - struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
> - struct key *key = afs_file_key(file);
> + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
> int ret;
>
> _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
>
> + /* Flush all pending writes before doing anything with locks. */
> + vfs_fsync(file, 0);
> +
> /* only whole-file unlocks are supported */
> if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
> return -EINVAL;
>
> - fl->fl_ops = &afs_lock_ops;
> - INIT_LIST_HEAD(&fl->fl_u.afs.link);
> - fl->fl_u.afs.state = AFS_LOCK_PENDING;
> -
> - spin_lock(&vnode->lock);
> ret = posix_lock_file(file, fl, NULL);
> - if (ret < 0) {
> - spin_unlock(&vnode->lock);
> - _leave(" = %d [vfs]", ret);
> - return ret;
> - }
> -
> - /* discard the server lock only if all granted locks are gone */
> - if (list_empty(&vnode->granted_locks))
> - afs_defer_unlock(vnode, key);
> - spin_unlock(&vnode->lock);
> - _leave(" = 0");
> - return 0;
> + _leave(" = %d [%u]", ret, vnode->lock_state);
> + return ret;
> }
>
> /*
> @@ -534,7 +601,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
> */
> static int afs_do_getlk(struct file *file, struct file_lock *fl)
> {
> - struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
> + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
> struct key *key = afs_file_key(file);
> int ret, lock_count;
>
> @@ -542,29 +609,25 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
>
> fl->fl_type = F_UNLCK;
>
> - inode_lock(&vnode->vfs_inode);
> -
> /* check local lock records first */
> - ret = 0;
> posix_test_lock(file, fl);
> if (fl->fl_type == F_UNLCK) {
> /* no local locks; consult the server */
> ret = afs_fetch_status(vnode, key);
> if (ret < 0)
> goto error;
> - lock_count = vnode->status.lock_count;
> - if (lock_count) {
> - if (lock_count > 0)
> - fl->fl_type = F_RDLCK;
> - else
> - fl->fl_type = F_WRLCK;
> - fl->fl_start = 0;
> - fl->fl_end = OFFSET_MAX;
> - }
> +
> + lock_count = READ_ONCE(vnode->status.lock_count);
> + if (lock_count > 0)
> + fl->fl_type = F_RDLCK;
> + else
> + fl->fl_type = F_WRLCK;
> + fl->fl_start = 0;
> + fl->fl_end = OFFSET_MAX;
> }
>
> + ret = 0;
> error:
> - inode_unlock(&vnode->vfs_inode);
> _leave(" = %d [%hd]", ret, fl->fl_type);
> return ret;
> }
> @@ -574,7 +637,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
> */
> int afs_lock(struct file *file, int cmd, struct file_lock *fl)
> {
> - struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
> + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
>
> _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
> vnode->fid.vid, vnode->fid.vnode, cmd,
> @@ -597,7 +660,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
> */
> int afs_flock(struct file *file, int cmd, struct file_lock *fl)
> {
> - struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
> + struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
>
> _enter("{%x:%u},%d,{t=%x,fl=%x}",
> vnode->fid.vid, vnode->fid.vnode, cmd,
> @@ -627,9 +690,13 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
> */
> static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
> {
> + struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
> +
> _enter("");
>
> + spin_lock(&vnode->lock);
> list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
> + spin_unlock(&vnode->lock);
> }
>
> /*
> @@ -638,7 +705,12 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
> */
> static void afs_fl_release_private(struct file_lock *fl)
> {
> + struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
> +
> _enter("");
>
> - list_del_init(&fl->fl_u.afs.link);
> + spin_lock(&vnode->lock);
> + afs_dequeue_lock(vnode, fl);
> + _debug("state %u for %p", vnode->lock_state, vnode);
> + spin_unlock(&vnode->lock);
> }
I don't know about this...unlocking the lock on the server from
fl_release_private is rather unusual.
fl_release_private is called as we're freeing a file_lock of any sort.
That includes both lock requests and lock records. Not all of those
necessarily correspond to a lock on the server.
> diff --git a/fs/afs/internal.h b/fs/afs/internal.h
> index bd8dcee7e066..e03910cebdd4 100644
> --- a/fs/afs/internal.h
> +++ b/fs/afs/internal.h
> @@ -430,6 +430,16 @@ struct afs_volume {
> u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
> };
>
> +enum afs_lock_state {
> + AFS_VNODE_LOCK_NONE, /* The vnode has no lock on the server */
> + AFS_VNODE_LOCK_WAITING_FOR_CB, /* We're waiting for the server to break the callback */
> + AFS_VNODE_LOCK_SETTING, /* We're asking the server for a lock */
> + AFS_VNODE_LOCK_GRANTED, /* We have a lock on the server */
> + AFS_VNODE_LOCK_EXTENDING, /* We're extending a lock on the server */
> + AFS_VNODE_LOCK_NEED_UNLOCK, /* We need to unlock on the server */
> + AFS_VNODE_LOCK_UNLOCKING, /* We're telling the server to unlock */
> +};
> +
> /*
> * AFS inode private data
> */
> @@ -454,18 +464,16 @@ struct afs_vnode {
> #define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
> #define AFS_VNODE_DELETED 4 /* set if vnode deleted on server */
> #define AFS_VNODE_MOUNTPOINT 5 /* set if vnode is a mountpoint symlink */
> -#define AFS_VNODE_LOCKING 6 /* set if waiting for lock on vnode */
> -#define AFS_VNODE_READLOCKED 7 /* set if vnode is read-locked on the server */
> -#define AFS_VNODE_WRITELOCKED 8 /* set if vnode is write-locked on the server */
> -#define AFS_VNODE_UNLOCKING 9 /* set if vnode is being unlocked on the server */
> -#define AFS_VNODE_AUTOCELL 10 /* set if Vnode is an auto mount point */
> -#define AFS_VNODE_PSEUDODIR 11 /* set if Vnode is a pseudo directory */
> +#define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */
> +#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
>
> struct list_head wb_keys; /* List of keys available for writeback */
> struct list_head pending_locks; /* locks waiting to be granted */
> struct list_head granted_locks; /* locks granted on this file */
> struct delayed_work lock_work; /* work to be done in locking */
> - struct key *unlock_key; /* key to be used in unlocking */
> + struct key *lock_key; /* Key to be used in lock ops */
> + enum afs_lock_state lock_state : 8;
> + afs_lock_type_t lock_type : 8;
>
> /* outstanding callback notification on this file */
> struct afs_cb_interest *cb_interest; /* Server on which this resides */
> @@ -843,6 +851,7 @@ extern void afs_clear_permits(struct afs_vnode *);
> extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
> extern void afs_zap_permits(struct rcu_head *);
> extern struct key *afs_request_key(struct afs_cell *);
> +extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
> extern int afs_permission(struct inode *, int);
> extern void __exit afs_clean_up_permit_cache(void);
>
> diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
> index e728ca1776c9..d04511fb3879 100644
> --- a/fs/afs/rotate.c
> +++ b/fs/afs/rotate.c
> @@ -46,8 +46,7 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
> return false;
> }
>
> - if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
> - test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
> + if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
> fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
> return true;
> }
> @@ -117,7 +116,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
> case VSALVAGING: m = "being salvaged"; break;
> default: m = "busy"; break;
> }
> -
> +
> pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
> }
>
> @@ -438,24 +437,67 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
>
> _enter("");
>
> - if (!cbi) {
> - fc->ac.error = -ESTALE;
> + switch (fc->ac.error) {
> + case SHRT_MAX:
> + if (!cbi) {
> + fc->ac.error = -ESTALE;
> + fc->flags |= AFS_FS_CURSOR_STOP;
> + return false;
> + }
> +
> + fc->cbi = afs_get_cb_interest(vnode->cb_interest);
> +
> + read_lock(&cbi->server->fs_lock);
> + alist = rcu_dereference_protected(cbi->server->addresses,
> + lockdep_is_held(&cbi->server->fs_lock));
> + afs_get_addrlist(alist);
> + read_unlock(&cbi->server->fs_lock);
> + if (!alist) {
> + fc->ac.error = -ESTALE;
> + fc->flags |= AFS_FS_CURSOR_STOP;
> + return false;
> + }
> +
> + fc->ac.alist = alist;
> + fc->ac.addr = NULL;
> + fc->ac.start = READ_ONCE(alist->index);
> + fc->ac.index = fc->ac.start;
> + fc->ac.error = 0;
> + fc->ac.begun = false;
> + goto iterate_address;
> +
> + case 0:
> + default:
> + /* Success or local failure. Stop. */
> fc->flags |= AFS_FS_CURSOR_STOP;
> + _leave(" = f [okay/local %d]", fc->ac.error);
> return false;
> - }
>
> - read_lock(&cbi->server->fs_lock);
> - alist = afs_get_addrlist(cbi->server->addresses);
> - read_unlock(&cbi->server->fs_lock);
> - if (!alist) {
> - fc->ac.error = -ESTALE;
> + case -ECONNABORTED:
> fc->flags |= AFS_FS_CURSOR_STOP;
> + _leave(" = f [abort]");
> return false;
> +
> + case -ENETUNREACH:
> + case -EHOSTUNREACH:
> + case -ECONNREFUSED:
> + case -ETIMEDOUT:
> + case -ETIME:
> + _debug("no conn");
> + goto iterate_address;
> }
>
> - fc->ac.alist = alist;
> - fc->ac.error = 0;
> - return true;
> +iterate_address:
> + /* Iterate over the current server's address list to try and find an
> + * address on which it will respond to us.
> + */
> + if (afs_iterate_addresses(&fc->ac)) {
> + _leave(" = t");
> + return true;
> + }
> +
> + afs_end_cursor(&fc->ac);
> + return false;
> }
>
> /*
> diff --git a/fs/afs/security.c b/fs/afs/security.c
> index 46a881a4d08f..2b00097101b3 100644
> --- a/fs/afs/security.c
> +++ b/fs/afs/security.c
> @@ -284,8 +284,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
> * permitted to be accessed with this authorisation, and if so, what access it
> * is granted
> */
> -static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
> - afs_access_t *_access)
> +int afs_check_permit(struct afs_vnode *vnode, struct key *key,
> + afs_access_t *_access)
> {
> struct afs_permits *permits;
> bool valid = false;
> diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
> index 26bad7032bba..0ab3f8457839 100644
> --- a/fs/afs/server_list.c
> +++ b/fs/afs/server_list.c
> @@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
> {
> int i;
>
> - if (refcount_dec_and_test(&slist->usage)) {
> + if (slist && refcount_dec_and_test(&slist->usage)) {
> for (i = 0; i < slist->nr_servers; i++) {
> afs_put_cb_interest(net, slist->servers[i].cb_interest);
> afs_put_server(net, slist->servers[i].server);
>
--
Jeff Layton <jlayton at redhat.com>
More information about the linux-afs
mailing list