[PATCH] afs: Fix file locking

Jeff Layton jlayton at redhat.com
Wed Nov 15 03:48:02 PST 2017


On Wed, 2017-11-15 at 10:30 +0000, David Howells wrote:
> Fix the AFS file locking whereby the use of the big kernel lock (which
> could be slept with) was replaced by a spinlock (which couldn't).  The
> problem is that the AFS code was doing stuff inside the critical section
> that might call schedule(), so this is a broken transformation.
> 
> Fix this by the following means:
> 
>  (1) Use a state machine with a proper state that can only be changed under
>      the spinlock rather than using a collection of bit flags.
> 
>  (2) Cache the key used for the lock and the lock type in the afs_vnode
>      struct so that the manager work function doesn't have to refer to a
>      file_lock struct that's been dequeued.  This makes signal handling
>      safer.
> 
>  (4) Move the unlock from afs_do_unlk() to afs_fl_release_private() which
>      means that unlock is achieved in other circumstances too.
> 
>  (5) Unlock the file on the server before taking the next conflicting lock.
> 
> Also change:
> 
>  (1) Check the permits on a file before actually trying the lock.
> 
>  (2) fsync the file before effecting an explicit unlock operation.  We
>      don't fsync if the lock is erased otherwise as we might not be in a
>      context where we can actually do that.
> 
> Further fixes:
> 
>  (1) Fixed-fileserver address rotation is made to work.  It's only used by
>      the locking functions, so couldn't be tested before.
> 
> 
> Fixes: 1c8c601a8c0d ("locks: protect most of the file_lock handling with i_lock")

This was broken before then, actually.

Fixes: 72f98e72551f (locks: turn lock_flocks into a spinlock)

> Signed-off-by: David Howells <dhowells at redhat.com>
> cc: jlayton at redhat.com
> ---
> 
>  fs/afs/flock.c       |  548 ++++++++++++++++++++++++++++----------------------
>  fs/afs/internal.h    |   23 +-
>  fs/afs/rotate.c      |   70 +++++-
>  fs/afs/security.c    |    4 
>  fs/afs/server_list.c |    2 
>  5 files changed, 385 insertions(+), 262 deletions(-)
> 
> diff --git a/fs/afs/flock.c b/fs/afs/flock.c
> index 7571a5dfd5a3..c40ba2fe3cbe 100644
> --- a/fs/afs/flock.c
> +++ b/fs/afs/flock.c
> @@ -170,7 +170,7 @@ void afs_lock_work(struct work_struct *work)
>  {
>  	struct afs_vnode *vnode =
>  		container_of(work, struct afs_vnode, lock_work.work);
> -	struct file_lock *fl;
> +	struct file_lock *fl, *next;
>  	afs_lock_type_t type;
>  	struct key *key;
>  	int ret;
> @@ -179,117 +179,136 @@ void afs_lock_work(struct work_struct *work)
>  
>  	spin_lock(&vnode->lock);
>  
> -	if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
> +again:
> +	_debug("wstate %u for %p", vnode->lock_state, vnode);
> +	switch (vnode->lock_state) {
> +	case AFS_VNODE_LOCK_NEED_UNLOCK:
>  		_debug("unlock");
> +		vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
>  		spin_unlock(&vnode->lock);
>  
>  		/* attempt to release the server lock; if it fails, we just
> -		 * wait 5 minutes and it'll time out anyway */
> -		ret = afs_release_lock(vnode, vnode->unlock_key);
> +		 * wait 5 minutes and it'll expire anyway */
> +		ret = afs_release_lock(vnode, vnode->lock_key);
>  		if (ret < 0)
>  			printk(KERN_WARNING "AFS:"
>  			       " Failed to release lock on {%x:%x} error %d\n",
>  			       vnode->fid.vid, vnode->fid.vnode, ret);
>  
>  		spin_lock(&vnode->lock);
> -		key_put(vnode->unlock_key);
> -		vnode->unlock_key = NULL;
> -		clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
> -	}
> +		key_put(vnode->lock_key);
> +		vnode->lock_key = NULL;
> +		vnode->lock_state = AFS_VNODE_LOCK_NONE;
> +
> +		if (list_empty(&vnode->pending_locks)) {
> +			spin_unlock(&vnode->lock);
> +			return;
> +		}
> +
> +		/* The new front of the queue now owns the state variables. */
> +		next = list_entry(vnode->pending_locks.next,
> +				  struct file_lock, fl_u.afs.link);
> +		vnode->lock_key = afs_file_key(next->fl_file);
> +		vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
> +		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
> +		goto again;
>  
> -	/* if we've got a lock, then it must be time to extend that lock as AFS
> -	 * locks time out after 5 minutes */
> -	if (!list_empty(&vnode->granted_locks)) {
> +	/* If we've already got a lock, then it must be time to extend that
> +	 * lock as AFS locks time out after 5 minutes.
> +	 */
> +	case AFS_VNODE_LOCK_GRANTED:
>  		_debug("extend");
>  
> -		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
> -			BUG();
> -		fl = list_entry(vnode->granted_locks.next,
> -				struct file_lock, fl_u.afs.link);
> -		key = key_get(afs_file_key(fl->fl_file));
> +		ASSERT(!list_empty(&vnode->granted_locks));
> +
> +		key = key_get(vnode->lock_key);
> +		vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
>  		spin_unlock(&vnode->lock);
>  
> -		ret = afs_extend_lock(vnode, key);
> -		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
> +		ret = afs_extend_lock(vnode, key); /* RPC */
>  		key_put(key);
> -		switch (ret) {
> -		case 0:
> +
> +		if (ret < 0)
> +			pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
> +				   vnode->fid.vid, vnode->fid.vnode, ret);
> +
> +		spin_lock(&vnode->lock);
> +
> +		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
> +			goto again;
> +		vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
> +
> +		if (ret == 0)
>  			afs_schedule_lock_extension(vnode);
> -			break;
> -		default:
> -			/* ummm... we failed to extend the lock - retry
> -			 * extension shortly */
> -			printk(KERN_WARNING "AFS:"
> -			       " Failed to extend lock on {%x:%x} error %d\n",
> -			       vnode->fid.vid, vnode->fid.vnode, ret);
> +		else
>  			queue_delayed_work(afs_lock_manager, &vnode->lock_work,
>  					   HZ * 10);
> -			break;
> -		}
> -		_leave(" [extend]");
> +		spin_unlock(&vnode->lock);
> +		_leave(" [ext]");
>  		return;
> -	}
>  
> -	/* if we don't have a granted lock, then we must've been called back by
> -	 * the server, and so if might be possible to get a lock we're
> -	 * currently waiting for */
> -	if (!list_empty(&vnode->pending_locks)) {
> +		/* If we don't have a granted lock, then we must've been called
> +		 * back by the server, and so if might be possible to get a
> +		 * lock we're currently waiting for.
> +		 */
> +	case AFS_VNODE_LOCK_WAITING_FOR_CB:
>  		_debug("get");
>  
> -		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
> -			BUG();
> -		fl = list_entry(vnode->pending_locks.next,
> -				struct file_lock, fl_u.afs.link);
> -		key = key_get(afs_file_key(fl->fl_file));
> -		type = (fl->fl_type == F_RDLCK) ?
> -			AFS_LOCK_READ : AFS_LOCK_WRITE;
> +		key = key_get(vnode->lock_key);
> +		type = vnode->lock_type;
> +		vnode->lock_state = AFS_VNODE_LOCK_SETTING;
>  		spin_unlock(&vnode->lock);
>  
> -		ret = afs_set_lock(vnode, key, type);
> -		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
> +		ret = afs_set_lock(vnode, key, type); /* RPC */
> +		key_put(key);
> +
> +		spin_lock(&vnode->lock);
>  		switch (ret) {
>  		case -EWOULDBLOCK:
>  			_debug("blocked");
>  			break;
>  		case 0:
>  			_debug("acquired");
> -			if (type == AFS_LOCK_READ)
> -				set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
> -			else
> -				set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
> -			ret = AFS_LOCK_GRANTED;
> +			vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
> +			/* Fall through */
>  		default:
> -			spin_lock(&vnode->lock);
> -			/* the pending lock may have been withdrawn due to a
> -			 * signal */
> -			if (list_entry(vnode->pending_locks.next,
> -				       struct file_lock, fl_u.afs.link) == fl) {
> -				fl->fl_u.afs.state = ret;
> -				if (ret == AFS_LOCK_GRANTED)
> -					afs_grant_locks(vnode, fl);
> -				else
> -					list_del_init(&fl->fl_u.afs.link);
> -				wake_up(&fl->fl_wait);
> -				spin_unlock(&vnode->lock);
> -			} else {
> +			/* Pass the lock or the error onto the first locker in
> +			 * the list - if they're looking for this type of lock.
> +			 * If they're not, we assume that whoever asked for it
> +			 * took a signal.
> +			 */
> +			if (list_empty(&vnode->pending_locks)) {
>  				_debug("withdrawn");
> -				clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
> -				clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
> -				spin_unlock(&vnode->lock);
> -				afs_release_lock(vnode, key);
> -				if (!list_empty(&vnode->pending_locks))
> -					afs_lock_may_be_available(vnode);
> +				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
> +				goto again;
>  			}
> -			break;
> +
> +			fl = list_entry(vnode->pending_locks.next,
> +					struct file_lock, fl_u.afs.link);
> +			type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
> +			if (vnode->lock_type != type) {
> +				_debug("changed");
> +				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
> +				goto again;
> +			}
> +
> +			fl->fl_u.afs.state = ret;
> +			if (ret == 0)
> +				afs_grant_locks(vnode, fl);
> +			else
> +				list_del_init(&fl->fl_u.afs.link);
> +			wake_up(&fl->fl_wait);
> +			spin_unlock(&vnode->lock);
> +			_leave(" [granted]");
> +			return;
>  		}
> -		key_put(key);
> -		_leave(" [pend]");
> +
> +	default:
> +		/* Looks like a lock request was withdrawn. */
> +		spin_unlock(&vnode->lock);
> +		_leave(" [no]");
>  		return;
>  	}
> -
> -	/* looks like the lock request was withdrawn on a signal */
> -	spin_unlock(&vnode->lock);
> -	_leave(" [no locks]");
>  }
>  
>  /*
> @@ -298,15 +317,105 @@ void afs_lock_work(struct work_struct *work)
>   * AF_RXRPC
>   * - the caller must hold the vnode lock
>   */
> -static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
> +static void afs_defer_unlock(struct afs_vnode *vnode)
>  {
> -	cancel_delayed_work(&vnode->lock_work);
> -	if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
> -	    !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
> -		BUG();
> -	if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
> -		BUG();
> -	vnode->unlock_key = key_get(key);
> +	_enter("");
> +
> +	if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
> +	    vnode->lock_state == AFS_VNODE_LOCK_EXTENDING) {
> +		cancel_delayed_work(&vnode->lock_work);
> +
> +		vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
> +		afs_lock_may_be_available(vnode);
> +	}
> +}
> +
> +/*
> + * Check that our view of the file metadata is up to date and check to see
> + * whether we think that we have a locking permit.
> + */
> +static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
> +			      afs_lock_type_t type, bool can_sleep)
> +{
> +	afs_access_t access;
> +	int ret;
> +
> +	/* Make sure we've got a callback on this file and that our view of the
> +	 * data version is up to date.
> +	 */
> +	ret = afs_validate(vnode, key);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Check the permission set to see if we're actually going to be
> +	 * allowed to get a lock on this file.
> +	 */
> +	ret = afs_check_permit(vnode, key, &access);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* At a rough estimation, you need LOCK, WRITE or INSERT perm to
> +	 * read-lock a file and WRITE or INSERT perm to write-lock a file.
> +	 *
> +	 * We can't rely on the server to do this for us since if we want to
> +	 * share a read lock that we already have, we won't go the server.
> +	 */
> +	if (type == AFS_LOCK_READ) {
> +		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
> +			return -EACCES;
> +		if (vnode->status.lock_count == -1 && !can_sleep)
> +			return -EAGAIN; /* Write locked */
> +	} else {
> +		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
> +			return -EACCES;
> +		if (vnode->status.lock_count != 0 && !can_sleep)
> +			return -EAGAIN; /* Locked */
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Remove the front runner from the pending queue.
> + * - The caller must hold vnode->lock.
> + */
> +static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
> +{
> +	struct file_lock *next;
> +
> +	_enter("");
> +
> +	/* ->lock_type, ->lock_key and ->lock_state only belong to this
> +	 * file_lock if we're at the front of the pending queue or if we have
> +	 * the lock granted or if the lock_state is NEED_UNLOCK or UNLOCKING.
> +	 */
> +	if (vnode->granted_locks.next == &fl->fl_u.afs.link &&
> +	    vnode->granted_locks.prev == &fl->fl_u.afs.link) {
> +		list_del_init(&fl->fl_u.afs.link);
> +		afs_defer_unlock(vnode);
> +		return;
> +	}
> +
> +	if (!list_empty(&vnode->granted_locks) ||
> +	    vnode->pending_locks.next != &fl->fl_u.afs.link) {
> +		list_del_init(&fl->fl_u.afs.link);
> +		return;
> +	}
> +
> +	list_del_init(&fl->fl_u.afs.link);
> +	key_put(vnode->lock_key);
> +	vnode->lock_key = NULL;
> +	vnode->lock_state = AFS_VNODE_LOCK_NONE;
> +
> +	if (list_empty(&vnode->pending_locks))
> +		return;
> +
> +	/* The new front of the queue now owns the state variables. */
> +	next = list_entry(vnode->pending_locks.next,
> +			  struct file_lock, fl_u.afs.link);
> +	vnode->lock_key = afs_file_key(next->fl_file);
> +	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
> +	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
>  	afs_lock_may_be_available(vnode);
>  }
>  
> @@ -315,7 +424,7 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
>   */
>  static int afs_do_setlk(struct file *file, struct file_lock *fl)
>  {
> -	struct inode *inode = file_inode(file);
> +	struct inode *inode = locks_inode(file);
>  	struct afs_vnode *vnode = AFS_FS_I(inode);
>  	afs_lock_type_t type;
>  	struct key *key = afs_file_key(file);
> @@ -333,165 +442,136 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
>  
>  	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
>  
> -	spin_lock(&inode->i_lock);
> -
> -	/* make sure we've got a callback on this file and that our view of the
> -	 * data version is up to date */
> -	ret = afs_validate(vnode, key);
> +	ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
>  	if (ret < 0)
> -		goto error;
> -
> -	if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
> -		ret = -EAGAIN;
> -		goto error;
> -	}
> +		return ret;
>  
>  	spin_lock(&vnode->lock);
>  
> -	/* if we've already got a readlock on the server then we can instantly
> +	/* If we've already got a readlock on the server then we instantly
>  	 * grant another readlock, irrespective of whether there are any
> -	 * pending writelocks */
> +	 * pending writelocks.
> +	 */
>  	if (type == AFS_LOCK_READ &&
> -	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
> +	    vnode->lock_state == AFS_VNODE_LOCK_GRANTED &&
> +	    vnode->lock_type == AFS_LOCK_READ) {
>  		_debug("instant readlock");
> -		ASSERTCMP(vnode->flags &
> -			  ((1 << AFS_VNODE_LOCKING) |
> -			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
>  		ASSERT(!list_empty(&vnode->granted_locks));
> -		goto sharing_existing_lock;
> +		goto share_existing_lock;
>  	}
>  
> -	/* if there's no-one else with a lock on this vnode, then we need to
> -	 * ask the server for a lock */
> -	if (list_empty(&vnode->pending_locks) &&
> -	    list_empty(&vnode->granted_locks)) {
> -		_debug("not locked");
> -		ASSERTCMP(vnode->flags &
> -			  ((1 << AFS_VNODE_LOCKING) |
> -			   (1 << AFS_VNODE_READLOCKED) |
> -			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
> -		list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
> -		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
> -		spin_unlock(&vnode->lock);
> +	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
>  
> -		ret = afs_set_lock(vnode, key, type);
> -		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
> -		switch (ret) {
> -		case 0:
> -			_debug("acquired");
> -			goto acquired_server_lock;
> -		case -EWOULDBLOCK:
> -			_debug("would block");
> -			spin_lock(&vnode->lock);
> -			ASSERT(list_empty(&vnode->granted_locks));
> -			ASSERTCMP(vnode->pending_locks.next, ==,
> -				  &fl->fl_u.afs.link);
> -			goto wait;
> -		default:
> -			spin_lock(&vnode->lock);
> -			list_del_init(&fl->fl_u.afs.link);
> -			spin_unlock(&vnode->lock);
> -			goto error;
> -		}
> -	}
> +	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
> +		goto need_to_wait;
>  
> -	/* otherwise, we need to wait for a local lock to become available */
> -	_debug("wait local");
> -	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
> -wait:
> -	if (!(fl->fl_flags & FL_SLEEP)) {
> -		_debug("noblock");
> -		ret = -EAGAIN;
> -		goto abort_attempt;
> -	}
> +	/* We don't have a lock on this vnode and we aren't currently waiting
> +	 * for one either, so ask the server for a lock.
> +	 *
> +	 * Note that we need to be careful if we get interrupted by a signal
> +	 * after dispatching the request as we may still get the lock, even
> +	 * though we don't wait for the reply (it's not too bad a problem - the
> +	 * lock will expire in 10 mins anyway).
> +	 */
> +	_debug("not locked");
> +	vnode->lock_key = key_get(key);
> +	vnode->lock_type = type;
> +	vnode->lock_state = AFS_VNODE_LOCK_SETTING;
>  	spin_unlock(&vnode->lock);
>  
> -	/* now we need to sleep and wait for the lock manager thread to get the
> -	 * lock from the server */
> -	_debug("sleep");
> -	ret = wait_event_interruptible(fl->fl_wait,
> -				       fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
> -	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
> -		ret = fl->fl_u.afs.state;
> -		if (ret < 0)
> -			goto error;
> -		spin_lock(&vnode->lock);
> -		goto given_lock;
> -	}
> -
> -	/* we were interrupted, but someone may still be in the throes of
> -	 * giving us the lock */
> -	_debug("intr");
> -	ASSERTCMP(ret, ==, -ERESTARTSYS);
> +	ret = afs_set_lock(vnode, key, type); /* RPC */
>  
>  	spin_lock(&vnode->lock);
> -	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
> -		ret = fl->fl_u.afs.state;
> -		if (ret < 0) {
> -			spin_unlock(&vnode->lock);
> -			goto error;
> -		}
> -		goto given_lock;
> -	}
> +	switch (ret) {
> +	default:
> +		goto abort_attempt;
>  
> -abort_attempt:
> -	/* we aren't going to get the lock, either because we're unwilling to
> -	 * wait, or because some signal happened */
> -	_debug("abort");
> -	if (list_empty(&vnode->granted_locks) &&
> -	    vnode->pending_locks.next == &fl->fl_u.afs.link) {
> -		if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
> -			/* kick the next pending lock into having a go */
> -			list_del_init(&fl->fl_u.afs.link);
> -			afs_lock_may_be_available(vnode);
> -		}
> -	} else {
> -		list_del_init(&fl->fl_u.afs.link);
> +	case -EWOULDBLOCK:
> +		/* The server doesn't have a lock-waiting queue, so the client
> +		 * will have to retry.  The server will break the outstanding
> +		 * callbacks on a file when a lock is released.
> +		 */
> +		_debug("would block");
> +		ASSERT(list_empty(&vnode->granted_locks));
> +		ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
> +		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
> +		goto need_to_wait;
> +
> +	case 0:
> +		_debug("acquired");
> +		break;
>  	}
> -	spin_unlock(&vnode->lock);
> -	goto error;
>  
> -acquired_server_lock:
>  	/* we've acquired a server lock, but it needs to be renewed after 5
>  	 * mins */
> -	spin_lock(&vnode->lock);
> +	vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
>  	afs_schedule_lock_extension(vnode);
> -	if (type == AFS_LOCK_READ)
> -		set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
> -	else
> -		set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
> -sharing_existing_lock:
> +
> +share_existing_lock:
>  	/* the lock has been granted as far as we're concerned... */
>  	fl->fl_u.afs.state = AFS_LOCK_GRANTED;
>  	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
> +
>  given_lock:
>  	/* ... but we do still need to get the VFS's blessing */
> -	ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
> -	ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
> -				(1 << AFS_VNODE_WRITELOCKED))) != 0);
> +	spin_unlock(&vnode->lock);
> +
>  	ret = posix_lock_file(file, fl, NULL);
>  	if (ret < 0)
>  		goto vfs_rejected_lock;
> -	spin_unlock(&vnode->lock);
>  
> -	/* again, make sure we've got a callback on this file and, again, make
> +	/* Again, make sure we've got a callback on this file and, again, make
>  	 * sure that our view of the data version is up to date (we ignore
> -	 * errors incurred here and deal with the consequences elsewhere) */
> +	 * errors incurred here and deal with the consequences elsewhere).
> +	 */
>  	afs_validate(vnode, key);
> +	_leave(" = 0");
> +	return 0;
>  
> -error:
> -	spin_unlock(&inode->i_lock);
> +need_to_wait:
> +	/* We're going to have to wait.  Either this client doesn't have a lock
> +	 * on the server yet and we need to wait for a callback to occur, or
> +	 * the client does have a lock on the server, but it belongs to some
> +	 * other process(es) and is incompatible with the lock we want.
> +	 */
> +	ret = -EAGAIN;
> +	if (fl->fl_flags & FL_SLEEP) {
> +		spin_unlock(&vnode->lock);
> +
> +		_debug("sleep");
> +		ret = wait_event_interruptible(fl->fl_wait,
> +					       fl->fl_u.afs.state != AFS_LOCK_PENDING);
> +
> +		spin_lock(&vnode->lock);
> +	}
> +
> +	if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
> +		goto given_lock;
> +	if (fl->fl_u.afs.state < 0)
> +		ret = fl->fl_u.afs.state;
> +
> +abort_attempt:
> +	/* we aren't going to get the lock, either because we're unwilling to
> +	 * wait, or because some signal happened */
> +	_debug("abort");
> +	afs_dequeue_lock(vnode, fl);
> +
> +error_unlock:
> +	spin_unlock(&vnode->lock);
>  	_leave(" = %d", ret);
>  	return ret;
>  
>  vfs_rejected_lock:
> -	/* the VFS rejected the lock we just obtained, so we have to discard
> -	 * what we just got */
> +	/* The VFS rejected the lock we just obtained, so we have to discard
> +	 * what we just got.  We defer this to the lock manager work item to
> +	 * deal with.
> +	 */
>  	_debug("vfs refused %d", ret);
> +	spin_lock(&vnode->lock);
>  	list_del_init(&fl->fl_u.afs.link);
>  	if (list_empty(&vnode->granted_locks))
> -		afs_defer_unlock(vnode, key);
> -	goto abort_attempt;
> +		afs_defer_unlock(vnode);
> +	goto error_unlock;
>  }
>  
>  /*
> @@ -499,34 +579,21 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
>   */
>  static int afs_do_unlk(struct file *file, struct file_lock *fl)
>  {
> -	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
> -	struct key *key = afs_file_key(file);
> +	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
>  	int ret;
>  
>  	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
>  
> +	/* Flush all pending writes before doing anything with locks. */
> +	vfs_fsync(file, 0);
> +
>  	/* only whole-file unlocks are supported */
>  	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
>  		return -EINVAL;
>  
> -	fl->fl_ops = &afs_lock_ops;
> -	INIT_LIST_HEAD(&fl->fl_u.afs.link);
> -	fl->fl_u.afs.state = AFS_LOCK_PENDING;
> -
> -	spin_lock(&vnode->lock);
>  	ret = posix_lock_file(file, fl, NULL);
> -	if (ret < 0) {
> -		spin_unlock(&vnode->lock);
> -		_leave(" = %d [vfs]", ret);
> -		return ret;
> -	}
> -
> -	/* discard the server lock only if all granted locks are gone */
> -	if (list_empty(&vnode->granted_locks))
> -		afs_defer_unlock(vnode, key);
> -	spin_unlock(&vnode->lock);
> -	_leave(" = 0");
> -	return 0;
> +	_leave(" = %d [%u]", ret, vnode->lock_state);
> +	return ret;
>  }
>  
>  /*
> @@ -534,7 +601,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
>   */
>  static int afs_do_getlk(struct file *file, struct file_lock *fl)
>  {
> -	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
> +	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
>  	struct key *key = afs_file_key(file);
>  	int ret, lock_count;
>  
> @@ -542,29 +609,25 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
>  
>  	fl->fl_type = F_UNLCK;
>  
> -	inode_lock(&vnode->vfs_inode);
> -
>  	/* check local lock records first */
> -	ret = 0;
>  	posix_test_lock(file, fl);
>  	if (fl->fl_type == F_UNLCK) {
>  		/* no local locks; consult the server */
>  		ret = afs_fetch_status(vnode, key);
>  		if (ret < 0)
>  			goto error;
> -		lock_count = vnode->status.lock_count;
> -		if (lock_count) {
> -			if (lock_count > 0)
> -				fl->fl_type = F_RDLCK;
> -			else
> -				fl->fl_type = F_WRLCK;
> -			fl->fl_start = 0;
> -			fl->fl_end = OFFSET_MAX;
> -		}
> +
> +		lock_count = READ_ONCE(vnode->status.lock_count);
> +		if (lock_count > 0)
> +			fl->fl_type = F_RDLCK;
> +		else
> +			fl->fl_type = F_WRLCK;
> +		fl->fl_start = 0;
> +		fl->fl_end = OFFSET_MAX;
>  	}
>  
> +	ret = 0;
>  error:
> -	inode_unlock(&vnode->vfs_inode);
>  	_leave(" = %d [%hd]", ret, fl->fl_type);
>  	return ret;
>  }
> @@ -574,7 +637,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
>   */
>  int afs_lock(struct file *file, int cmd, struct file_lock *fl)
>  {
> -	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
> +	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
>  
>  	_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
>  	       vnode->fid.vid, vnode->fid.vnode, cmd,
> @@ -597,7 +660,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
>   */
>  int afs_flock(struct file *file, int cmd, struct file_lock *fl)
>  {
> -	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
> +	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
>  
>  	_enter("{%x:%u},%d,{t=%x,fl=%x}",
>  	       vnode->fid.vid, vnode->fid.vnode, cmd,
> @@ -627,9 +690,13 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
>   */
>  static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
>  {
> +	struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
> +
>  	_enter("");
>  
> +	spin_lock(&vnode->lock);
>  	list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
> +	spin_unlock(&vnode->lock);
>  }
>  
>  /*
> @@ -638,7 +705,12 @@ static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
>   */
>  static void afs_fl_release_private(struct file_lock *fl)
>  {
> +	struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
> +
>  	_enter("");
>  
> -	list_del_init(&fl->fl_u.afs.link);
> +	spin_lock(&vnode->lock);
> +	afs_dequeue_lock(vnode, fl);
> +	_debug("state %u for %p", vnode->lock_state, vnode);
> +	spin_unlock(&vnode->lock);
>  }

I don't know about this...unlocking the lock on the server from
fl_release_private is rather unusual.

fl_release_private is called as we're freeing a file_lock of any sort.
That includes both lock requests and lock records. Not all of those
necessarily correspond to a lock on the server.

> diff --git a/fs/afs/internal.h b/fs/afs/internal.h
> index bd8dcee7e066..e03910cebdd4 100644
> --- a/fs/afs/internal.h
> +++ b/fs/afs/internal.h
> @@ -430,6 +430,16 @@ struct afs_volume {
>  	u8			name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
>  };
>  
> +enum afs_lock_state {
> +	AFS_VNODE_LOCK_NONE,		/* The vnode has no lock on the server */
> +	AFS_VNODE_LOCK_WAITING_FOR_CB,	/* We're waiting for the server to break the callback */
> +	AFS_VNODE_LOCK_SETTING,		/* We're asking the server for a lock */
> +	AFS_VNODE_LOCK_GRANTED,		/* We have a lock on the server */
> +	AFS_VNODE_LOCK_EXTENDING,	/* We're extending a lock on the server */
> +	AFS_VNODE_LOCK_NEED_UNLOCK,	/* We need to unlock on the server */
> +	AFS_VNODE_LOCK_UNLOCKING,	/* We're telling the server to unlock */
> +};
> +
>  /*
>   * AFS inode private data
>   */
> @@ -454,18 +464,16 @@ struct afs_vnode {
>  #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
>  #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
>  #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
> -#define AFS_VNODE_LOCKING	6		/* set if waiting for lock on vnode */
> -#define AFS_VNODE_READLOCKED	7		/* set if vnode is read-locked on the server */
> -#define AFS_VNODE_WRITELOCKED	8		/* set if vnode is write-locked on the server */
> -#define AFS_VNODE_UNLOCKING	9		/* set if vnode is being unlocked on the server */
> -#define AFS_VNODE_AUTOCELL	10		/* set if Vnode is an auto mount point */
> -#define AFS_VNODE_PSEUDODIR	11		/* set if Vnode is a pseudo directory */
> +#define AFS_VNODE_AUTOCELL	6		/* set if Vnode is an auto mount point */
> +#define AFS_VNODE_PSEUDODIR	7 		/* set if Vnode is a pseudo directory */
>  
>  	struct list_head	wb_keys;	/* List of keys available for writeback */
>  	struct list_head	pending_locks;	/* locks waiting to be granted */
>  	struct list_head	granted_locks;	/* locks granted on this file */
>  	struct delayed_work	lock_work;	/* work to be done in locking */
> -	struct key		*unlock_key;	/* key to be used in unlocking */
> +	struct key		*lock_key;	/* Key to be used in lock ops */
> +	enum afs_lock_state	lock_state : 8;
> +	afs_lock_type_t		lock_type : 8;
>  
>  	/* outstanding callback notification on this file */
>  	struct afs_cb_interest	*cb_interest;	/* Server on which this resides */
> @@ -843,6 +851,7 @@ extern void afs_clear_permits(struct afs_vnode *);
>  extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
>  extern void afs_zap_permits(struct rcu_head *);
>  extern struct key *afs_request_key(struct afs_cell *);
> +extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
>  extern int afs_permission(struct inode *, int);
>  extern void __exit afs_clean_up_permit_cache(void);
>  
> diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
> index e728ca1776c9..d04511fb3879 100644
> --- a/fs/afs/rotate.c
> +++ b/fs/afs/rotate.c
> @@ -46,8 +46,7 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
>  		return false;
>  	}
>  
> -	if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
> -	    test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
> +	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
>  		fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
>  	return true;
>  }
> @@ -117,7 +116,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
>  	case VSALVAGING:	m = "being salvaged";	break;
>  	default:		m = "busy";		break;
>  	}
> -	
> +
>  	pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
>  }
>  
> @@ -438,24 +437,67 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
>  
>  	_enter("");
>  
> -	if (!cbi) {
> -		fc->ac.error = -ESTALE;
> +	switch (fc->ac.error) {
> +	case SHRT_MAX:
> +		if (!cbi) {
> +			fc->ac.error = -ESTALE;
> +			fc->flags |= AFS_FS_CURSOR_STOP;
> +			return false;
> +		}
> +
> +		fc->cbi = afs_get_cb_interest(vnode->cb_interest);
> +
> +		read_lock(&cbi->server->fs_lock);
> +		alist = rcu_dereference_protected(cbi->server->addresses,
> +						  lockdep_is_held(&cbi->server->fs_lock));
> +		afs_get_addrlist(alist);
> +		read_unlock(&cbi->server->fs_lock);
> +		if (!alist) {
> +			fc->ac.error = -ESTALE;
> +			fc->flags |= AFS_FS_CURSOR_STOP;
> +			return false;
> +		}
> +
> +		fc->ac.alist = alist;
> +		fc->ac.addr  = NULL;
> +		fc->ac.start = READ_ONCE(alist->index);
> +		fc->ac.index = fc->ac.start;
> +		fc->ac.error = 0;
> +		fc->ac.begun = false;
> +		goto iterate_address;
> +
> +	case 0:
> +	default:
> +		/* Success or local failure.  Stop. */
>  		fc->flags |= AFS_FS_CURSOR_STOP;
> +		_leave(" = f [okay/local %d]", fc->ac.error);
>  		return false;
> -	}
>  
> -	read_lock(&cbi->server->fs_lock);
> -	alist = afs_get_addrlist(cbi->server->addresses);
> -	read_unlock(&cbi->server->fs_lock);
> -	if (!alist) {
> -		fc->ac.error = -ESTALE;
> +	case -ECONNABORTED:
>  		fc->flags |= AFS_FS_CURSOR_STOP;
> +		_leave(" = f [abort]");
>  		return false;
> +
> +	case -ENETUNREACH:
> +	case -EHOSTUNREACH:
> +	case -ECONNREFUSED:
> +	case -ETIMEDOUT:
> +	case -ETIME:
> +		_debug("no conn");
> +		goto iterate_address;
>  	}
>  
> -	fc->ac.alist = alist;
> -	fc->ac.error = 0;
> -	return true;
> +iterate_address:
> +	/* Iterate over the current server's address list to try and find an
> +	 * address on which it will respond to us.
> +	 */
> +	if (afs_iterate_addresses(&fc->ac)) {
> +		_leave(" = t");
> +		return true;
> +	}
> +
> +	afs_end_cursor(&fc->ac);
> +	return false;
>  }
>  
>  /*
> diff --git a/fs/afs/security.c b/fs/afs/security.c
> index 46a881a4d08f..2b00097101b3 100644
> --- a/fs/afs/security.c
> +++ b/fs/afs/security.c
> @@ -284,8 +284,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
>   * permitted to be accessed with this authorisation, and if so, what access it
>   * is granted
>   */
> -static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
> -			    afs_access_t *_access)
> +int afs_check_permit(struct afs_vnode *vnode, struct key *key,
> +		     afs_access_t *_access)
>  {
>  	struct afs_permits *permits;
>  	bool valid = false;
> diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
> index 26bad7032bba..0ab3f8457839 100644
> --- a/fs/afs/server_list.c
> +++ b/fs/afs/server_list.c
> @@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
>  {
>  	int i;
>  
> -	if (refcount_dec_and_test(&slist->usage)) {
> +	if (slist && refcount_dec_and_test(&slist->usage)) {
>  		for (i = 0; i < slist->nr_servers; i++) {
>  			afs_put_cb_interest(net, slist->servers[i].cb_interest);
>  			afs_put_server(net, slist->servers[i].server);
> 

-- 
Jeff Layton <jlayton at redhat.com>



More information about the linux-afs mailing list