[PATCH 7/7] um: simplify IRQ handling code
Anton Ivanov
anton.ivanov at kot-begemot.co.uk
Mon Nov 30 11:30:19 EST 2020
On 23/11/2020 19:56, Johannes Berg wrote:
> From: Johannes Berg <johannes.berg at intel.com>
>
> Reduce dynamic allocations (and thereby cache misses) by simply
> embedding the registration data for IRQs in the irq_entry, we
> never supported these being really dynamic anyway as only one
> was ever allowed ("Trying to reregister ...").
>
> Lockless behaviour is preserved by removing the FD from the poll
> set appropriately, but we use reg->events to indicate whether or
> not this entry is used, rather than dynamically allocating them.
>
> Also port the list of IRQ entries to list_head instead of the
> current open-coded singly-linked list implementation, just for
> sanity.
This one is broken. I will look exactly where. It is somewhere in the IRQ delete/re-request logic.
How to test
run iperf -s in the UML with a vector network driver.
run iperf -c to the UML instance from the host in a loop
Try to ifup/ifdown the vecX interface inside the UML.
With master it works fine. With this patch it fails.
A.
> Signed-off-by: Johannes Berg <johannes.berg at intel.com>
> ---
> arch/um/kernel/irq.c | 369 +++++++++++++++----------------------------
> 1 file changed, 127 insertions(+), 242 deletions(-)
>
> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
> index 9e8f776bb43a..bbf5a466b44c 100644
> --- a/arch/um/kernel/irq.c
> +++ b/arch/um/kernel/irq.c
> @@ -29,26 +29,23 @@ extern void free_irqs(void);
> * This is why we keep a small irq_reg array for each fd -
> * one entry per IRQ type
> */
> -
> struct irq_reg {
> void *id;
> - enum um_irq_type type;
> int irq;
> + /* it's cheaper to store this than to query it */
> int events;
> bool active;
> bool pending;
> - bool purge;
> };
>
> struct irq_entry {
> - struct irq_entry *next;
> + struct list_head list;
> int fd;
> - struct irq_reg *irq_array[NUM_IRQ_TYPES];
> + struct irq_reg reg[NUM_IRQ_TYPES];
> };
>
> -static struct irq_entry *active_fds;
> -
> static DEFINE_SPINLOCK(irq_lock);
> +static LIST_HEAD(active_fds);
> static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
>
> static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
> @@ -61,12 +58,13 @@ static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
> */
> if (irq->active) {
> irq->active = false;
> +
> do {
> irq->pending = false;
> do_IRQ(irq->irq, regs);
> - } while (irq->pending && (!irq->purge));
> - if (!irq->purge)
> - irq->active = true;
> + } while (irq->pending);
> +
> + irq->active = true;
> } else {
> irq->pending = true;
> }
> @@ -75,9 +73,7 @@ static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
> void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
> {
> struct irq_entry *irq_entry;
> - struct irq_reg *irq;
> -
> - int n, i, j;
> + int n, i;
>
> while (1) {
> /* This is now lockless - epoll keeps back-referencesto the irqs
> @@ -96,21 +92,18 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
> }
>
> for (i = 0; i < n ; i++) {
> - /* Epoll back reference is the entry with 2 irq_reg
> - * leaves - one for each irq type.
> - */
> - irq_entry = (struct irq_entry *)
> - os_epoll_get_data_pointer(i);
> - for (j = 0; j < NUM_IRQ_TYPES ; j++) {
> - irq = irq_entry->irq_array[j];
> - if (irq == NULL)
> + enum um_irq_type t;
> +
> + irq_entry = os_epoll_get_data_pointer(i);
> +
> + for (t = 0; t < NUM_IRQ_TYPES; t++) {
> + int events = irq_entry->reg[t].events;
> +
> + if (!events)
> continue;
> - if (os_epoll_triggered(i, irq->events) > 0)
> - irq_io_loop(irq, regs);
> - if (irq->purge) {
> - irq_entry->irq_array[j] = NULL;
> - kfree(irq);
> - }
> +
> + if (os_epoll_triggered(i, events) > 0)
> + irq_io_loop(&irq_entry->reg[t], regs);
> }
> }
> }
> @@ -118,32 +111,59 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
> free_irqs();
> }
>
> -static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
> +static struct irq_entry *get_irq_entry_by_fd(int fd)
> {
> - int i;
> - int events = 0;
> - struct irq_reg *irq;
> + struct irq_entry *walk;
>
> - for (i = 0; i < NUM_IRQ_TYPES ; i++) {
> - irq = irq_entry->irq_array[i];
> - if (irq != NULL)
> - events = irq->events | events;
> - }
> - if (events > 0) {
> - /* os_add_epoll will call os_mod_epoll if this already exists */
> - return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
> + lockdep_assert_held(&irq_lock);
> +
> + list_for_each_entry(walk, &active_fds, list) {
> + if (walk->fd == fd)
> + return walk;
> }
> - /* No events - delete */
> - return os_del_epoll_fd(irq_entry->fd);
> +
> + return NULL;
> +}
> +
> +static void free_irq_entry(struct irq_entry *to_free, bool remove)
> +{
> + if (!to_free)
> + return;
> +
> + if (remove)
> + os_del_epoll_fd(to_free->fd);
> + list_del(&to_free->list);
> + kfree(to_free);
> }
>
> +static bool update_irq_entry(struct irq_entry *entry)
> +{
> + enum um_irq_type i;
> + int events = 0;
> +
> + for (i = 0; i < NUM_IRQ_TYPES; i++)
> + events |= entry->reg[i].events;
>
> + if (events) {
> + /* will modify (instead of add) if needed */
> + os_add_epoll_fd(events, entry->fd, entry);
> + return true;
> + }
> +
> + os_del_epoll_fd(entry->fd);
> + return false;
> +}
> +
> +static void update_or_free_irq_entry(struct irq_entry *entry)
> +{
> + if (!update_irq_entry(entry))
> + free_irq_entry(entry, false);
> +}
>
> static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
> {
> - struct irq_reg *new_fd;
> struct irq_entry *irq_entry;
> - int i, err, events;
> + int err, events = os_event_mask(type);
> unsigned long flags;
>
> err = os_set_fd_async(fd);
> @@ -151,73 +171,33 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
> goto out;
>
> spin_lock_irqsave(&irq_lock, flags);
> -
> - /* Check if we have an entry for this fd */
> -
> - err = -EBUSY;
> - for (irq_entry = active_fds;
> - irq_entry != NULL; irq_entry = irq_entry->next) {
> - if (irq_entry->fd == fd)
> - break;
> - }
> -
> - if (irq_entry == NULL) {
> - /* This needs to be atomic as it may be called from an
> - * IRQ context.
> - */
> - irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
> - if (irq_entry == NULL) {
> - printk(KERN_ERR
> - "Failed to allocate new IRQ entry\n");
> + irq_entry = get_irq_entry_by_fd(fd);
> + if (irq_entry) {
> + /* cannot register the same FD twice with the same type */
> + if (WARN_ON(irq_entry->reg[type].events)) {
> + err = -EALREADY;
> goto out_unlock;
> }
> - irq_entry->fd = fd;
> - for (i = 0; i < NUM_IRQ_TYPES; i++)
> - irq_entry->irq_array[i] = NULL;
> - irq_entry->next = active_fds;
> - active_fds = irq_entry;
> - }
> -
> - /* Check if we are trying to re-register an interrupt for a
> - * particular fd
> - */
>
> - if (irq_entry->irq_array[type] != NULL) {
> - printk(KERN_ERR
> - "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
> - irq, fd, type, dev_id
> - );
> - goto out_unlock;
> + /* temporarily disable to avoid IRQ-side locking */
> + os_del_epoll_fd(fd);
> } else {
> - /* New entry for this fd */
> -
> - err = -ENOMEM;
> - new_fd = kmalloc(sizeof(struct irq_reg), GFP_ATOMIC);
> - if (new_fd == NULL)
> + irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
> + if (!irq_entry) {
> + err = -ENOMEM;
> goto out_unlock;
> -
> - events = os_event_mask(type);
> -
> - *new_fd = ((struct irq_reg) {
> - .id = dev_id,
> - .irq = irq,
> - .type = type,
> - .events = events,
> - .active = true,
> - .pending = false,
> - .purge = false
> - });
> - /* Turn off any IO on this fd - allows us to
> - * avoid locking the IRQ loop
> - */
> - os_del_epoll_fd(irq_entry->fd);
> - irq_entry->irq_array[type] = new_fd;
> + }
> + irq_entry->fd = fd;
> + list_add_tail(&irq_entry->list, &active_fds);
> + maybe_sigio_broken(fd);
> }
>
> - /* Turn back IO on with the correct (new) IO event mask */
> - assign_epoll_events_to_irq(irq_entry);
> + irq_entry->reg[type].irq = irq;
> + irq_entry->reg[type].active = true;
> + irq_entry->reg[type].events = events;
> +
> + WARN_ON(!update_irq_entry(irq_entry));
> spin_unlock_irqrestore(&irq_lock, flags);
> - maybe_sigio_broken(fd);
>
> return 0;
> out_unlock:
> @@ -227,104 +207,10 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
> }
>
> /*
> - * Walk the IRQ list and dispose of any unused entries.
> - * Should be done under irq_lock.
> + * Remove the entry or entries for a specific FD, if you
> + * don't want to remove all the possible entries then use
> + * um_free_irq() or deactivate_fd() instead.
> */
> -
> -static void garbage_collect_irq_entries(void)
> -{
> - int i;
> - bool reap;
> - struct irq_entry *walk;
> - struct irq_entry *previous = NULL;
> - struct irq_entry *to_free;
> -
> - if (active_fds == NULL)
> - return;
> - walk = active_fds;
> - while (walk != NULL) {
> - reap = true;
> - for (i = 0; i < NUM_IRQ_TYPES ; i++) {
> - if (walk->irq_array[i] != NULL) {
> - reap = false;
> - break;
> - }
> - }
> - if (reap) {
> - if (previous == NULL)
> - active_fds = walk->next;
> - else
> - previous->next = walk->next;
> - to_free = walk;
> - } else {
> - to_free = NULL;
> - }
> - walk = walk->next;
> - kfree(to_free);
> - }
> -}
> -
> -/*
> - * Walk the IRQ list and get the descriptor for our FD
> - */
> -
> -static struct irq_entry *get_irq_entry_by_fd(int fd)
> -{
> - struct irq_entry *walk = active_fds;
> -
> - while (walk != NULL) {
> - if (walk->fd == fd)
> - return walk;
> - walk = walk->next;
> - }
> - return NULL;
> -}
> -
> -
> -/*
> - * Walk the IRQ list and dispose of an entry for a specific
> - * device and number. Note - if sharing an IRQ for read
> - * and write for the same FD it will be disposed in either case.
> - * If this behaviour is undesirable use different IRQ ids.
> - */
> -
> -#define IGNORE_IRQ 1
> -#define IGNORE_DEV (1<<1)
> -
> -static void do_free_by_irq_and_dev(
> - struct irq_entry *irq_entry,
> - unsigned int irq,
> - void *dev,
> - int flags
> -)
> -{
> - int i;
> - struct irq_reg *to_free;
> -
> - for (i = 0; i < NUM_IRQ_TYPES ; i++) {
> - if (irq_entry->irq_array[i] != NULL) {
> - if (
> - ((flags & IGNORE_IRQ) ||
> - (irq_entry->irq_array[i]->irq == irq)) &&
> - ((flags & IGNORE_DEV) ||
> - (irq_entry->irq_array[i]->id == dev))
> - ) {
> - /* Turn off any IO on this fd - allows us to
> - * avoid locking the IRQ loop
> - */
> - os_del_epoll_fd(irq_entry->fd);
> - to_free = irq_entry->irq_array[i];
> - irq_entry->irq_array[i] = NULL;
> - assign_epoll_events_to_irq(irq_entry);
> - if (to_free->active)
> - to_free->purge = true;
> - else
> - kfree(to_free);
> - }
> - }
> - }
> -}
> -
> void free_irq_by_fd(int fd)
> {
> struct irq_entry *to_free;
> @@ -332,58 +218,64 @@ void free_irq_by_fd(int fd)
>
> spin_lock_irqsave(&irq_lock, flags);
> to_free = get_irq_entry_by_fd(fd);
> - if (to_free != NULL) {
> - do_free_by_irq_and_dev(
> - to_free,
> - -1,
> - NULL,
> - IGNORE_IRQ | IGNORE_DEV
> - );
> - }
> - garbage_collect_irq_entries();
> + free_irq_entry(to_free, true);
> spin_unlock_irqrestore(&irq_lock, flags);
> }
> EXPORT_SYMBOL(free_irq_by_fd);
>
> static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
> {
> - struct irq_entry *to_free;
> + struct irq_entry *entry;
> unsigned long flags;
>
> spin_lock_irqsave(&irq_lock, flags);
> - to_free = active_fds;
> - while (to_free != NULL) {
> - do_free_by_irq_and_dev(
> - to_free,
> - irq,
> - dev,
> - 0
> - );
> - to_free = to_free->next;
> + list_for_each_entry(entry, &active_fds, list) {
> + enum um_irq_type i;
> +
> + for (i = 0; i < NUM_IRQ_TYPES; i++) {
> + struct irq_reg *reg = &entry->reg[i];
> +
> + if (!reg->events)
> + continue;
> + if (reg->irq != irq)
> + continue;
> + if (reg->id != dev)
> + continue;
> +
> + os_del_epoll_fd(entry->fd);
> + reg->events = 0;
> + update_or_free_irq_entry(entry);
> + goto out;
> + }
> }
> - garbage_collect_irq_entries();
> +out:
> spin_unlock_irqrestore(&irq_lock, flags);
> }
>
> -
> void deactivate_fd(int fd, int irqnum)
> {
> - struct irq_entry *to_free;
> + struct irq_entry *entry;
> unsigned long flags;
> + enum um_irq_type i;
>
> os_del_epoll_fd(fd);
> +
> spin_lock_irqsave(&irq_lock, flags);
> - to_free = get_irq_entry_by_fd(fd);
> - if (to_free != NULL) {
> - do_free_by_irq_and_dev(
> - to_free,
> - irqnum,
> - NULL,
> - IGNORE_DEV
> - );
> + entry = get_irq_entry_by_fd(fd);
> + if (!entry)
> + goto out;
> +
> + for (i = 0; i < NUM_IRQ_TYPES; i++) {
> + if (!entry->reg[i].events)
> + continue;
> + if (entry->reg[i].irq == irqnum)
> + entry->reg[i].events = 0;
> }
> - garbage_collect_irq_entries();
> +
> + update_or_free_irq_entry(entry);
> +out:
> spin_unlock_irqrestore(&irq_lock, flags);
> +
> ignore_sigio_fd(fd);
> }
> EXPORT_SYMBOL(deactivate_fd);
> @@ -396,24 +288,17 @@ EXPORT_SYMBOL(deactivate_fd);
> */
> int deactivate_all_fds(void)
> {
> - struct irq_entry *to_free;
> + struct irq_entry *entry;
>
> /* Stop IO. The IRQ loop has no lock so this is our
> * only way of making sure we are safe to dispose
> * of all IRQ handlers
> */
> os_set_ioignore();
> - to_free = active_fds;
> - while (to_free != NULL) {
> - do_free_by_irq_and_dev(
> - to_free,
> - -1,
> - NULL,
> - IGNORE_IRQ | IGNORE_DEV
> - );
> - to_free = to_free->next;
> - }
> - /* don't garbage collect - we can no longer call kfree() here */
> +
> + /* we can no longer call kfree() here so just deactivate */
> + list_for_each_entry(entry, &active_fds, list)
> + os_del_epoll_fd(entry->fd);
> os_close_epoll_fd();
> return 0;
> }
--
Anton R. Ivanov
https://www.kot-begemot.co.uk/
More information about the linux-um
mailing list