[PATCH v4 20/21] usb: dwc2: host: Totally redo the microframe scheduler

Wed Jan 20 21:04:31 PST 2016

This totally reimplements the microframe scheduler in dwc2 to attempt to
handle periodic splits properly.  The old code didn't even try, so this
was a significant effort since periodic splits are one of the most
complicated things in USB.

I've attempted to keep the old "don't use the microframe" schduler
around for now, but not sure it's needed.  It has also only been lightly
tested.

I think it's pretty certain that this scheduler isn't perfect and might
have some bugs, but it seems much better than what was there before.
With this change my stressful USB test (USB webcam + USB audio + some
keyboards) crackles less.

Signed-off-by: Douglas Anderson <dianders at chromium.org>
---
Changes in v4:
- Figured out what the microframe scheduler was supposed to do.
- Microframe rewrite is totally different from v3, hopefully more right.
- Microframe rewrite is later in the series now.

Changes in v3:
- The uframe scheduler patch is folded into optimization series.
- Optimize uframe scheduler "single uframe" case a little.
- uframe scheduler now atop logging patches.
- uframe scheduler now before delayed bandwidth release patches.
- Add defines like EARLY_FRAME_USEC
- Reorder dwc2_deschedule_periodic() in prep for future patches.
- uframe scheduler now shows real usefulness w/ future patches!
- Assuming single_tt is new for v3; not terribly well tested (yet).
- Keep track and use our uframe new for v3.

Changes in v2:
- Totally rewrote uframe scheduler again after writing test code.
- uframe scheduler atop delayed bandwidth release patches.

 drivers/usb/dwc2/core.h      |   86 ++-
 drivers/usb/dwc2/hcd.c       |   87 ++-
 drivers/usb/dwc2/hcd.h       |   79 ++-
 drivers/usb/dwc2/hcd_queue.c | 1208 +++++++++++++++++++++++++++++++++++-------
 4 files changed, 1272 insertions(+), 188 deletions(-)

diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 52cbea28d0e9..d8da912311b7 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -682,7 +682,9 @@ struct dwc2_hregs_backup {
  *                      This value is in microseconds per (micro)frame. The
  *                      assumption is that all periodic transfers may occur in
  *                      the same (micro)frame.
- * @frame_usecs:        Internal variable used by the microframe scheduler
+ * @hs_periodic_bitmap: Bitmap used by the microframe scheduler any time the
+ *                      host is in high speed mode; low speed schedules are
+ *                      stored elsewhere since we need one per TT.
  * @frame_number:       Frame number read from the core at SOF. The value ranges
  *                      from 0 to HFNUM_MAX_FRNUM.
  * @periodic_qh_count:  Count of periodic QHs, if using several eps. Used for
@@ -780,6 +782,85 @@ struct dwc2_hsotg {
 #define DWC2_CORE_REV_3_00a	0x4f54300a
 
 #if IS_ENABLED(CONFIG_USB_DWC2_HOST) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE)
+
+/*
+ * Constants related to high speed periodic scheduling
+ *
+ * We have a periodic schedule that is DWC2_HS_SCHEDULE_UFRAMES long.  From a
+ * reservation point of view it's assumed that the schedule goes right back to
+ * the beginning after the end of the schedule.
+ *
+ * What does that mean for scheduling things with a long interval?  It means
+ * we'll reserve time for them in every possible microframe that they could
+ * ever be scheduled in.  ...but we'll still only actually schedule them as
+ * often as they were requested.
+ *
+ * We keep our schedule in a "bitmap" structure.  This simplifies having
+ * to keep track of and merge intervals: we just let the bitmap code do most
+ * of the heavy lifting.  In a way scheduling is much like memory allocation.
+ *
+ * We schedule 100us per uframe or 80% of 125us (the maximum amount you're
+ * supposed to schedule for periodic transfers).  That's according to spec.
+ *
+ * Note that though we only schedule 80% of each microframe, the bitmap that we
+ * keep the schedule in is tightly packed (AKA it doesn't have 100us worth of
+ * space for each uFrame).
+ *
+ * Requirements:
+ * - DWC2_HS_SCHEDULE_UFRAMES must even divide 0x4000 (HFNUM_MAX_FRNUM + 1)
+ * - DWC2_HS_SCHEDULE_UFRAMES must be 8 times DWC2_LS_SCHEDULE_FRAMES (probably
+ *   could be any multiple of 8 times DWC2_LS_SCHEDULE_FRAMES, but there might
+ *   be bugs).  The 8 comes from the USB spec: number of microframes per frame.
+ */
+#define DWC2_US_PER_UFRAME		125
+#define DWC2_HS_PERIODIC_US_PER_UFRAME	100
+
+#define DWC2_HS_SCHEDULE_UFRAMES	8
+#define DWC2_HS_SCHEDULE_US		(DWC2_HS_SCHEDULE_UFRAMES * \
+					 DWC2_HS_PERIODIC_US_PER_UFRAME)
+
+/*
+ * Constants related to low speed scheduling
+ *
+ * For high speed we schedule every 1us.  For low speed that's a bit overkill,
+ * so we make up a unit called a "slice" that's worth 25us.  There are 40
+ * slices in a full frame and we can schedule 36 of those (90%) for periodic
+ * transfers.
+ *
+ * Our low speed schedule can be as short as 1 frame or could be longer.  When
+ * we only schedule 1 frame it means that we'll need to reserve a time every
+ * frame even for things that only transfer very rarely, so something that runs
+ * every 2048 frames will get time reserved in every frame.  Our low speed
+ * schedule can be longer and we'll be able to handle more overlap, but that
+ * will come at increased memory cost and increased time to schedule.
+ *
+ * Note: one other advantage of a short low speed schedule is that if we mess
+ * up and miss scheduling we can jump in and use any of the slots that we
+ * happened to reserve.
+ *
+ * With 25 us per slice and 1 frame in the schedule, we only need 4 bytes for
+ * the schedule.  There will be one schedule per TT.
+ *
+ * Requirements:
+ * - DWC2_US_PER_SLICE must evenly divide DWC2_LS_PERIODIC_US_PER_FRAME.
+ */
+#define DWC2_US_PER_SLICE	25
+#define DWC2_SLICES_PER_UFRAME	(DWC2_US_PER_UFRAME / DWC2_US_PER_SLICE)
+
+#define DWC2_ROUND_US_TO_SLICE(us) \
+				(DIV_ROUND_UP((us), DWC2_US_PER_SLICE) * \
+				 DWC2_US_PER_SLICE)
+
+#define DWC2_LS_PERIODIC_US_PER_FRAME \
+				900
+#define DWC2_LS_PERIODIC_SLICES_PER_FRAME \
+				(DWC2_LS_PERIODIC_US_PER_FRAME / \
+				 DWC2_US_PER_SLICE)
+
+#define DWC2_LS_SCHEDULE_FRAMES	1
+#define DWC2_LS_SCHEDULE_SLICES	(DWC2_LS_SCHEDULE_FRAMES * \
+				 DWC2_LS_PERIODIC_SLICES_PER_FRAME)
+
 	union dwc2_hcd_internal_flags {
 		u32 d32;
 		struct {
@@ -803,7 +884,8 @@ struct dwc2_hsotg {
 	struct list_head periodic_sched_queued;
 	struct list_head split_order;
 	u16 periodic_usecs;
-	u16 frame_usecs[8];
+	unsigned long hs_periodic_bitmap[
+		DIV_ROUND_UP(DWC2_HS_SCHEDULE_US, BITS_PER_LONG)];
 	u16 frame_number;
 	u16 periodic_qh_count;
 	bool bus_suspended;
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 69a2a175100a..9d17759b46df 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2247,6 +2247,90 @@ void dwc2_host_hub_info(struct dwc2_hsotg *hsotg, void *context, int *hub_addr,
 	*hub_port = urb->dev->ttport;
 }
 
+/**
+ * dwc2_host_get_tt_info() - Get the dwc2_tt associated with context
+ *
+ * This will get the dwc2_tt structure (and ttport) associated with the given
+ * context (which is really just a struct urb pointer).
+ *
+ * The first time this is called for a given TT we allocate memory for our
+ * structure.  When everyone is done and has called dwc2_host_put_tt_info()
+ * then the refcount for the structure will go to 0 and we'll free it.
+ *
+ * @hsotg:     The HCD state structure for the DWC OTG controller.
+ * @qh:        The QH structure.
+ * @context:   The priv pointer from a struct dwc2_hcd_urb.
+ * @mem_flags: Flags for allocating memory.
+ * @ttport:    We'll return this device's port number here.  That's used to
+ *             reference into the bitmap if we're on a multi_tt hub.
+ *
+ * Return: a pointer to a struct dwc2_tt.  Don't forget to call
+ *         dwc2_host_put_tt_info()!  Returns NULL upon memory alloc failure.
+ */
+
+struct dwc2_tt *dwc2_host_get_tt_info(struct dwc2_hsotg *hsotg, void *context,
+				      gfp_t mem_flags, int *ttport)
+{
+	struct urb *urb = context;
+	struct dwc2_tt *dwc_tt = NULL;
+
+	if (urb->dev->tt) {
+		*ttport = urb->dev->ttport;
+
+		dwc_tt = urb->dev->tt->hcpriv;
+		if (dwc_tt == NULL) {
+			size_t bitmap_size;
+
+			/*
+			 * For single_tt we need one schedule.  For multi_tt
+			 * we need one per port.
+			 */
+			bitmap_size = DWC2_ELEMENTS_PER_LS_BITMAP *
+				      sizeof(dwc_tt->periodic_bitmaps[0]);
+			if (urb->dev->tt->multi)
+				bitmap_size *= urb->dev->tt->hub->maxchild;
+
+			dwc_tt = kzalloc(sizeof(*dwc_tt) + bitmap_size,
+					 mem_flags);
+			if (dwc_tt == NULL)
+				return NULL;
+
+			dwc_tt->usb_tt = urb->dev->tt;
+			dwc_tt->usb_tt->hcpriv = dwc_tt;
+		}
+
+		dwc_tt->refcount++;
+	}
+
+	return dwc_tt;
+}
+
+/**
+ * dwc2_host_put_tt_info() - Put the dwc2_tt from dwc2_host_get_tt_info()
+ *
+ * Frees resources allocated by dwc2_host_get_tt_info() if all current holders
+ * of the structure are done.
+ *
+ * It's OK to call this with NULL.
+ *
+ * @hsotg:     The HCD state structure for the DWC OTG controller.
+ * @dwc_tt:    The pointer returned by dwc2_host_get_tt_info.
+ */
+void dwc2_host_put_tt_info(struct dwc2_hsotg *hsotg, struct dwc2_tt *dwc_tt)
+{
+	/* Model kfree and make put of NULL a no-op */
+	if (dwc_tt == NULL)
+		return;
+
+	WARN_ON(dwc_tt->refcount < 1);
+
+	dwc_tt->refcount--;
+	if (!dwc_tt->refcount) {
+		dwc_tt->usb_tt->hcpriv = NULL;
+		kfree(dwc_tt);
+	}
+}
+
 int dwc2_host_get_speed(struct dwc2_hsotg *hsotg, void *context)
 {
 	struct urb *urb = context;
@@ -3190,9 +3274,6 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg, int irq)
 		hsotg->hc_ptr_array[i] = channel;
 	}
 
-	if (hsotg->core_params->uframe_sched > 0)
-		dwc2_hcd_init_usecs(hsotg);
-
 	/* Initialize hsotg start work */
 	INIT_DELAYED_WORK(&hsotg->start_work, dwc2_hcd_start_func);
 
diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h
index fd266ac53a28..140b1511a131 100644
--- a/drivers/usb/dwc2/hcd.h
+++ b/drivers/usb/dwc2/hcd.h
@@ -212,6 +212,43 @@ enum dwc2_transaction_type {
 	DWC2_TRANSACTION_ALL,
 };
 
+/* The number of elements per LS bitmap (per port on multi_tt) */
+#define DWC2_ELEMENTS_PER_LS_BITMAP	DIV_ROUND_UP(DWC2_LS_SCHEDULE_SLICES, \
+						     BITS_PER_LONG)
+
+/**
+ * struct dwc2_tt - dwc2 data associated with a usb_tt
+ *
+ * @refcount:           Number of Queue Heads (QHs) holding a reference.
+ * @usb_tt:             Pointer back to the official usb_tt.
+ * @periodic_bitmaps:   Bitmap for which parts of the 1ms frame are accounted
+ *                      for already.  Each is DWC2_ELEMENTS_PER_LS_BITMAP
+ *			elements (so sizeof(long) times that in bytes).
+ *
+ * This structure is stored in the hcpriv of the official usb_tt.
+ */
+struct dwc2_tt {
+	int refcount;
+	struct usb_tt *usb_tt;
+	unsigned long periodic_bitmaps[];
+};
+
+/**
+ * struct dwc2_hs_transfer_time - Info about a transfer on the high speed bus.
+ *
+ * @start_schedule_usecs:  The start time on the main bus schedule.  Note that
+ *                         the main bus schedule is tightly packed and this
+ *			   time should be interpreted as tightly packed (so
+ *			   uFrame 0 starts at 0 us, uFrame 1 starts at 100 us
+ *			   instead of 125 us).
+ * @duration_us:           How long this transfer goes.
+ */
+
+struct dwc2_hs_transfer_time {
+	u32 start_schedule_us;
+	u16 duration_us;
+};
+
 /**
  * struct dwc2_qh - Software queue head structure
  *
@@ -237,18 +274,33 @@ enum dwc2_transaction_type {
  * @td_first:           Index of first activated isochronous transfer descriptor
  * @td_last:            Index of last activated isochronous transfer descriptor
  * @host_us:            Bandwidth in microseconds per transfer as seen by host
+ * @device_us:          Bandwidth in microseconds per transfer as seen by device
  * @host_interval:      Interval between transfers as seen by the host.  If
  *                      the host is high speed and the device is low speed this
  *                      will be 8 times device interval.
- * @next_active_frame:  (Micro)frame before we next need to put something on
+ * @device_interval:    Interval between transfers as seen by the device.
+ *                      interval.
+ * @next_active_frame:  (Micro)frame _before_ we next need to put something on
  *                      the bus.  We'll move the qh to active here.  If the
  *                      host is in high speed mode this will be a uframe.  If
  *                      the host is in low speed mode this will be a full frame.
  * @start_active_frame: If we are partway through a split transfer, this will be
  *			what next_active_frame was when we started.  Otherwise
  *			it should always be the same as next_active_frame.
- * @assigned_uframe:    The uframe (0 -7) assigned by dwc2_find_uframe().
- * @frame_usecs:        Internal variable used by the microframe scheduler
+ * @num_hs_transfers:   Number of transfers in hs_transfers.
+ *                      Normally this is 1 but can be more than one for splits.
+ *                      Always >= 1 unless the host is in low/full speed mode.
+ * @hs_transfers:       Transfers that are scheduled as seen by the high speed
+ *                      bus.  Not used if host is in low or full speed mode (but
+ *                      note that it IS USED if the device is low or full speed
+ *                      as long as the HOST is in high speed mode).
+ * @ls_start_schedule_slice: Start time (in slices) on the low speed bus
+ *                           schedule that's being used by this device.  This
+ *			     will be on the periodic_bitmap in a
+ *                           "struct dwc2_tt".  Not used if this device is high
+ *                           speed.  Note that this is in "schedule slice" which
+ *                           is tightly packed.
+ * @ls_duration_us:     Duration on the low speed bus schedule.
  * @ntd:                Actual number of transfer descriptors in a list
  * @qtd_list:           List of QTDs for this QH
  * @channel:            Host channel currently processing transfers for this QH
@@ -261,8 +313,12 @@ enum dwc2_transaction_type {
  *                      descriptor and indicates original XferSize value for the
  *                      descriptor
  * @unreserve_timer:    Timer for releasing periodic reservation.
+ * @dwc2_tt:            Pointer to our tt info (or NULL if no tt).
+ * @ttport:             Port number within our tt.
  * @tt_buffer_dirty     True if clear_tt_buffer_complete is pending
  * @unreserve_pending:  True if we planned to unreserve but haven't yet.
+ * @schedule_low_speed: True if we have a low/full speed component (either the
+ *			host is in low/full speed mode or do_split).
  *
  * A Queue Head (QH) holds the static characteristics of an endpoint and
  * maintains a list of transfers (QTDs) for that endpoint. A QH structure may
@@ -280,11 +336,14 @@ struct dwc2_qh {
 	u8 td_first;
 	u8 td_last;
 	u16 host_us;
+	u16 device_us;
 	u16 host_interval;
+	u16 device_interval;
 	u16 next_active_frame;
 	u16 start_active_frame;
-	u16 assigned_uframe;
-	u16 frame_usecs[8];
+	s16 num_hs_transfers;
+	struct dwc2_hs_transfer_time hs_transfers[DWC2_HS_SCHEDULE_UFRAMES];
+	u32 ls_start_schedule_slice;
 	u16 ntd;
 	struct list_head qtd_list;
 	struct dwc2_host_chan *channel;
@@ -294,8 +353,11 @@ struct dwc2_qh {
 	u32 desc_list_sz;
 	u32 *n_bytes;
 	struct timer_list unreserve_timer;
+	struct dwc2_tt *dwc_tt;
+	int ttport;
 	unsigned tt_buffer_dirty:1;
 	unsigned unreserve_pending:1;
+	unsigned schedule_low_speed:1;
 };
 
 /**
@@ -462,7 +524,6 @@ extern void dwc2_hcd_queue_transactions(struct dwc2_hsotg *hsotg,
 
 /* Schedule Queue Functions */
 /* Implemented in hcd_queue.c */
-extern void dwc2_hcd_init_usecs(struct dwc2_hsotg *hsotg);
 extern struct dwc2_qh *dwc2_hcd_qh_create(struct dwc2_hsotg *hsotg,
 					  struct dwc2_hcd_urb *urb,
 					  gfp_t mem_flags);
@@ -728,6 +789,12 @@ extern void dwc2_host_start(struct dwc2_hsotg *hsotg);
 extern void dwc2_host_disconnect(struct dwc2_hsotg *hsotg);
 extern void dwc2_host_hub_info(struct dwc2_hsotg *hsotg, void *context,
 			       int *hub_addr, int *hub_port);
+extern struct dwc2_tt *dwc2_host_get_tt_info(struct dwc2_hsotg *hsotg,
+					     void *context, gfp_t mem_flags,
+					     int *ttport);
+
+extern void dwc2_host_put_tt_info(struct dwc2_hsotg *hsotg,
+				  struct dwc2_tt *dwc_tt);
 extern int dwc2_host_get_speed(struct dwc2_hsotg *hsotg, void *context);
 extern void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd,
 			       int status);
diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index 7f54eeb2bf5f..83c467cd6045 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -136,116 +136,933 @@ static int dwc2_check_periodic_bandwidth(struct dwc2_hsotg *hsotg,
 }
 
 /**
- * Microframe scheduler
- * track the total use in hsotg->frame_usecs
- * keep each qh use in qh->frame_usecs
- * when surrendering the qh then donate the time back
+ * pmap_schedule() - Schedule time in a periodic bitmap (pmap).
+ *
+ * @map:             The bitmap representing the schedule; will be updated
+ *                   upon success.
+ * @bits_per_period: The schedule represents several periods.  This is how many
+ *                   bits are in each period.  It's assumed that the beginning
+ *                   of the schedule will repeat after its end.
+ * @periods_in_map:  The number of periods in the schedule.
+ * @num_bits:        The number of bits we need per period we want to reserve
+ *                   in this function call.
+ * @interval:        How often we need to be scheduled for the reservation this
+ *                   time.  1 means every period.  2 means every other period.
+ *                   ...you get the picture?
+ * @start:           The bit number to start at.  Normally 0.  Must be within
+ *                   the interval or we return failure right away.
+ * @only_one_period: Normally we'll allow picking a start anywhere within the
+ *                   first interval, since we can still make all repetition
+ *                   requirements by doing that.  However, if you pass true
+ *                   here then we'll return failure if we can't fit within
+ *                   the period that "start" is in.
+ *
+ * The idea here is that we want to schedule time for repeating events that all
+ * want the same resource.  The resource is divided into fixed-sized periods
+ * and the events want to repeat every "interval" periods.  The schedule
+ * granularity is one bit.
+ *
+ * To keep things "simple", we'll represent our schedule with a bitmap that
+ * contains a fixed number of periods.  This gets rid of a lot of complexity
+ * but does mean that we need to handle things specially (and non-ideally) if
+ * the number of the periods in the schedule doesn't match well with the
+ * intervals that we're trying to schedule.
+ *
+ * Here's an explanation of the scheme we'll implement, assuming 8 periods.
+ * - If interval is 1, we need to take up space in each of the 8
+ *   periods we're scheduling.  Easy.
+ * - If interval is 2, we need to take up space in half of the
+ *   periods.  Again, easy.
+ * - If interval is 3, we actually need to fall back to interval 1.
+ *   Why?  Because we might need time in any period.  AKA for the
+ *   first 8 periods, we'll be in slot 0, 3, 6.  Then we'll be
+ *   in slot 1, 4, 7.  Then we'll be in 2, 5.  Then we'll be back to
+ *   0, 3, and 6.  Since we could be in any frame we need to reserve
+ *   for all of them.  Sucks, but that's what you gotta do.  Note that
+ *   if we were instead scheduling 8 * 3 = 24 we'd do much better, but
+ *   then we need more memory and time to do scheduling.
+ * - If interval is 4, easy.
+ * - If interval is 5, we again need interval 1.  The schedule will be
+ *   0, 5, 2, 7, 4, 1, 6, 3, 0
+ * - If interval is 6, we need interval 2.  0, 6, 4, 2.
+ * - If interval is 7, we need interval 1.
+ * - If interval is 8, we need interval 8.
+ *
+ * If you do the math, you'll see that we need to pretend that interval is
+ * equal to the greatest_common_divisor(interval, periods_in_map).
+ *
+ * Note that at the moment this function tends to front-pack the schedule.
+ * In some cases that's really non-ideal (it's hard to schedule things that
+ * need to repeat every period).  In other cases it's perfect (you can easily
+ * schedule bigger, less often repeating things).
+ *
+ * Here's the algorithm in action (8 periods, 5 bits per period):
+ *  |**   |     |**   |     |**   |     |**   |     |   OK 2 bits, intv 2 at 0
+ *  |*****|  ***|*****|  ***|*****|  ***|*****|  ***|   OK 3 bits, intv 3 at 2
+ *  |*****|* ***|*****|  ***|*****|* ***|*****|  ***|   OK 1 bits, intv 4 at 5
+ *  |**   |*    |**   |     |**   |*    |**   |     | Remv 3 bits, intv 3 at 2
+ *  |***  |*    |***  |     |***  |*    |***  |     |   OK 1 bits, intv 6 at 2
+ *  |**** |*  * |**** |   * |**** |*  * |**** |   * |   OK 1 bits, intv 1 at 3
+ *  |**** |**** |**** | *** |**** |**** |**** | *** |   OK 2 bits, intv 2 at 6
+ *  |*****|*****|*****| ****|*****|*****|*****| ****|   OK 1 bits, intv 1 at 4
+ *  |*****|*****|*****| ****|*****|*****|*****| ****| FAIL 1 bits, intv 1
+ *  |  ***|*****|  ***| ****|  ***|*****|  ***| ****| Remv 2 bits, intv 2 at 0
+ *  |  ***| ****|  ***| ****|  ***| ****|  ***| ****| Remv 1 bits, intv 4 at 5
+ *  |   **| ****|   **| ****|   **| ****|   **| ****| Remv 1 bits, intv 6 at 2
+ *  |    *| ** *|    *| ** *|    *| ** *|    *| ** *| Remv 1 bits, intv 1 at 3
+ *  |    *|    *|    *|    *|    *|    *|    *|    *| Remv 2 bits, intv 2 at 6
+ *  |     |     |     |     |     |     |     |     | Remv 1 bits, intv 1 at 4
+ *  |**   |     |**   |     |**   |     |**   |     |   OK 2 bits, intv 2 at 0
+ *  |***  |     |**   |     |***  |     |**   |     |   OK 1 bits, intv 4 at 2
+ *  |*****|     |** **|     |*****|     |** **|     |   OK 2 bits, intv 2 at 3
+ *  |*****|*    |** **|     |*****|*    |** **|     |   OK 1 bits, intv 4 at 5
+ *  |*****|***  |** **| **  |*****|***  |** **| **  |   OK 2 bits, intv 2 at 6
+ *  |*****|*****|** **| ****|*****|*****|** **| ****|   OK 2 bits, intv 2 at 8
+ *  |*****|*****|*****| ****|*****|*****|*****| ****|   OK 1 bits, intv 4 at 12
+ *
+ * This function is pretty generic and could be easily abstracted if anything
+ * needed similar scheduling.
+ *
+ * Returns either -ENOSPC or a >= 0 start bit which should be passed to the
+ * unschedule routine.  The map bitmap will be updated on a non-error result.
  */
-static const unsigned short max_uframe_usecs[] = {
-	100, 100, 100, 100, 100, 100, 30, 0
-};
+static int pmap_schedule(unsigned long *map, int bits_per_period,
+			 int periods_in_map, int num_bits,
+			 int interval, int start, bool only_one_period)
+{
+	int interval_bits;
+	int to_reserve;
+	int first_end;
+	int i;
+
+	if (num_bits > bits_per_period)
+		return -ENOSPC;
+
+	/* Adjust interval as per description */
+	interval = gcd(interval, periods_in_map);
+
+	interval_bits = bits_per_period * interval;
+	to_reserve = periods_in_map / interval;
 
-void dwc2_hcd_init_usecs(struct dwc2_hsotg *hsotg)
+	/* If start has gotten us past interval then we can't schedule */
+	if (start >= interval_bits)
+		return -ENOSPC;
+
+	if (only_one_period)
+		/* Must fit within same period as start; end at begin of next */
+		first_end = (start / bits_per_period + 1) * bits_per_period;
+	else
+		/* Can fit anywhere in the first interval */
+		first_end = interval_bits;
+
+	/*
+	 * We'll try to pick the first repetition, then see if that time
+	 * is free for each of the subsequent repetitions.  If it's not
+	 * we'll adjust the start time for the next search of the first
+	 * repetition.
+	 */
+	while (start + num_bits <= first_end) {
+		int end;
+
+		/* Need to stay within this period */
+		end = (start / bits_per_period + 1) * bits_per_period;
+
+		/* Look for num_bits us in this microframe starting at start */
+		start = bitmap_find_next_zero_area(map, end, start, num_bits,
+						   0);
+
+		/*
+		 * We should get start >= end if we fail.  We might be
+		 * able to check the next microframe depending on the
+		 * interval, so continue on (start already updated).
+		 */
+		if (start >= end) {
+			start = end;
+			continue;
+		}
+
+		/* At this point we have a valid point for first one */
+		for (i = 1; i < to_reserve; i++) {
+			int ith_start = start + interval_bits * i;
+			int ith_end = end + interval_bits * i;
+			int ret;
+
+			/* Use this as a dumb "check if bits are 0" */
+			ret = bitmap_find_next_zero_area(
+				map, ith_start + num_bits, ith_start, num_bits,
+				0);
+
+			/* We got the right place, continue checking */
+			if (ret == ith_start)
+				continue;
+
+			/* Move start up for next time and exit for loop */
+			ith_start = bitmap_find_next_zero_area(
+				map, ith_end, ith_start, num_bits, 0);
+			if (ith_start >= ith_end)
+				/* Need a while new period next time */
+				start = end;
+			else
+				start = ith_start - interval_bits * i;
+			break;
+		}
+
+		/* If didn't exit the for loop with a break, we have success */
+		if (i == to_reserve)
+			break;
+	}
+
+	if (start + num_bits > first_end)
+		return -ENOSPC;
+
+	for (i = 0; i < to_reserve; i++) {
+		int ith_start = start + interval_bits * i;
+
+		bitmap_set(map, ith_start, num_bits);
+	}
+
+	return start;
+}
+
+/**
+ * pmap_unschedule() - Undo work done by pmap_schedule()
+ *
+ * @map:             See pmap_schedule().
+ * @bits_per_period: See pmap_schedule().
+ * @periods_in_map:  See pmap_schedule().
+ * @num_bits:        The number of bits that was passed to schedule.
+ * @interval:        The interval that was passed to schedule.
+ * @start:           The return value from pmap_schedule().
+ */
+static void pmap_unschedule(unsigned long *map, int bits_per_period,
+			    int periods_in_map, int num_bits,
+			    int interval, int start)
 {
+	int interval_bits;
+	int to_release;
 	int i;
 
-	for (i = 0; i < 8; i++)
-		hsotg->frame_usecs[i] = max_uframe_usecs[i];
+	/* Adjust interval as per description in pmap_schedule() */
+	interval = gcd(interval, periods_in_map);
+
+	interval_bits = bits_per_period * interval;
+	to_release = periods_in_map / interval;
+
+	for (i = 0; i < to_release; i++) {
+		int ith_start = start + interval_bits * i;
+
+		bitmap_clear(map, ith_start, num_bits);
+	}
 }
 
-static int dwc2_find_single_uframe(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
+/*
+ * cat_printf() - A printf() + strcat() helper
+ *
+ * This is useful for concatenating a bunch of strings where each string is
+ * constructed using printf.
+ *
+ * @buf:   The destination buffer; will be updated to point after the printed
+ *         data.
+ * @size:  The number of bytes in the buffer (includes space for '\0').
+ * @fmt:   The format for printf.
+ * @...:   The args for printf.
+ */
+static void cat_printf(char **buf, size_t *size, const char *fmt, ...)
 {
-	unsigned short utime = qh->host_us;
+	va_list args;
 	int i;
 
-	for (i = 0; i < 8; i++) {
-		/* At the start hsotg->frame_usecs[i] = max_uframe_usecs[i] */
-		if (utime <= hsotg->frame_usecs[i]) {
-			hsotg->frame_usecs[i] -= utime;
-			qh->frame_usecs[i] += utime;
-			return i;
-		}
+	if (*size == 0)
+		return;
+
+	va_start(args, fmt);
+	i = vsnprintf(*buf, *size, fmt, args);
+	va_end(args);
+
+	if (i >= *size) {
+		*buf[*size - 1] = '\0';
+		*buf += *size;
+		*size = 0;
+	} else {
+		*buf += i;
+		*size -= i;
 	}
-	return -ENOSPC;
 }
 
 /*
- * use this for FS apps that can span multiple uframes
+ * pmap_print() - Print the given periodic map
+ *
+ * Will attempt to print out the periodic schedule.
+ *
+ * @map:             See pmap_schedule().
+ * @bits_per_period: See pmap_schedule().
+ * @periods_in_map:  See pmap_schedule().
+ * @period_name:     The name of 1 period, like "uFrame"
+ * @units:           The name of the units, like "us".
+ * @print_fn:        The function to call for printing.
+ * @print_data:      Opaque data to pass to the print function.
+ */
+static void pmap_print(unsigned long *map, int bits_per_period,
+		       int periods_in_map, const char *period_name,
+		       const char *units,
+		       void (*print_fn)(const char *str, void *data),
+		       void *print_data)
+{
+	int period;
+
+	for (period = 0; period < periods_in_map; period++) {
+		char tmp[64];
+		char *buf = tmp;
+		size_t buf_size = sizeof(tmp);
+		int period_start = period * bits_per_period;
+		int period_end = period_start + bits_per_period;
+		int start = 0;
+		int count = 0;
+		bool printed = false;
+		int i;
+
+		for (i = period_start; i < period_end + 1; i++) {
+			/* Handle case when ith bit is set */
+			if (i < period_end &&
+			    bitmap_find_next_zero_area(map, i + 1,
+						       i, 1, 0) != i) {
+				if (count == 0)
+					start = i - period_start;
+				count++;
+				continue;
+			}
+
+			/* ith bit isn't set; don't care if count == 0 */
+			if (count == 0)
+				continue;
+
+			if (!printed)
+				cat_printf(&buf, &buf_size, "%s %d: ",
+					   period_name, period);
+			else
+				cat_printf(&buf, &buf_size, ", ");
+			printed = true;
+
+			cat_printf(&buf, &buf_size, "%d %s -%3d %s", start,
+				   units, start + count - 1, units);
+			count = 0;
+		}
+
+		if (printed)
+			print_fn(tmp, print_data);
+	}
+}
+
+/**
+ * dwc2_get_ls_map() - Get the map used for the given qh
+ *
+ * @hsotg: The HCD state structure for the DWC OTG controller.
+ * @qh:    QH for the periodic transfer.
+ *
+ * We'll always get the periodic map out of our TT.  Note that even if we're
+ * running the host straight in low speed / full speed mode it appears as if
+ * a TT is allocated for us, so we'll use it.  If that ever changes we can
+ * add logic here to get a map out of "hsotg" if !qh->do_split.
+ *
+ * Returns: the map or NULL if a map couldn't be found.
+ */
+static unsigned long *dwc2_get_ls_map(struct dwc2_hsotg *hsotg,
+				      struct dwc2_qh *qh)
+{
+	unsigned long *map;
+
+	/* Don't expect to be missing a TT and be doing low speed scheduling */
+	if (WARN_ON(!qh->dwc_tt))
+		return NULL;
+
+	/* Get the map and adjust if this is a multi_tt hub */
+	map = qh->dwc_tt->periodic_bitmaps;
+	if (qh->dwc_tt->usb_tt->multi)
+		map += DWC2_ELEMENTS_PER_LS_BITMAP * qh->ttport;
+
+	return map;
+}
+
+struct dwc2_qh_print_data {
+	struct dwc2_hsotg *hsotg;
+	struct dwc2_qh *qh;
+};
+
+/**
+ * dwc2_qh_print() - Helper function for dwc2_qh_schedule_print()
+ *
+ * @str:  The string to print
+ * @data: A pointer to a struct dwc2_qh_print_data
  */
-static int dwc2_find_multi_uframe(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
+static void dwc2_qh_print(const char *str, void *data)
 {
-	unsigned short utime = qh->host_us;
-	unsigned short xtime;
-	int t_left;
+	struct dwc2_qh_print_data *print_data = data;
+
+	dwc2_sch_dbg(print_data->hsotg, "QH=%p ...%s\n", print_data->qh, str);
+}
+
+/**
+ * dwc2_qh_schedule_print() - Print the periodic schedule
+ *
+ * @hsotg: The HCD state structure for the DWC OTG controller.
+ * @qh:    QH to print.
+ */
+static void dwc2_qh_schedule_print(struct dwc2_hsotg *hsotg,
+				   struct dwc2_qh *qh)
+{
+	struct dwc2_qh_print_data print_data = { hsotg, qh };
 	int i;
-	int j;
-	int k;
 
-	for (i = 0; i < 8; i++) {
-		if (hsotg->frame_usecs[i] <= 0)
+	/*
+	 * The printing functions are quite slow and inefficient.
+	 * If we don't have tracing turned on, don't run unless the special
+	 * define is turned on.
+	 */
+#ifndef DWC2_PRINT_SCHEDULE
+	return;
+#endif
+
+	if (qh->schedule_low_speed) {
+		unsigned long *map = dwc2_get_ls_map(hsotg, qh);
+
+		dwc2_sch_dbg(hsotg, "QH=%p LS/FS trans: %d=>%d us @ %d us",
+			     qh, qh->device_us,
+			     DWC2_ROUND_US_TO_SLICE(qh->device_us),
+			     DWC2_US_PER_SLICE * qh->ls_start_schedule_slice);
+
+		if (map) {
+			dwc2_sch_dbg(hsotg,
+				     "QH=%p Whole low/full speed map %p now:\n",
+				     qh, map);
+			pmap_print(map, DWC2_LS_PERIODIC_SLICES_PER_FRAME,
+				   DWC2_LS_SCHEDULE_FRAMES, "Frame ", "slices",
+				   dwc2_qh_print, &print_data);
+		}
+	}
+
+	for (i = 0; i < qh->num_hs_transfers; i++) {
+		struct dwc2_hs_transfer_time *trans_time = qh->hs_transfers + i;
+		int uframe = trans_time->start_schedule_us /
+			     DWC2_HS_PERIODIC_US_PER_UFRAME;
+		int rel_us = trans_time->start_schedule_us %
+			     DWC2_HS_PERIODIC_US_PER_UFRAME;
+
+		dwc2_sch_dbg(hsotg,
+			     "QH=%p HS trans #%d: %d us @ uFrame %d + %d us\n",
+			     qh, i, trans_time->duration_us, uframe, rel_us);
+	}
+	if (qh->num_hs_transfers) {
+		dwc2_sch_dbg(hsotg, "QH=%p Whole high speed map now:\n", qh);
+		pmap_print(hsotg->hs_periodic_bitmap,
+			   DWC2_HS_PERIODIC_US_PER_UFRAME,
+			   DWC2_HS_SCHEDULE_UFRAMES, "uFrame", "us",
+			   dwc2_qh_print, &print_data);
+	}
+
+}
+
+/**
+ * dwc2_ls_pmap_schedule() - Schedule a low speed QH
+ *
+ * @hsotg:        The HCD state structure for the DWC OTG controller.
+ * @qh:           QH for the periodic transfer.
+ * @search_slice: We'll start trying to schedule at the passed slice.
+ *                Remember that slices are the units of the low speed
+ *                schedule (think 25us or so).
+ *
+ * Wraps pmap_schedule() with the right parameters for low speed scheduling.
+ *
+ * Normally we schedule low speed devices on the map associated with the TT.
+ *
+ * Returns: 0 for success or an error code.
+ */
+static int dwc2_ls_pmap_schedule(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
+				 int search_slice)
+{
+	int slices = DIV_ROUND_UP(qh->device_us, DWC2_US_PER_SLICE);
+	unsigned long *map = dwc2_get_ls_map(hsotg, qh);
+	int slice;
+
+	if (map == NULL)
+		return -EINVAL;
+
+	/*
+	 * Schedule on the proper low speed map with our low speed scheduling
+	 * parameters.  Note that we use the "device_interval" here since
+	 * we want the low speed interval and the only way we'd be in this
+	 * function is if the device is low speed.
+	 *
+	 * If we happen to be doing low speed and high speed scheduling for the
+	 * same transaction (AKA we have a split) we always do low speed first.
+	 * That means we can always pass "false" for only_one_period (that
+	 * parameters is only useful when we're trying to get one schedule to
+	 * match what we already planned in the other schedule).
+	 */
+	slice = pmap_schedule(map, DWC2_LS_PERIODIC_SLICES_PER_FRAME,
+			      DWC2_LS_SCHEDULE_FRAMES, slices,
+			      qh->device_interval, search_slice, false);
+
+	if (slice < 0)
+		return slice;
+
+	qh->ls_start_schedule_slice = slice;
+	return 0;
+}
+
+/**
+ * dwc2_ls_pmap_unschedule() - Undo work done by dwc2_ls_pmap_schedule()
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static void dwc2_ls_pmap_unschedule(struct dwc2_hsotg *hsotg,
+				    struct dwc2_qh *qh)
+{
+	int slices = DIV_ROUND_UP(qh->device_us, DWC2_US_PER_SLICE);
+	unsigned long *map = dwc2_get_ls_map(hsotg, qh);
+
+	/* Schedule should have failed, so no worries about no error code */
+	if (map == NULL)
+		return;
+
+	pmap_unschedule(map, DWC2_LS_PERIODIC_SLICES_PER_FRAME,
+			DWC2_LS_SCHEDULE_FRAMES, slices, qh->device_interval,
+			qh->ls_start_schedule_slice);
+}
+
+/**
+ * dwc2_hs_pmap_schedule - Schedule in the main high speed schedule
+ *
+ * This will schedule something on the main dwc2 schedule.
+ *
+ * We'll start looking in qh->hs_transfers[index].start_schedule_us.  We'll
+ * update this with the result upon success.  We also use the duration from
+ * the same structure.
+ *
+ * @hsotg:           The HCD state structure for the DWC OTG controller.
+ * @qh:              QH for the periodic transfer.
+ * @only_one_period: If true we will limit ourselves to just looking at
+ *                   one period (aka one 100us chunk).  This is used if we have
+ *                   already scheduled something on the low speed schedule and
+ *                   need to find something that matches on the high speed one.
+ * @index:           The index into qh->hs_transfers that we're working with.
+ *
+ * Returns: 0 for success or an error code.  Upon success the
+ *          dwc2_hs_transfer_time specified by "index" will be updated.
+ */
+static int dwc2_hs_pmap_schedule(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
+				 bool only_one_period, int index)
+{
+	struct dwc2_hs_transfer_time *trans_time = qh->hs_transfers + index;
+	int us;
+
+	us = pmap_schedule(hsotg->hs_periodic_bitmap,
+			   DWC2_HS_PERIODIC_US_PER_UFRAME,
+			   DWC2_HS_SCHEDULE_UFRAMES, trans_time->duration_us,
+			   qh->host_interval, trans_time->start_schedule_us,
+			   only_one_period);
+
+	if (us < 0)
+		return us;
+
+	trans_time->start_schedule_us = us;
+	return 0;
+}
+
+/**
+ * dwc2_ls_pmap_unschedule() - Undo work done by dwc2_hs_pmap_schedule()
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static void dwc2_hs_pmap_unschedule(struct dwc2_hsotg *hsotg,
+				    struct dwc2_qh *qh, int index)
+{
+	struct dwc2_hs_transfer_time *trans_time = qh->hs_transfers + index;
+
+	pmap_unschedule(hsotg->hs_periodic_bitmap,
+			DWC2_HS_PERIODIC_US_PER_UFRAME,
+			DWC2_HS_SCHEDULE_UFRAMES, trans_time->duration_us,
+			qh->host_interval, trans_time->start_schedule_us);
+}
+
+/**
+ * dwc2_uframe_schedule_split - Schedule a QH for a periodic split xfer.
+ *
+ * This is the most complicated thing in USB.  We have to find matching time
+ * in both the global high speed schedule for the port and the low speed
+ * schedule for the TT associated with the given device.
+ *
+ * Being here means that the host must be running in high speed mode and the
+ * device is in low or full speed mode (and behind a hub).
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static int dwc2_uframe_schedule_split(struct dwc2_hsotg *hsotg,
+				      struct dwc2_qh *qh)
+{
+	int bytecount = dwc2_hb_mult(qh->maxp) * dwc2_max_packet(qh->maxp);
+	int ls_search_slice;
+	int err = 0;
+	int host_interval_in_sched;
+
+	/*
+	 * The interval (how often to repeat) in the actual host schedule.
+	 * See pmap_schedule() for gcd() explanation.
+	 */
+	host_interval_in_sched = gcd(qh->host_interval,
+				     DWC2_HS_SCHEDULE_UFRAMES);
+
+	/*
+	 * We always try to find space in the low speed schedule first, then
+	 * try to find high speed time that matches.  If we don't, we'll bump
+	 * up the place we start searching in the low speed schedule and try
+	 * again.  To start we'll look right at the beginning of the low speed
+	 * schedule.
+	 *
+	 * Note that this will tend to front-load the high speed schedule.
+	 * We may eventually want to try to avoid this by either considering
+	 * both schedules together or doing some sort of round robin.
+	 */
+	ls_search_slice = 0;
+
+	while (ls_search_slice < DWC2_LS_SCHEDULE_SLICES) {
+		int start_s_uframe;
+		int ssplit_s_uframe;
+		int second_s_uframe;
+		int rel_uframe;
+		int first_count;
+		int middle_count;
+		int end_count;
+		int first_data_bytes;
+		int other_data_bytes;
+		int i;
+
+		if (qh->schedule_low_speed) {
+			err = dwc2_ls_pmap_schedule(hsotg, qh, ls_search_slice);
+
+			/*
+			 * If we got an error here there's no other magic we
+			 * can do, so bail.  All the looping above is only
+			 * helpful to redo things if we got a low speed slot
+			 * and then couldn't find a matching high speed slot.
+			 */
+			if (err)
+				return err;
+		} else {
+			/* Must be missing the tt structure?  Why? */
+			WARN_ON_ONCE(1);
+		}
+
+		/*
+		 * This will give us a number 0 - 7 if
+		 * DWC2_LS_SCHEDULE_FRAMES == 1, or 0 - 15 if == 2, or ...
+		 */
+		start_s_uframe = qh->ls_start_schedule_slice /
+				 DWC2_SLICES_PER_UFRAME;
+
+		/* Get a number that's always 0 - 7 */
+		rel_uframe = (start_s_uframe % 8);
+
+		/*
+		 * If we were going to start in uframe 7 then we would need to
+		 * issue a start split in uframe 6, which spec says is not OK.
+		 * Move on to the next full frame (assuming there is one).
+		 *
+		 * See 11.18.4 Host Split Transaction Scheduling Requirements
+		 * bullet 1.
+		 */
+		if (rel_uframe == 7) {
+			if (qh->schedule_low_speed)
+				dwc2_ls_pmap_unschedule(hsotg, qh);
+			ls_search_slice =
+				(qh->ls_start_schedule_slice /
+				 DWC2_LS_PERIODIC_SLICES_PER_FRAME + 1) *
+				DWC2_LS_PERIODIC_SLICES_PER_FRAME;
 			continue;
+		}
 
 		/*
-		 * we need n consecutive slots so use j as a start slot
-		 * j plus j+1 must be enough time (for now)
+		 * For ISOC in:
+		 * - start split            (frame -1)
+		 * - complete split w/ data (frame +1)
+		 * - complete split w/ data (frame +2)
+		 * - ...
+		 * - complete split w/ data (frame +num_data_packets)
+		 * - complete split w/ data (frame +num_data_packets+1)
+		 * - complete split w/ data (frame +num_data_packets+2, max 8)
+		 *   ...though if frame was "0" then max is 7...
+		 *
+		 * For ISOC out we might need to do:
+		 * - start split w/ data    (frame -1)
+		 * - start split w/ data    (frame +0)
+		 * - ...
+		 * - start split w/ data    (frame +num_data_packets-2)
+		 *
+		 * For INTERRUPT in we might need to do:
+		 * - start split            (frame -1)
+		 * - complete split w/ data (frame +1)
+		 * - complete split w/ data (frame +2)
+		 * - complete split w/ data (frame +3, max 8)
+		 *
+		 * For INTERRUPT out we might need to do:
+		 * - start split w/ data    (frame -1)
+		 * - complete split         (frame +1)
+		 * - complete split         (frame +2)
+		 * - complete split         (frame +3, max 8)
+		 *
+		 * Start adjusting!
 		 */
-		xtime = hsotg->frame_usecs[i];
-		for (j = i + 1; j < 8; j++) {
-			/*
-			 * if we add this frame remaining time to xtime we may
-			 * be OK, if not we need to test j for a complete frame
-			 */
-			if (xtime + hsotg->frame_usecs[j] < utime) {
-				if (hsotg->frame_usecs[j] <
-							max_uframe_usecs[j])
-					continue;
+		ssplit_s_uframe = (start_s_uframe +
+				   host_interval_in_sched - 1) %
+				  host_interval_in_sched;
+		if (qh->ep_type == USB_ENDPOINT_XFER_ISOC && !qh->ep_is_in)
+			second_s_uframe = start_s_uframe;
+		else
+			second_s_uframe = start_s_uframe + 1;
+
+		/* First data transfer might not be all 188 bytes. */
+		first_data_bytes = 188 -
+			DIV_ROUND_UP(188 * (qh->ls_start_schedule_slice %
+					    DWC2_SLICES_PER_UFRAME),
+				     DWC2_SLICES_PER_UFRAME);
+		if (first_data_bytes > bytecount)
+			first_data_bytes = bytecount;
+		other_data_bytes = bytecount - first_data_bytes;
+
+		/*
+		 * For now, skip OUT xfers where first xfer is partial
+		 *
+		 * Main dwc2 code assumes:
+		 * - INT transfers never get split in two.
+		 * - ISOC transfers can always transfer 188 bytes the first
+		 *   time.
+		 *
+		 * Until that code is fixed, try again if the first transfer
+		 * couldn't transfer everything.
+		 *
+		 * This code can be removed if/when the rest of dwc2 handles
+		 * the above cases.  Until it's fixed we just won't be able
+		 * to schedule quite as tightly.
+		 */
+		if (!qh->ep_is_in &&
+		    (first_data_bytes != min_t(int, 188, bytecount))) {
+			dwc2_sch_dbg(hsotg,
+				     "QH=%p avoiding broken 1st xfer (%d, %d)\n",
+				     qh, first_data_bytes, bytecount);
+			if (qh->schedule_low_speed)
+				dwc2_ls_pmap_unschedule(hsotg, qh);
+			ls_search_slice = (start_s_uframe + 1) *
+				DWC2_SLICES_PER_UFRAME;
+			continue;
+		}
+
+		/* Start by assuming transfers for the bytes */
+		qh->num_hs_transfers = 1 + DIV_ROUND_UP(other_data_bytes, 188);
+
+		/*
+		 * Everything except ISOC OUT has extra transfers.  Rules are
+		 * complicated.  See 11.18.4 Host Split Transaction Scheduling
+		 * Requirements bullet 3.
+		 */
+		if (qh->ep_type == USB_ENDPOINT_XFER_INT) {
+			if (rel_uframe == 6)
+				qh->num_hs_transfers += 2;
+			else
+				qh->num_hs_transfers += 3;
+
+			if (qh->ep_is_in) {
+				/*
+				 * First is start split, middle/end is data.
+				 * Allocate full data bytes for all data.
+				 */
+				first_count = 4;
+				middle_count = bytecount;
+				end_count = bytecount;
+			} else {
+				/*
+				 * First is data, middle/end is complete.
+				 * First transfer and second can have data.
+				 * Rest should just have complete split.
+				 */
+				first_count = first_data_bytes;
+				middle_count = max_t(int, 4, other_data_bytes);
+				end_count = 4;
 			}
-			if (xtime >= utime) {
-				t_left = utime;
-				for (k = i; k < 8; k++) {
-					t_left -= hsotg->frame_usecs[k];
-					if (t_left <= 0) {
-						qh->frame_usecs[k] +=
-							hsotg->frame_usecs[k]
-								+ t_left;
-						hsotg->frame_usecs[k] = -t_left;
-						return i;
-					} else {
-						qh->frame_usecs[k] +=
-							hsotg->frame_usecs[k];
-						hsotg->frame_usecs[k] = 0;
-					}
-				}
+		} else {
+			if (qh->ep_is_in) {
+				int last;
+
+				/* Account for the start split */
+				qh->num_hs_transfers++;
+
+				/* Calculate "L" value from spec */
+				last = rel_uframe + qh->num_hs_transfers + 1;
+
+				/* Start with basic case */
+				if (last <= 6)
+					qh->num_hs_transfers += 2;
+				else
+					qh->num_hs_transfers += 1;
+
+				/* Adjust downwards */
+				if (last >= 6 && rel_uframe == 0)
+					qh->num_hs_transfers--;
+
+				/* 1st = start; rest can contain data */
+				first_count = 4;
+				middle_count = min_t(int, 188, bytecount);
+				end_count = middle_count;
+			} else {
+				/* All contain data, last might be smaller */
+				first_count = first_data_bytes;
+				middle_count = min_t(int, 188,
+						     other_data_bytes);
+				end_count = other_data_bytes % 188;
 			}
-			/* add the frame time to x time */
-			xtime += hsotg->frame_usecs[j];
-			/* we must have a fully available next frame or break */
-			if (xtime < utime &&
-			   hsotg->frame_usecs[j] == max_uframe_usecs[j])
-				continue;
 		}
+
+		/* Assign durations per uFrame */
+		qh->hs_transfers[0].duration_us = HS_USECS_ISO(first_count);
+		for (i = 1; i < qh->num_hs_transfers - 1; i++)
+			qh->hs_transfers[i].duration_us =
+				HS_USECS_ISO(middle_count);
+		if (qh->num_hs_transfers > 1)
+			qh->hs_transfers[qh->num_hs_transfers - 1].duration_us =
+				HS_USECS_ISO(end_count);
+
+		/*
+		 * Assign start us.  The call below to dwc2_hs_pmap_schedule()
+		 * will start with these numbers but may adjust within the same
+		 * microframe.
+		 */
+		qh->hs_transfers[0].start_schedule_us =
+			ssplit_s_uframe * DWC2_HS_PERIODIC_US_PER_UFRAME;
+		for (i = 1; i < qh->num_hs_transfers; i++)
+			qh->hs_transfers[i].start_schedule_us =
+				((second_s_uframe + i - 1) %
+				 DWC2_HS_SCHEDULE_UFRAMES) *
+				DWC2_HS_PERIODIC_US_PER_UFRAME;
+
+		/* Try to schedule with filled in hs_transfers above */
+		for (i = 0; i < qh->num_hs_transfers; i++) {
+			err = dwc2_hs_pmap_schedule(hsotg, qh, true, i);
+			if (err)
+				break;
+		}
+
+		/* If we scheduled all w/out breaking out then we're all good */
+		if (i == qh->num_hs_transfers)
+			break;
+
+		for (; i >= 0; i--)
+			dwc2_hs_pmap_unschedule(hsotg, qh, i);
+
+		if (qh->schedule_low_speed)
+			dwc2_ls_pmap_unschedule(hsotg, qh);
+
+		/* Try again starting in the next microframe */
+		ls_search_slice = (start_s_uframe + 1) * DWC2_SLICES_PER_UFRAME;
 	}
-	return -ENOSPC;
+
+	if (ls_search_slice >= DWC2_LS_SCHEDULE_SLICES)
+		return -ENOSPC;
+
+	return 0;
 }
 
-static int dwc2_find_uframe(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
+/**
+ * dwc2_uframe_schedule_hs - Schedule a QH for a periodic high speed xfer.
+ *
+ * Basically this just wraps dwc2_hs_pmap_schedule() to provide a clean
+ * interface.
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static int dwc2_uframe_schedule_hs(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
+{
+	/* In non-split host and device time are the same */
+	WARN_ON(qh->host_us != qh->device_us);
+	WARN_ON(qh->host_interval != qh->device_interval);
+	WARN_ON(qh->num_hs_transfers != 1);
+
+	/* We'll have one transfer; init start to 0 before calling scheduler */
+	qh->hs_transfers[0].start_schedule_us = 0;
+	qh->hs_transfers[0].duration_us = qh->host_us;
+
+	return dwc2_hs_pmap_schedule(hsotg, qh, false, 0);
+}
+
+/**
+ * dwc2_uframe_schedule_ls - Schedule a QH for a periodic low/full speed xfer.
+ *
+ * Basically this just wraps dwc2_ls_pmap_schedule() to provide a clean
+ * interface.
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static int dwc2_uframe_schedule_ls(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
+{
+	/* In non-split host and device time are the same */
+	WARN_ON(qh->host_us != qh->device_us);
+	WARN_ON(qh->host_interval != qh->device_interval);
+	WARN_ON(!qh->schedule_low_speed);
+
+	/* Run on the main low speed schedule (no split = no hub = no TT) */
+	return dwc2_ls_pmap_schedule(hsotg, qh, 0);
+}
+
+/**
+ * dwc2_uframe_schedule - Schedule a QH for a periodic xfer.
+ *
+ * Calls one of the 3 sub-function depending on what type of transfer this QH
+ * is for.  Also adds some printing.
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static int dwc2_uframe_schedule(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 {
 	int ret;
 
-	if (qh->dev_speed == USB_SPEED_HIGH) {
-		/* if this is a hs transaction we need a full frame */
-		ret = dwc2_find_single_uframe(hsotg, qh);
-	} else {
-		/*
-		 * if this is a fs transaction we may need a sequence
-		 * of frames
-		 */
-		ret = dwc2_find_multi_uframe(hsotg, qh);
-	}
+	if (qh->dev_speed == USB_SPEED_HIGH)
+		ret = dwc2_uframe_schedule_hs(hsotg, qh);
+	else if (!qh->do_split)
+		ret = dwc2_uframe_schedule_ls(hsotg, qh);
+	else
+		ret = dwc2_uframe_schedule_split(hsotg, qh);
+
+	if (ret)
+		dwc2_sch_dbg(hsotg, "QH=%p Failed to schedule %d\n", qh, ret);
+	else
+		dwc2_qh_schedule_print(hsotg, qh);
+
 	return ret;
 }
 
 /**
+ * dwc2_uframe_unschedule - Undoes dwc2_uframe_schedule().
+ *
+ * @hsotg:       The HCD state structure for the DWC OTG controller.
+ * @qh:          QH for the periodic transfer.
+ */
+static void dwc2_uframe_unschedule(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
+{
+	int i;
+
+	for (i = 0; i < qh->num_hs_transfers; i++)
+		dwc2_hs_pmap_unschedule(hsotg, qh, i);
+
+	if (qh->schedule_low_speed)
+		dwc2_ls_pmap_unschedule(hsotg, qh);
+
+	dwc2_sch_dbg(hsotg, "QH=%p Unscheduled\n", qh);
+}
+
+/**
  * dwc2_pick_first_frame() - Choose 1st frame for qh that's already scheduled
  *
  * Takes a qh that has already been scheduled (which means we know we have the
@@ -265,6 +1082,7 @@ static void dwc2_pick_first_frame(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 	u16 frame_number;
 	u16 earliest_frame;
 	u16 next_active_frame;
+	u16 relative_frame;
 	u16 interval;
 
 	/*
@@ -292,8 +1110,36 @@ static void dwc2_pick_first_frame(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 		goto exit;
 	}
 
-	/* Adjust interval as per high speed schedule which has 8 uFrame */
-	interval = gcd(qh->host_interval, 8);
+	if (qh->dev_speed == USB_SPEED_HIGH || qh->do_split) {
+		/*
+		 * We're either at high speed or we're doing a split (which
+		 * means we're talking high speed to a hub).  In any case
+		 * the first frame should be based on when the first scheduled
+		 * event is.
+		 */
+		WARN_ON(qh->num_hs_transfers < 1);
+
+		relative_frame = qh->hs_transfers[0].start_schedule_us /
+				 DWC2_HS_PERIODIC_US_PER_UFRAME;
+
+		/* Adjust interval as per high speed schedule */
+		interval = gcd(qh->host_interval, DWC2_HS_SCHEDULE_UFRAMES);
+
+	} else {
+		/*
+		 * Low or full speed directly on dwc2.  Just about the same
+		 * as high speed but on a different schedule and with slightly
+		 * different adjustments.  Note that this works because when
+		 * the host and device are both low speed then frames in the
+		 * controller tick at low speed.
+		 */
+		relative_frame = qh->ls_start_schedule_slice /
+				 DWC2_LS_PERIODIC_SLICES_PER_FRAME;
+		interval = gcd(qh->host_interval, DWC2_LS_SCHEDULE_FRAMES);
+	}
+
+	/* Scheduler messed up if frame is past interval */
+	WARN_ON(relative_frame >= interval);
 
 	/*
 	 * We know interval must divide (HFNUM_MAX_FRNUM + 1) now that we've
@@ -310,7 +1156,7 @@ static void dwc2_pick_first_frame(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 	 * scheduled for.
 	 */
 	next_active_frame = dwc2_frame_num_inc(next_active_frame,
-					       qh->assigned_uframe);
+					       relative_frame);
 
 	/*
 	 * We actually need 1 frame before since the next_active_frame is
@@ -350,21 +1196,23 @@ static int dwc2_do_reserve(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 {
 	int status;
 
-	if (hsotg->core_params->uframe_sched > 0) {
-		status = dwc2_find_uframe(hsotg, qh);
-		if (status >= 0)
-			qh->assigned_uframe = status;
-	} else {
-		status = dwc2_periodic_channel_available(hsotg);
-		if (status) {
-			dev_info(hsotg->dev,
-				 "%s: No host channel available for periodic transfer\n",
-				 __func__);
-			return status;
-		}
+	/*
+	 * NOTE: need to figure out if we're using the microframe scheduler
+	 * whether can we actually use more channels as long as they're not
+	 * all active in the same microframe.  For now, don't allow.
+	 */
+	status = dwc2_periodic_channel_available(hsotg);
+	if (status) {
+		dev_info(hsotg->dev,
+			"%s: No host channel available for periodic transfer\n",
+			__func__);
+		return status;
+	}
 
+	if (hsotg->core_params->uframe_sched <= 0)
 		status = dwc2_check_periodic_bandwidth(hsotg, qh);
-	}
+	else
+		status = dwc2_uframe_schedule(hsotg, qh);
 
 	if (status) {
 		dev_dbg(hsotg->dev,
@@ -373,9 +1221,8 @@ static int dwc2_do_reserve(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 		return status;
 	}
 
-	if (hsotg->core_params->uframe_sched <= 0)
-		/* Reserve periodic channel */
-		hsotg->periodic_channels++;
+	/* Reserve periodic channel */
+	hsotg->periodic_channels++;
 
 	/* Update claimed usecs per (micro)frame */
 	hsotg->periodic_usecs += qh->host_us;
@@ -409,17 +1256,13 @@ static void dwc2_do_unreserve(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 	/* Update claimed usecs per (micro)frame */
 	hsotg->periodic_usecs -= qh->host_us;
 
-	if (hsotg->core_params->uframe_sched > 0) {
-		int i;
+	/* Release periodic channel reservation */
+	hsotg->periodic_channels--;
 
-		for (i = 0; i < 8; i++) {
-			hsotg->frame_usecs[i] += qh->frame_usecs[i];
-			qh->frame_usecs[i] = 0;
-		}
-	} else {
-		/* Release periodic channel reservation */
-		hsotg->periodic_channels--;
-	}
+	if (hsotg->core_params->uframe_sched <= 0)
+		return;
+
+	dwc2_uframe_unschedule(hsotg, qh);
 }
 
 /**
@@ -604,88 +1447,81 @@ static void dwc2_deschedule_periodic(struct dwc2_hsotg *hsotg,
  * @qh:    The QH to init
  * @urb:   Holds the information about the device/endpoint needed to initialize
  *         the QH
+ * @mem_flags: Flags for allocating memory.
  */
 static void dwc2_qh_init(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
-			 struct dwc2_hcd_urb *urb)
+			 struct dwc2_hcd_urb *urb, gfp_t mem_flags)
 {
-	int dev_speed, hub_addr, hub_port;
+	int dev_speed = dwc2_host_get_speed(hsotg, urb->priv);
+	u8 ep_type = dwc2_hcd_get_pipe_type(&urb->pipe_info);
+	bool ep_is_in = !!dwc2_hcd_is_pipe_in(&urb->pipe_info);
+	bool ep_is_isoc = (ep_type == USB_ENDPOINT_XFER_ISOC);
+	bool ep_is_int = (ep_type == USB_ENDPOINT_XFER_INT);
+	u32 hprt = dwc2_readl(hsotg->regs + HPRT0);
+	u32 prtspd = (hprt & HPRT0_SPD_MASK) >> HPRT0_SPD_SHIFT;
+	bool do_split = (prtspd == HPRT0_SPD_HIGH_SPEED &&
+			 dev_speed != USB_SPEED_HIGH);
+	int maxp = dwc2_hcd_get_mps(&urb->pipe_info);
+	int bytecount = dwc2_hb_mult(maxp) * dwc2_max_packet(maxp);
 	char *speed, *type;
 
-	dev_vdbg(hsotg->dev, "%s()\n", __func__);
-
 	/* Initialize QH */
 	qh->hsotg = hsotg;
 	setup_timer(&qh->unreserve_timer, dwc2_unreserve_timer_fn,
 		    (unsigned long)qh);
-	qh->ep_type = dwc2_hcd_get_pipe_type(&urb->pipe_info);
-	qh->ep_is_in = dwc2_hcd_is_pipe_in(&urb->pipe_info) ? 1 : 0;
+	qh->ep_type = ep_type;
+	qh->ep_is_in = ep_is_in;
 
 	qh->data_toggle = DWC2_HC_PID_DATA0;
-	qh->maxp = dwc2_hcd_get_mps(&urb->pipe_info);
+	qh->maxp = maxp;
 	INIT_LIST_HEAD(&qh->qtd_list);
 	INIT_LIST_HEAD(&qh->qh_list_entry);
 
-	/* FS/LS Endpoint on HS Hub, NOT virtual root hub */
-	dev_speed = dwc2_host_get_speed(hsotg, urb->priv);
+	qh->do_split = do_split;
+	qh->dev_speed = dev_speed;
 
-	dwc2_host_hub_info(hsotg, urb->priv, &hub_addr, &hub_port);
+	if (ep_is_int || ep_is_isoc) {
+		/* Compute scheduling parameters once and save them */
+		int host_speed = do_split ? USB_SPEED_HIGH : dev_speed;
+		struct dwc2_tt *dwc_tt = dwc2_host_get_tt_info(hsotg, urb->priv,
+							       mem_flags,
+							       &qh->ttport);
+		int device_ns;
 
-	if ((dev_speed == USB_SPEED_LOW || dev_speed == USB_SPEED_FULL) &&
-	    hub_addr != 0 && hub_addr != 1) {
-		dev_vdbg(hsotg->dev,
-			 "QH init: EP %d: TT found at hub addr %d, for port %d\n",
-			 dwc2_hcd_get_ep_num(&urb->pipe_info), hub_addr,
-			 hub_port);
-		qh->do_split = 1;
-	}
+		qh->dwc_tt = dwc_tt;
 
-	if (qh->ep_type == USB_ENDPOINT_XFER_INT ||
-	    qh->ep_type == USB_ENDPOINT_XFER_ISOC) {
-		/* Compute scheduling parameters once and save them */
-		u32 hprt, prtspd;
-
-		/* Todo: Account for split transfers in the bus time */
-		int bytecount =
-			dwc2_hb_mult(qh->maxp) * dwc2_max_packet(qh->maxp);
-
-		qh->host_us = NS_TO_US(usb_calc_bus_time(qh->do_split ?
-			      USB_SPEED_HIGH : dev_speed, qh->ep_is_in,
-			      qh->ep_type == USB_ENDPOINT_XFER_ISOC,
-			      bytecount));
-
-		qh->host_interval = urb->interval;
-		dwc2_sch_dbg(hsotg, "QH=%p init nxt=%04x, fn=%04x, int=%#x\n",
-			     qh, qh->next_active_frame, hsotg->frame_number,
-			     qh->host_interval);
-#if 0
-		/* Increase interrupt polling rate for debugging */
-		if (qh->ep_type == USB_ENDPOINT_XFER_INT)
-			qh->host_interval = 8;
-#endif
-		hprt = dwc2_readl(hsotg->regs + HPRT0);
-		prtspd = (hprt & HPRT0_SPD_MASK) >> HPRT0_SPD_SHIFT;
-		if (prtspd == HPRT0_SPD_HIGH_SPEED &&
-		    (dev_speed == USB_SPEED_LOW ||
-		     dev_speed == USB_SPEED_FULL)) {
-			qh->host_interval *= 8;
-			dwc2_sch_dbg(hsotg,
-				     "QH=%p init*8 nxt=%04x, fn=%04x, int=%#x\n",
-				     qh, qh->next_active_frame,
-				     hsotg->frame_number, qh->host_interval);
+		qh->host_us = NS_TO_US(usb_calc_bus_time(host_speed, ep_is_in,
+				       ep_is_isoc, bytecount));
+		device_ns = usb_calc_bus_time(dev_speed, ep_is_in,
+					      ep_is_isoc, bytecount);
 
-		}
-		dev_dbg(hsotg->dev, "interval=%d\n", qh->host_interval);
-	}
+		if (do_split && dwc_tt)
+			device_ns += dwc_tt->usb_tt->think_time;
+		qh->device_us = NS_TO_US(device_ns);
 
-	dev_vdbg(hsotg->dev, "DWC OTG HCD QH Initialized\n");
-	dev_vdbg(hsotg->dev, "DWC OTG HCD QH - qh = %p\n", qh);
-	dev_vdbg(hsotg->dev, "DWC OTG HCD QH - Device Address = %d\n",
-		 dwc2_hcd_get_dev_addr(&urb->pipe_info));
-	dev_vdbg(hsotg->dev, "DWC OTG HCD QH - Endpoint %d, %s\n",
-		 dwc2_hcd_get_ep_num(&urb->pipe_info),
-		 dwc2_hcd_is_pipe_in(&urb->pipe_info) ? "IN" : "OUT");
 
-	qh->dev_speed = dev_speed;
+		qh->device_interval = urb->interval;
+		qh->host_interval = urb->interval * (do_split ? 8 : 1);
+
+		/*
+		 * Schedule low speed if we're running the host in low or
+		 * full speed OR if we've got a "TT" to deal with to access this
+		 * device.
+		 */
+		qh->schedule_low_speed = prtspd != HPRT0_SPD_HIGH_SPEED ||
+					 dwc_tt;
+
+		if (do_split) {
+			/* We won't know num transfers until we schedule */
+			qh->num_hs_transfers = -1;
+		} else if (dev_speed == USB_SPEED_HIGH) {
+			qh->num_hs_transfers = 1;
+		} else {
+			qh->num_hs_transfers = 0;
+		}
+
+		/* We'll schedule later when we have something to do */
+	}
 
 	switch (dev_speed) {
 	case USB_SPEED_LOW:
@@ -701,7 +1537,6 @@ static void dwc2_qh_init(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
 		speed = "?";
 		break;
 	}
-	dev_vdbg(hsotg->dev, "DWC OTG HCD QH - Speed = %s\n", speed);
 
 	switch (qh->ep_type) {
 	case USB_ENDPOINT_XFER_ISOC:
@@ -721,13 +1556,21 @@ static void dwc2_qh_init(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
 		break;
 	}
 
-	dev_vdbg(hsotg->dev, "DWC OTG HCD QH - Type = %s\n", type);
-
-	if (qh->ep_type == USB_ENDPOINT_XFER_INT) {
-		dev_vdbg(hsotg->dev, "DWC OTG HCD QH - usecs = %d\n",
-			 qh->host_us);
-		dev_vdbg(hsotg->dev, "DWC OTG HCD QH - interval = %d\n",
-			 qh->host_interval);
+	dwc2_sch_dbg(hsotg, "QH=%p Init %s, %s speed, %d bytes:\n", qh, type,
+		     speed, bytecount);
+	dwc2_sch_dbg(hsotg, "QH=%p ...addr=%d, ep=%d, %s\n", qh,
+		     dwc2_hcd_get_dev_addr(&urb->pipe_info),
+		     dwc2_hcd_get_ep_num(&urb->pipe_info),
+		     ep_is_in ? "IN" : "OUT");
+	if (ep_is_int || ep_is_isoc) {
+		dwc2_sch_dbg(hsotg,
+			     "QH=%p ...duration: host=%d us, device=%d us\n",
+			     qh, qh->host_us, qh->device_us);
+		dwc2_sch_dbg(hsotg, "QH=%p ...interval: host=%d, device=%d\n",
+			     qh, qh->host_interval, qh->device_interval);
+		if (qh->schedule_low_speed)
+			dwc2_sch_dbg(hsotg, "QH=%p ...low speed schedule=%p\n",
+				     qh, dwc2_get_ls_map(hsotg, qh));
 	}
 }
 
@@ -755,7 +1598,7 @@ struct dwc2_qh *dwc2_hcd_qh_create(struct dwc2_hsotg *hsotg,
 	if (!qh)
 		return NULL;
 
-	dwc2_qh_init(hsotg, qh, urb);
+	dwc2_qh_init(hsotg, qh, urb, mem_flags);
 
 	if (hsotg->core_params->dma_desc_enable > 0 &&
 	    dwc2_hcd_qh_init_ddma(hsotg, qh, mem_flags) < 0) {
@@ -787,6 +1630,7 @@ void dwc2_hcd_qh_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 		dwc2_do_unreserve(hsotg, qh);
 		spin_unlock_irqrestore(&hsotg->lock, flags);
 	}
+	dwc2_host_put_tt_info(hsotg, qh->dwc_tt);
 
 	if (qh->desc_list)
 		dwc2_hcd_qh_free_ddma(hsotg, qh);
@@ -902,6 +1746,8 @@ static int dwc2_next_for_periodic_split(struct dwc2_hsotg *hsotg,
 	u16 incr;
 
 	/*
+	 * See dwc2_uframe_schedule_split() for split scheduling.
+	 *
 	 * Basically: increment 1 normally, but 2 right after the start split
 	 * (except for ISOC out).
 	 */
@@ -1004,9 +1850,17 @@ static int dwc2_next_periodic_start(struct dwc2_hsotg *hsotg,
 	if (qh->start_active_frame == qh->next_active_frame ||
 	    dwc2_frame_num_gt(prev_frame_number, qh->start_active_frame)) {
 		u16 ideal_start = qh->start_active_frame;
+		int periods_in_map;
 
-		/* Adjust interval as per gcd with plan length. */
-		interval = gcd(interval, 8);
+		/*
+		 * Adjust interval as per gcd with map size.
+		 * See pmap_schedule() for more details here.
+		 */
+		if (qh->do_split || qh->dev_speed == USB_SPEED_HIGH)
+			periods_in_map = DWC2_HS_SCHEDULE_UFRAMES;
+		else
+			periods_in_map = DWC2_LS_SCHEDULE_FRAMES;
+		interval = gcd(interval, periods_in_map);
 
 		do {
 			qh->start_active_frame = dwc2_frame_num_inc(
-- 
2.7.0.rc3.207.g0ac5344