# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.600   -> 1.600.1.1
#	drivers/usb/host/uhci-hcd.h	1.3     -> 1.4    
#	drivers/usb/host/uhci-hcd.c	1.10    -> 1.11   
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 02/09/04	david-b@pacbell.net	1.600.1.1
# [PATCH] uhci, doc + cleanup
# 
# Another UHCI patch.  I'm sending this since Dan said he was going to
# start teaching "uhci-hcd" how to do control and interrupt queueing,
# and this may help.  Granted it checks out (I didn't test the part
# that has a chance to break, though it "looks right"), I think it
# should get merged in at some point.  What it does:
# 
#    - updates and adds some comments/docs
#    - gets rid of a "magic number" calling convention, instead passing
#      an explicit flag UHCI_PTR_DEPTH or UHCI_PTR_BREADTH (self-doc :)
#    - deletes bits of unused/dead code
#    - updates the append-to-qh code:
#        * start using list_for_each() ... clearer than handcrafted
#          loops, and it prefetches too.  Lots of places should get
#          updated to do this, IMO.
#        * re-orders some stuff to fix a sequencing problem
#        * adds ascii-art to show how the urb queueing is done
#          (based on some email Johannes sent me recently)
# 
# That sequencing problem is that when splicing a QH between A and B,
# it currently splices A-->QH before QH-->B ... so that if the HC is
# looking at that chunk of schedule at that time, everything starting
# at B will be ignored during the rest of that frame.  (Since the QH
# is initted to have UHCI_PTR_TERM next, stopping the schedule scan.)
# 
# I said "problem" not "bug" since in the current code it would probably
# (what does that "PIIX bug" do??) just reduce control/bulk throughput.
# That's because the logic is only appending towards the  end of each
# frame's schedule, where the FSBR loopback kicks in.
# --------------------------------------------
#
diff -Nru a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
--- a/drivers/usb/host/uhci-hcd.c	Thu Sep  5 08:51:48 2002
+++ b/drivers/usb/host/uhci-hcd.c	Thu Sep  5 08:51:48 2002
@@ -105,12 +105,10 @@
 /* to make sure it doesn't hog all of the bandwidth */
 #define DEPTH_INTERVAL 5
 
-#define MAX_URB_LOOP	2048		/* Maximum number of linked URB's */
-
 /*
  * Technically, updating td->status here is a race, but it's not really a
  * problem. The worst that can happen is that we set the IOC bit again
- * generating a spurios interrupt. We could fix this by creating another
+ * generating a spurious interrupt. We could fix this by creating another
  * QH and leaving the IOC bit always set, but then we would have to play
  * games with the FSBR code to make sure we get the correct order in all
  * the cases. I don't think it's worth the effort
@@ -273,7 +271,7 @@
 /*
  * Inserts a td into qh list at the top.
  */
-static void uhci_insert_tds_in_qh(struct uhci_qh *qh, struct urb *urb, int breadth)
+static void uhci_insert_tds_in_qh(struct uhci_qh *qh, struct urb *urb, u32 breadth)
 {
 	struct list_head *tmp, *head;
 	struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
@@ -290,7 +288,7 @@
 	td = list_entry(tmp, struct uhci_td, list);
 
 	/* Add the first TD to the QH element pointer */
-	qh->element = cpu_to_le32(td->dma_handle) | (breadth ? 0 : UHCI_PTR_DEPTH);
+	qh->element = cpu_to_le32(td->dma_handle) | breadth;
 
 	ptd = td;
 
@@ -301,7 +299,7 @@
 
 		tmp = tmp->next;
 
-		ptd->link = cpu_to_le32(td->dma_handle) | (breadth ? 0 : UHCI_PTR_DEPTH);
+		ptd->link = cpu_to_le32(td->dma_handle) | breadth;
 
 		ptd = td;
 	}
@@ -311,10 +309,6 @@
 
 static void uhci_free_td(struct uhci_hcd *uhci, struct uhci_td *td)
 {
-/*
-	if (!list_empty(&td->list) || !list_empty(&td->fl_list))
-		dbg("td %p is still in URB list!", td);
-*/
 	if (!list_empty(&td->list))
 		dbg("td %p is still in list!", td);
 	if (!list_empty(&td->fl_list))
@@ -365,43 +359,57 @@
 }
 
 /*
+ * Append this urb's qh after the last qh in skelqh->list
  * MUST be called with uhci->frame_list_lock acquired
+ *
+ * Note that urb_priv.queue_list doesn't have a separate queue head;
+ * it's a ring with every element "live".
  */
 static void _uhci_insert_qh(struct uhci_hcd *uhci, struct uhci_qh *skelqh, struct urb *urb)
 {
 	struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-	struct list_head *head, *tmp;
+	struct list_head *tmp;
 	struct uhci_qh *lqh;
 
 	/* Grab the last QH */
 	lqh = list_entry(skelqh->list.prev, struct uhci_qh, list);
 
+	/* Patch this endpoint's URBs' QHs to point to the next skelQH:
+	 *    SkelQH --> ... lqh --> NewQH --> NextSkelQH
+	 * Do this first, so the HC always sees the right QH after this one.
+	 */
+	list_for_each (tmp, &urbp->queue_list) {
+		struct urb_priv *turbp =
+			list_entry(tmp, struct urb_priv, queue_list);
+
+		turbp->qh->link = lqh->link;
+	}
+	urbp->qh->link = lqh->link;
+	wmb();				/* Ordering is important */
+
+	/* Patch QHs for previous endpoint's queued URBs?  HC goes
+	 * here next, not to the NextSkelQH it now points to.
+	 *
+	 *    lqh --> td ... --> qh ... --> td --> qh ... --> td
+	 *     |                 |                 |
+	 *     v                 v                 v
+	 *     +<----------------+-----------------+
+	 *     v
+	 *    NewQH --> td ... --> td
+	 *     |
+	 *     v
+	 *    ...
+	 *
+	 * The HC could see (and use!) any of these as we write them.
+	 */
 	if (lqh->urbp) {
-		head = &lqh->urbp->queue_list;
-		tmp = head->next;
-		while (head != tmp) {
+		list_for_each (tmp, &lqh->urbp->queue_list) {
 			struct urb_priv *turbp =
 				list_entry(tmp, struct urb_priv, queue_list);
 
-			tmp = tmp->next;
-
 			turbp->qh->link = cpu_to_le32(urbp->qh->dma_handle) | UHCI_PTR_QH;
 		}
 	}
-
-	head = &urbp->queue_list;
-	tmp = head->next;
-	while (head != tmp) {
-		struct urb_priv *turbp =
-			list_entry(tmp, struct urb_priv, queue_list);
-
-		tmp = tmp->next;
-
-		turbp->qh->link = lqh->link;
-	}
-
-	urbp->qh->link = lqh->link;
-	mb();				/* Ordering is important */
 	lqh->link = cpu_to_le32(urbp->qh->dma_handle) | UHCI_PTR_QH;
 
 	list_add_tail(&urbp->qh->list, &skelqh->list);
@@ -416,6 +424,9 @@
 	spin_unlock_irqrestore(&uhci->frame_list_lock, flags);
 }
 
+/* start removal of qh from schedule; it finishes next frame.
+ * TDs should be unlinked before this is called.
+ */
 static void uhci_remove_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
 {
 	unsigned long flags;
@@ -869,12 +880,12 @@
 	urbp->qh = qh;
 	qh->urbp = urbp;
 
-	/* Low speed or small transfers gets a different queue and treatment */
+	/* Low speed transfers get a different queue, and won't hog the bus */
 	if (urb->dev->speed == USB_SPEED_LOW) {
-		uhci_insert_tds_in_qh(qh, urb, 0);
+		uhci_insert_tds_in_qh(qh, urb, UHCI_PTR_DEPTH);
 		uhci_insert_qh(uhci, uhci->skel_ls_control_qh, urb);
 	} else {
-		uhci_insert_tds_in_qh(qh, urb, 1);
+		uhci_insert_tds_in_qh(qh, urb, UHCI_PTR_BREADTH);
 		uhci_insert_qh(uhci, uhci->skel_hs_control_qh, urb);
 		uhci_inc_fsbr(uhci, urb);
 	}
@@ -914,9 +925,9 @@
 	urbp->qh->urbp = urbp;
 
 	/* One TD, who cares about Breadth first? */
-	uhci_insert_tds_in_qh(urbp->qh, urb, 0);
+	uhci_insert_tds_in_qh(urbp->qh, urb, UHCI_PTR_DEPTH);
 
-	/* Low speed or small transfers gets a different queue and treatment */
+	/* Low speed transfers get a different queue */
 	if (urb->dev->speed == USB_SPEED_LOW)
 		uhci_insert_qh(uhci, uhci->skel_ls_control_qh, urb);
 	else
@@ -1242,8 +1253,8 @@
 	urbp->qh = qh;
 	qh->urbp = urbp;
 
-	/* Always assume breadth first */
-	uhci_insert_tds_in_qh(qh, urb, 1);
+	/* Always breadth first */
+	uhci_insert_tds_in_qh(qh, urb, UHCI_PTR_BREADTH);
 
 	if (eurb)
 		uhci_append_queued_urb(uhci, eurb, urb);
diff -Nru a/drivers/usb/host/uhci-hcd.h b/drivers/usb/host/uhci-hcd.h
--- a/drivers/usb/host/uhci-hcd.h	Thu Sep  5 08:51:48 2002
+++ b/drivers/usb/host/uhci-hcd.h	Thu Sep  5 08:51:48 2002
@@ -65,6 +65,7 @@
 #define UHCI_PTR_TERM		cpu_to_le32(0x0001)
 #define UHCI_PTR_QH		cpu_to_le32(0x0002)
 #define UHCI_PTR_DEPTH		cpu_to_le32(0x0004)
+#define UHCI_PTR_BREADTH	cpu_to_le32(0x0000)
 
 #define UHCI_NUMFRAMES		1024	/* in the frame list [array] */
 #define UHCI_MAX_SOF_NUMBER	2047	/* in an SOF packet */
@@ -80,6 +81,19 @@
 
 struct urb_priv;
 
+/* One role of a QH is to hold a queue of TDs for some endpoint.  Each QH is
+ * used with one URB, and qh->element (updated by the HC) is either:
+ *   - the next unprocessed TD for the URB, or
+ *   - UHCI_PTR_TERM (when there's no more traffic for this endpoint), or
+ *   - the QH for the next URB queued to the same endpoint.
+ *
+ * The other role of a QH is to serve as a "skeleton" framelist entry, so we
+ * can easily splice a QH for some endpoint into the schedule at the right
+ * place.  Then qh->element is UHCI_PTR_TERM.
+ *
+ * In the frame list, qh->link maintains a list of QHs seen by the HC:
+ *     skel1 --> ep1-qh --> ep2-qh --> ... --> skel2 --> ...
+ */
 struct uhci_qh {
 	/* Hardware fields */
 	__u32 link;			/* Next queue */
@@ -156,6 +170,9 @@
  *
  * Alas, not anymore, we have more than 4 words for software, woops.
  * Everything still works tho, surprise! -jerdfelt
+ *
+ * td->link points to either another TD (not necessarily for the same urb or
+ * even the same endpoint), or nothing (PTR_TERM), or a QH (for queued urbs)
  */
 struct uhci_td {
 	/* Hardware fields */
@@ -172,7 +189,7 @@
 
 	struct list_head list;		/* P: urb->lock */
 
-	int frame;
+	int frame;			/* for iso: what frame? */
 	struct list_head fl_list;	/* P: uhci->frame_list_lock */
 } __attribute__((aligned(16)));
 
@@ -217,6 +234,22 @@
  *
  * To keep with Linus' nomenclature, this is called the QH skeleton. These
  * labels (below) are only signficant to the root hub's QH's
+ *
+ *
+ * NOTE:  That ASCII art doesn't match the current (August 2002) code, in
+ * more ways than just not using QHs for ISO.
+ *
+ * NOTE:  Another way to look at the UHCI schedules is to compare them to what
+ * other host controller interfaces use.  EHCI, OHCI, and UHCI all have tables
+ * of transfers that the controller scans, frame by frame, and which hold the
+ * scheduled periodic transfers.  The key differences are that UHCI
+ *
+ *   (a) puts control and bulk transfers into that same table; the others
+ *       have separate data structures for non-periodic transfers.
+ *   (b) lets QHs be linked from TDs, not just other QHs, since they don't
+ *       hold endpoint data.  this driver chooses to use one QH per URB.
+ *   (c) needs more TDs, since it uses one per packet.  the data toggle
+ *       is stored in those TDs, along with all other endpoint state.
  */
 
 #define UHCI_NUM_SKELTD		10
