bug in tulip_rx? corrected patch

Wolfgang Walter wolfgang.walter@stusta.mhn.de
Wed Nov 24 21:23:23 1999


Hi,

a just realised that my patch is buggy :-). Of course, if I drop the packet
I must not give it to netif_rx() :-).

Wolfgang Walter

Here is the corrected version:


--- 2.2.14pre4/drivers/net/tulip.c	Fri Nov  5 18:02:35 1999
+++ 2.2.14pre4-m/drivers/net/tulip.c	Thu Nov 25 03:48:23 1999
@@ -509,6 +509,7 @@
 	int interrupt;				/* In-interrupt flag. */
 	unsigned int cur_rx, cur_tx;		/* The next free ring entry */
 	unsigned int dirty_rx, dirty_tx;	/* The ring entries to be free()ed. */
+	unsigned int nr_skbs_rx;			/* number of skbs */
 	unsigned int tx_full:1;				/* The Tx queue is full. */
 	unsigned int full_duplex:1;			/* Full-duplex operation requested. */
 	unsigned int full_duplex_lock:1;
@@ -2484,6 +2485,7 @@
 	tp->tx_full = 0;
 	tp->cur_rx = tp->cur_tx = 0;
 	tp->dirty_rx = tp->dirty_tx = 0;
+	tp->nr_skbs_rx = 0;
 
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		tp->rx_ring[i].status = 0x00000000;
@@ -2503,6 +2505,7 @@
 		tp->rx_skbuff[i] = skb;
 		if (skb == NULL)
 			break;
+		tp->nr_skbs_rx++;
 		skb->dev = dev;			/* Mark as being used by this device. */
 		tp->rx_ring[i].status = cpu_to_le32(DescOwned);	/* Owned by Tulip chip */
 		tp->rx_ring[i].buffer1 = virt_to_le32desc(skb->tail);
@@ -2595,6 +2598,13 @@
 	dev->interrupt = 1;
 #endif
 
+	/* tp->nr_skbs_rx == 0 can only happen if we never succeeded to allocate at least
+	 * one skb (which we usually do at initialisation time.
+	 * We then hope do do so now or any later non-RD/RU interrupt
+	 */
+	if (tp->nr_skbs_rx == 0)
+		work_budget -= tulip_rx(dev);
+
 	do {
 		csr5 = inl(ioaddr + CSR5);
 		/* Acknowledge all of the current interrupt sources ASAP. */
@@ -2729,7 +2739,7 @@
 			break;
 		}
 	} while (1);
-
+	
 	if (tulip_debug > 4)
 		printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n",
 			   dev->name, inl(ioaddr + CSR5));
@@ -2797,7 +2807,7 @@
 #endif
 			/* Check if the packet is long enough to accept without copying
 			   to a minimally-sized skbuff. */
-			if (pkt_len < rx_copybreak
+			if ((pkt_len < rx_copybreak || tp->nr_skbs_rx < 2)
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
@@ -2809,9 +2819,10 @@
 					   pkt_len);
 #endif
 				work_done++;
-			} else { 	/* Pass up the skb already on the Rx ring. */
+			} else if (tp->nr_skbs_rx > 1) { 	/* Pass up the skb already on the Rx ring. */
 				char *temp = skb_put(skb = tp->rx_skbuff[entry], pkt_len);
 				tp->rx_skbuff[entry] = NULL;
+				tp->nr_skbs_rx--;
 #ifndef final_version
 				if (le32desc_to_virt(tp->rx_ring[entry].buffer1) != temp)
 					printk(KERN_ERR "%s: Internal fault: The skbuff addresses "
@@ -2820,14 +2831,20 @@
 						   le32desc_to_virt(tp->rx_ring[entry].buffer1),
 						   skb->head, temp);
 #endif
-			}
-			skb->protocol = eth_type_trans(skb, dev);
-			netif_rx(skb);
-			dev->last_rx = jiffies;
-			tp->stats.rx_packets++;
+			} else {
+				/* we have only one skb; we must keep it: drop the packet */
+				tp->stats.rx_dropped++;
+				skb = NULL;
+			}
+			if (skb) {
+				skb->protocol = eth_type_trans(skb, dev);
+				netif_rx(skb);
+				dev->last_rx = jiffies;
+				tp->stats.rx_packets++;
 #if LINUX_VERSION_CODE > 0x20127
-			tp->stats.rx_bytes += pkt_len;
+				tp->stats.rx_bytes += pkt_len;
 #endif
+			}
 		}
 		entry = (++tp->cur_rx) % RX_RING_SIZE;
 	}
@@ -2840,11 +2857,55 @@
 			skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ);
 			if (skb == NULL)
 				break;
+			tp->nr_skbs_rx++;
 			skb->dev = dev;			/* Mark as being used by this device. */
 			tp->rx_ring[entry].buffer1 = virt_to_le32desc(skb->tail);
 			work_done++;
 		}
 		tp->rx_ring[entry].status = cpu_to_le32(DescOwned);
+	}
+
+	/*
+	 * We must catch the situation:
+	 * 	the card run out of RX-buffers when this interrupt was triggered (but we have a skb available)
+	 * 	
+	 * This is the case, if
+	 * 	tp->nr_skbs_rx > 0
+	 * and
+	 * 	    all buffers are dirty
+	 * 	<=> tp->dirty_rx + RX_RING_SIZE == tp->cur_rx
+	 * 	<=> tp->cur_rx has no skb associated
+	 * 
+	 * Why? 
+	 * =>
+	 * If at this stage all buffers are dirty this means that
+	 * 	tp->dirty_rx + RX_RING_SIZE == tp->cur_rx
+	 * must hold. This means that the above refill did not change tp->dirty_rx at all
+	 * which means that the associated skb must be null. As this is the same in the ring as
+	 * tp->cur_rx represents, there is no skb associated with tp->cur_rx.
+	 * 
+	 * <=
+	 * If tp->cur_rx has no skb associated the rx-loop must be terminated with 
+	 * rx_work_limit<0 (tp->rx_ring[entry].status & cpu_to_le32(DescOwned) can't be
+	 * true if no skb is associated). This means that
+	 * 	tp->dirty_rx + RX_RING_SIZE == tp->cur_rx
+	 * holds which means that all buffers are dirty
+	 * 
+	 */
+	entry = tp->cur_rx % RX_RING_SIZE;
+	if (tp->nr_skbs_rx > 0 && tp->rx_skbuff[entry] == NULL) {
+		int i;
+		/* search the one free buffer */
+		for (i = 0; i < RX_RING_SIZE; i++) {
+			if (tp->rx_skbuff[i] != NULL) {
+				tp->rx_skbuff[entry] = tp->rx_skbuff[i];
+				tp->rx_ring[entry].buffer1 = tp->rx_ring[i].buffer1;
+				tp->rx_skbuff[i] = NULL;
+				tp->rx_ring[entry].status = cpu_to_le32(DescOwned);
+				tp->dirty_rx++;
+				break;
+			}
+		}
 	}
 
 	return work_done;