tulip bug, alternative fix, #3 (tested)
Stephen R. van den Berg
srb@cuci.nl
Sun Dec 12 15:44:32 1999
Wolfgang Walter wrote:
>I sent description of the bugs to this list and to Donald Becker. As I haven't
>got an answer from him, yet, I don't know if he is already working on a fix or
>what he thinks about this patch. Maybe he's already working on a better fix.
Well, I looked at the patch. Haven't yet doublechecked the exact
IO-commands, but, a casual comparison of what you did and past discussions
on the list reveals that you simply disable the "Too much work during
interrupt" compensations. Donald remarked that this is not a good idea.
I'm not sure if he's right (didn't have time to check the logic yet), but
simply skipping it seems almost a bit too easy.
Comparing his patches for v0.91u with the ones you supplied, I come up
with a merged version (still suitable for standalone operation with
2.2.x) that takes the INTR_MITIGATION support into account (I'm not even
sure if that can make a difference in this lockup case).
Oh yes, incidentally, your patches seem relative to a different 0.91g version
than the one I have (and picked up from Donald's website). Tiny differences,
but enough to screw up the diffs/patch.
If people could review this merged version (even test it perhaps?), and
tell me if I made a mess out of it or not? This patch is relative to
the v0.91g which I got from Donald's website.
--- tulip.c.91g Sat Dec 11 19:37:43 1999
+++ ../tulip.c Sun Dec 12 06:27:12 1999
@@ -18,7 +18,7 @@
*/
#define SMP_CHECK
-static const char version[] = "tulip.c:v0.91g 7/16/99 becker@cesdis.gsfc.nasa.gov\n";
+static const char version[] = "tulip.c:v0.91gB 7/16/99 becker@cesdis.gsfc.nasa.gov\n";
/* A few user-configurable values. */
@@ -51,6 +51,7 @@
bonding and packet priority.
There are no ill effects from too-large receive rings. */
#define TX_RING_SIZE 16
+#define TX_QUEUE_LEN 10
#define RX_RING_SIZE 32
/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
@@ -361,7 +362,7 @@
HAS_MII=1, HAS_MEDIA_TABLE=2, CSR12_IN_SROM=4, ALWAYS_CHECK_MII=8,
HAS_PWRDWN=0x10, MC_HASH_ONLY=0x20, /* Hash-only multicast filter. */
HAS_PNICNWAY=0x80, HAS_NWAY143=0x40, /* Uses internal NWay xcvr. */
- HAS_8023X=0x100,
+ HAS_INTR_MITIGATION=0x100, HAS_8023X=0x400,
};
static struct tulip_chip_table {
char *chip_name;
@@ -375,7 +376,8 @@
{ "Digital DS21140 Tulip", 128, 0x0001ebef,
HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, tulip_timer },
{ "Digital DS21143 Tulip", 128, 0x0801fbff,
- HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_PWRDWN | HAS_NWAY143,
+ HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_PWRDWN | HAS_NWAY143
+ | HAS_INTR_MITIGATION,
t21142_timer },
{ "Lite-On 82c168 PNIC", 256, 0x0001ebef,
HAS_MII | HAS_PNICNWAY, pnic_timer },
@@ -433,7 +435,7 @@
/* The bits in the CSR5 status registers, mostly interrupt sources. */
enum status_bits {
- TimerInt=0x800, TPLnkFail=0x1000, TPLnkPass=0x10,
+ TimerInt=0x800, SytemError=0x2000, TPLnkFail=0x1000, TPLnkPass=0x10,
NormalIntr=0x10000, AbnormalIntr=0x8000,
RxJabber=0x200, RxDied=0x100, RxNoBuf=0x80, RxIntr=0x40,
TxFIFOUnderflow=0x20, TxJabber=0x08, TxNoBuf=0x04, TxDied=0x02, TxIntr=0x01,
@@ -530,6 +532,9 @@
int cur_index; /* Current media index. */
int saved_if_port;
unsigned char pci_bus, pci_devfn;
+ int ttimer;
+ int susp_rx;
+ unsigned long nir;
int pad0, pad1; /* Used for 8-byte alignment */
};
@@ -548,6 +553,7 @@
static void tulip_tx_timeout(struct device *dev);
static void tulip_init_ring(struct device *dev);
static int tulip_start_xmit(struct sk_buff *skb, struct device *dev);
+static int tulip_refill_rx(struct device *dev);
static int tulip_rx(struct device *dev);
static void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
static int tulip_close(struct device *dev);
@@ -2484,6 +2490,9 @@
tp->tx_full = 0;
tp->cur_rx = tp->cur_tx = 0;
tp->dirty_rx = tp->dirty_tx = 0;
+ tp->susp_rx = 0;
+ tp->ttimer = 0;
+ tp->nir = 0;
for (i = 0; i < RX_RING_SIZE; i++) {
tp->rx_ring[i].status = 0x00000000;
@@ -2545,11 +2554,11 @@
tp->tx_skbuff[entry] = skb;
tp->tx_ring[entry].buffer1 = virt_to_bus(skb->data);
- if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */
+ if (tp->cur_tx - tp->dirty_tx < TX_QUEUE_LEN/2) {/* Typical path */
flag = 0x60000000; /* No interrupt */
- } else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) {
+ } else if (tp->cur_tx - tp->dirty_tx == TX_QUEUE_LEN/2) {
flag = 0xe0000000; /* Tx-done intr. */
- } else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) {
+ } else if (tp->cur_tx - tp->dirty_tx < TX_QUEUE_LEN - 2) {
flag = 0x60000000; /* No Tx-done intr. */
} else { /* Leave room for set_rx_mode() to fill entries. */
tp->tx_full = 1;
@@ -2578,7 +2587,15 @@
struct device *dev = (struct device *)dev_instance;
struct tulip_private *tp = (struct tulip_private *)dev->priv;
long ioaddr = dev->base_addr;
- int csr5, work_budget = max_interrupt_work;
+ int csr5;
+ int entry;
+ int missed;
+ int rx = 0;
+ int tx = 0;
+ int oi = 0;
+ int maxrx = RX_RING_SIZE;
+ int maxtx = TX_RING_SIZE;
+ int maxoi = TX_RING_SIZE;
#if defined(__i386__) && defined(SMP_CHECK)
if (test_and_set_bit(0, (void*)&dev->interrupt)) {
@@ -2596,6 +2613,8 @@
dev->interrupt = 1;
#endif
+ tp->nir++;
+
do {
csr5 = inl(ioaddr + CSR5);
/* Acknowledge all of the current interrupt sources ASAP. */
@@ -2608,10 +2627,12 @@
if ((csr5 & (NormalIntr|AbnormalIntr)) == 0)
break;
- if (csr5 & (RxIntr | RxNoBuf))
- work_budget -= tulip_rx(dev);
+ if (csr5 & (RxIntr | RxNoBuf)) {
+ rx += tulip_rx(dev);
+ tulip_refill_rx(dev);
+ }
- if (csr5 & (TxNoBuf | TxDied | TxIntr)) {
+ if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) {
unsigned int dirty_tx;
for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0;
@@ -2656,6 +2677,7 @@
/* Free the original skb. */
dev_free_skb(tp->tx_skbuff[entry]);
tp->tx_skbuff[entry] = 0;
+ tx++;
}
#ifndef final_version
@@ -2667,7 +2689,7 @@
#endif
if (tp->tx_full && dev->tbusy
- && tp->cur_tx - dirty_tx < TX_RING_SIZE - 2) {
+ && tp->cur_tx - dirty_tx < TX_QUEUE_LEN - 4) {
/* The ring is no longer full, clear tbusy. */
tp->tx_full = 0;
dev->tbusy = 0;
@@ -2698,38 +2720,77 @@
/* Restart the transmit process. */
outl(tp->csr6 | 0x0002, ioaddr + CSR6);
outl(tp->csr6 | 0x2002, ioaddr + CSR6);
+ outl(0, ioaddr + CSR1);
}
if (csr5 & RxDied) { /* Missed a Rx frame. */
tp->stats.rx_errors++;
tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff;
outl(tp->csr6 | 0x2002, ioaddr + CSR6);
}
- if (csr5 & TimerInt) {
- if (tulip_debug > 2)
- printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n",
- dev->name, csr5);
- outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
- }
if (csr5 & (TPLnkPass | TPLnkFail | 0x08000000)) {
if (tp->link_change)
(tp->link_change)(dev, csr5);
}
+ if (csr5 & SytemError) {
+ printk(KERN_ERR "%s: (%lu) System Error occured\n", dev->name, tp->nir);
+ }
/* Clear all error sources, included undocumented ones! */
outl(0x0800f7ba, ioaddr + CSR5);
+ oi++;
}
- if (--work_budget < 0) {
+ if (csr5 & TimerInt) {
+#if 0
+ if (tulip_debug > 2)
+ printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n",
+ dev->name, csr5);
+ outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
+#endif
+ tp->ttimer = 0;
+ oi++;
+ }
+ if (tx > maxtx || rx > maxrx || oi > maxoi) {
+#if 0
if (tulip_debug > 1)
+#endif
printk(KERN_WARNING "%s: Too much work during an interrupt, "
- "csr5=0x%8.8x.\n", dev->name, csr5);
+ "csr5=0x%8.8x. (%lu) (%d,%d,%d)\n", dev->name, csr5, tp->nir, tx, rx, oi);
/* Acknowledge all interrupt sources. */
+#if 1
outl(0x8001ffff, ioaddr + CSR5);
- /* Clear all interrupting sources, set timer to re-enable. */
- outl(((~csr5) & 0x0001ebef) | 0x0800, ioaddr + CSR7);
- outl(12, ioaddr + CSR11);
+ if (tp->flags & HAS_INTR_MITIGATION)
+ outl(0x45240000, ioaddr + CSR11);
+ else {
+ /* Mask all interrupting sources, set timer to e-enable. */
+ outl(((~csr5) & 0x0001ebef) | AbnormalIntr | TimerInt,
+ ioaddr + CSR7);
+ outl(0x0012, ioaddr + CSR11);
+ }
+ tp->ttimer = 1;
+#endif
break;
}
} while (1);
+ tulip_refill_rx(dev);
+
+ /* check if we card is in suspend mode */
+ entry = tp->dirty_rx % RX_RING_SIZE;
+ if (tp->rx_skbuff[entry] == NULL) {
+ printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", dev->name, tp->nir, tp->cur_rx, tp->ttimer, rx);
+ if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) {
+ printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", dev->name, tp->nir);
+ outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt,
+ ioaddr + CSR7);
+ outl(TimerInt, ioaddr + CSR5);
+ outl(12, ioaddr + CSR11);
+ tp->ttimer = 1;
+ }
+ }
+
+ if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) {
+ tp->stats.rx_dropped += missed & 0x10000 ? 0x10000 : missed;
+ }
+
if (tulip_debug > 3)
printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n",
dev->name, inl(ioaddr + CSR5));
@@ -2742,13 +2803,36 @@
return;
}
+static int tulip_refill_rx(struct device *dev)
+{
+ struct tulip_private *tp = (struct tulip_private *)dev->priv;
+ int entry;
+ int refilled = 0;
+
+ /* Refill the Rx ring buffers. */
+ for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) {
+ entry = tp->dirty_rx % RX_RING_SIZE;
+ if (tp->rx_skbuff[entry] == NULL) {
+ struct sk_buff *skb;
+ skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ);
+ if (skb == NULL)
+ break;
+ skb->dev = dev; /* Mark as being used by this device. */
+ tp->rx_ring[entry].buffer1 = virt_to_bus(skb->tail);
+ refilled++;
+ }
+ tp->rx_ring[entry].status = cpu_to_le32(DescOwned);
+ }
+ return refilled;
+}
+
static int
tulip_rx(struct device *dev)
{
struct tulip_private *tp = (struct tulip_private *)dev->priv;
int entry = tp->cur_rx % RX_RING_SIZE;
int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
- int work_done = 0;
+ int received = 0;
if (tulip_debug > 4)
printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry,
@@ -2810,7 +2894,6 @@
memcpy(skb_put(skb, pkt_len),
bus_to_virt(tp->rx_ring[entry].buffer1), pkt_len);
#endif
- work_done++;
} else { /* Pass up the skb already on the Rx ring. */
char *temp = skb_put(skb = tp->rx_skbuff[entry], pkt_len);
tp->rx_skbuff[entry] = NULL;
@@ -2830,25 +2913,11 @@
tp->stats.rx_bytes += pkt_len;
#endif
}
+ received++;
entry = (++tp->cur_rx) % RX_RING_SIZE;
}
- /* Refill the Rx ring buffers. */
- for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) {
- entry = tp->dirty_rx % RX_RING_SIZE;
- if (tp->rx_skbuff[entry] == NULL) {
- struct sk_buff *skb;
- skb = tp->rx_skbuff[entry] = dev_alloc_skb(PKT_BUF_SZ);
- if (skb == NULL)
- break;
- skb->dev = dev; /* Mark as being used by this device. */
- tp->rx_ring[entry].buffer1 = virt_to_bus(skb->tail);
- work_done++;
- }
- tp->rx_ring[entry].status = DescOwned;
- }
-
- return work_done;
+ return received;
}
static int
--
Sincerely, srb@cuci.nl
Stephen R. van den Berg (AKA BuGless).
Climate is what you expect. Weather is what you get.