Re: Oops in scsi_send_eh_cmnd 2.6.21-rc5-git6,7,10,13

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



James Bottomley said:

> On Thu, 2007-04-05 at 19:21 -0700, Andrew Burgess wrote:
> > 2.6.20.4 with your patch dies in the memcpy (as does 21-gitN)
> > 
> > 2.6.20.4 without your patch dies in the subsequent __free_page
> > with a null pointer ref at 000...008
> > 
> > James, should I try your posted patch? On which kernel?
> 
> Well, mine will work, but will also cover up the problem.  It looks like
> the scatterlist is getting corrupted by the HBA driver somehow ... we
> really need to root cause this, otherwise other things will go wrong in
> strange ways.
> 
> Just to confirm, the HBA with the problem is 3w-xxxx?

Yes. The 3w-xxxx.c driver changed between 2.6.18 and 2.6.20, but
nothing jumps out to my untrained eyes. The diff is below; note that it
runs from 2.6.20-rt5 to 2.6.18.6, so "-" lines are the newer driver and
"+" lines are the older one.
Also, I should mention that the working kernel is a Fedora
rpm (2.6.18-1.2798.fc6), so I don't know what patches are in it.
The vmlinuz is dated Oct 6 2006.

--------------------------------------------------------------------

root@athlon:kernel # diff -u linux-2.6.20-rt5/drivers/scsi/3w-xxxx.c linux-2.6.18.6/drivers/scsi/3w-xxxx.c
--- linux-2.6.20-rt5/drivers/scsi/3w-xxxx.c     2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.18.6/drivers/scsi/3w-xxxx.c       2006-12-16 16:21:00.000000000 -0800
@@ -6,7 +6,7 @@
                     Arnaldo Carvalho de Melo <[email protected]>
                      Brad Strand <[email protected]>
 
-   Copyright (C) 1999-2007 3ware Inc.
+   Copyright (C) 1999-2005 3ware Inc.
 
    Kernel compatiblity By:     Andre Hedrick <[email protected]>
    Non-Copyright (C) 2000      Andre Hedrick <[email protected]>
@@ -191,9 +191,6 @@
                  before shutting down card.
                  Change to new 'change_queue_depth' api.
                  Fix 'handled=1' ISR usage, remove bogus IRQ check.
-   1.26.02.002 - Free irq handler in __tw_shutdown().
-                 Turn on RCD bit for caching mode page.
-                 Serialize reset code.
 */
 
 #include <linux/module.h>
@@ -217,7 +214,7 @@
 #include "3w-xxxx.h"
 
 /* Globals */
-#define TW_DRIVER_VERSION "1.26.02.002"
+#define TW_DRIVER_VERSION "1.26.02.001"
 static TW_Device_Extension *tw_device_extension_list[TW_MAX_SLOT];
 static int tw_device_extension_count = 0;
 static int twe_major = -1;
@@ -229,7 +226,7 @@
 MODULE_VERSION(TW_DRIVER_VERSION);
 
 /* Function prototypes */
-static int tw_reset_device_extension(TW_Device_Extension *tw_dev);
+static int tw_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_reset);
 
 /* Functions */
 
@@ -987,12 +984,24 @@
                        /* Now wait for the command to complete */
                        timeout = wait_event_timeout(tw_dev->ioctl_wqueue,
tw_dev->chrdev_request_id == TW_IOCTL_CHRDEV_FREE, timeout);
 
+                       /* See if we reset while waiting for the ioctl to complete */
+                       if (test_bit(TW_IN_RESET, &tw_dev->flags)) {
+                               clear_bit(TW_IN_RESET, &tw_dev->flags);
+                               retval = -ERESTARTSYS;
+                               goto out2;
+                       }
+
                        /* We timed out, and didn't get an interrupt */
                        if (tw_dev->chrdev_request_id != TW_IOCTL_CHRDEV_FREE) {
                                /* Now we need to reset the board */
                                printk(KERN_WARNING "3w-xxxx: scsi%d: Character ioctl (0x%x) timed
out, resetting card.\n", tw_dev->host->host_no, cmd);
                                retval = -EIO;
-                               if (tw_reset_device_extension(tw_dev)) {
+                               spin_lock_irqsave(tw_dev->host->host_lock, flags);
+                               tw_dev->state[request_id] = TW_S_COMPLETED;
+                               tw_state_request_finish(tw_dev, request_id);
+                               tw_dev->posted_request_count--;
+                               spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
+                               if (tw_reset_device_extension(tw_dev, 1)) {
                                        printk(KERN_WARNING "3w-xxxx: tw_chrdev_ioctl(): Reset
failed for card %d.\n", tw_dev->host->host_no);
                                }
                                goto out2;
@@ -1327,7 +1336,7 @@
 } /* End tw_unmap_scsi_data() */
 
 /* This function will reset a device extension */
-static int tw_reset_device_extension(TW_Device_Extension *tw_dev)
+static int tw_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_reset) 
 {
        int i = 0;
        struct scsi_cmnd *srb;
@@ -1373,10 +1382,15 @@
                printk(KERN_WARNING "3w-xxxx: scsi%d: Reset sequence failed.\n",
tw_dev->host->host_no);
                return 1;
        }
-
        TW_ENABLE_AND_CLEAR_INTERRUPTS(tw_dev);
-       clear_bit(TW_IN_RESET, &tw_dev->flags);
-       tw_dev->chrdev_request_id = TW_IOCTL_CHRDEV_FREE;
+
+       /* Wake up any ioctl that was pending before the reset */
+       if ((tw_dev->chrdev_request_id == TW_IOCTL_CHRDEV_FREE) || (ioctl_reset)) {
+               clear_bit(TW_IN_RESET, &tw_dev->flags);
+       } else {
+               tw_dev->chrdev_request_id = TW_IOCTL_CHRDEV_FREE;
+               wake_up(&tw_dev->ioctl_wqueue);
+       }
 
        return 0;
 } /* End tw_reset_device_extension() */
@@ -1423,18 +1437,14 @@
                "WARNING: Command (0x%x) timed out, resetting card.\n",
                SCpnt->cmnd[0]);
 
-       /* Make sure we are not issuing an ioctl or resetting from ioctl */
-       mutex_lock(&tw_dev->ioctl_lock);
-
        /* Now reset the card and some of the device extension data */
-       if (tw_reset_device_extension(tw_dev)) {
+       if (tw_reset_device_extension(tw_dev, 0)) {
                printk(KERN_WARNING "3w-xxxx: scsi%d: Reset failed.\n", tw_dev->host->host_no);
                goto out;
        }
 
        retval = SUCCESS;
 out:
-       mutex_unlock(&tw_dev->ioctl_lock);
        return retval;
 } /* End tw_scsi_eh_reset() */
 
@@ -1650,9 +1660,9 @@
        request_buffer[4] = 0x8;        /* caching page */
        request_buffer[5] = 0xa;        /* page length */
        if (*flags & 0x1)
-               request_buffer[6] = 0x5;        /* WCE on, RCD on */
+               request_buffer[6] = 0x4;        /* WCE on */
        else
-               request_buffer[6] = 0x1;        /* WCE off, RCD on */
+               request_buffer[6] = 0x0;        /* WCE off */
        tw_transfer_internal(tw_dev, request_id, request_buffer,
                             sizeof(request_buffer));
 
@@ -2002,10 +2012,6 @@
        int retval = 1;
        TW_Device_Extension *tw_dev = (TW_Device_Extension *)SCpnt->device->host->hostdata;
 
-       /* If we are resetting due to timed out ioctl, report as busy */
-       if (test_bit(TW_IN_RESET, &tw_dev->flags))
-               return SCSI_MLQUEUE_HOST_BUSY;
-
        /* Save done function into Scsi_Cmnd struct */
        SCpnt->scsi_done = done;
                 
@@ -2072,7 +2078,8 @@
 } /* End tw_scsi_queue() */
 
 /* This function is the interrupt service routine */
-static irqreturn_t tw_interrupt(int irq, void *dev_instance) 
+static irqreturn_t tw_interrupt(int irq, void *dev_instance,
+                    struct pt_regs *regs) 
 {
        int request_id;
        u32 status_reg_value;
@@ -2094,10 +2101,6 @@
 
        handled = 1;
 
-       /* If we are resetting, bail */
-       if (test_bit(TW_IN_RESET, &tw_dev->flags))
-               goto tw_interrupt_bail;
-
        /* Check controller for errors */
        if (tw_check_bits(status_reg_value)) {
                dprintk(KERN_WARNING "3w-xxxx: tw_interrupt(): Unexpected bits.\n");
@@ -2274,9 +2277,6 @@
        /* Disable interrupts */
        TW_DISABLE_INTERRUPTS(tw_dev);
 
-       /* Free up the IRQ */
-       free_irq(tw_dev->tw_pci_dev->irq, tw_dev);
-
        printk(KERN_WARNING "3w-xxxx: Shutting down host %d.\n", tw_dev->host->host_no);
 
        /* Tell the card we are shutting down */
@@ -2445,6 +2445,9 @@
                twe_major = -1;
        }
 
+       /* Free up the IRQ */
+       free_irq(tw_dev->tw_pci_dev->irq, tw_dev);
+
        /* Shutdown the card */
        __tw_shutdown(tw_dev);
 
@@ -2483,7 +2486,7 @@
 {
        printk(KERN_WARNING "3ware Storage Controller device driver for Linux v%s.\n",
TW_DRIVER_VERSION);
 
-       return pci_register_driver(&tw_driver);
+       return pci_module_init(&tw_driver);
 } /* End tw_init() */
 
 /* This function is called on driver exit */


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux