01_scsi_timer_eh_timer_fix.patch
scmd->eh_timeout is used to resolve the race between command
completion and timeout. However, during error handling,
scsi_send_eh_cmnd uses scmd->eh_timeout. This creates a race
condition between eh and normal completion for a request which
has timed out and in the process of error handling. If the
request completes while scmd->eh_timeout is being used by eh,
eh timeout is lost and the command will be handled by both eh
and completion path. This patch fixes the race by making
scsi_send_eh_cmnd() use its own timer.
This patch adds shost->eh_timeout field. The name of the
field equals scmd->eh_timeout which is used for normal command
timeout. As this can be confusing, renaming scmd->eh_timeout
to something like scmd->cmd_timeout would be good.
Reworked such that timeout race window is kept at minimal
level as pointed out by James Bottomley.
Signed-off-by: Tejun Heo <[email protected]>
drivers/scsi/scsi_error.c | 26 ++++++++++++++++++++------
include/scsi/scsi_host.h | 1 +
2 files changed, 21 insertions(+), 6 deletions(-)
Index: scsi-reqfn-export/drivers/scsi/scsi_error.c
===================================================================
--- scsi-reqfn-export.orig/drivers/scsi/scsi_error.c 2005-04-19 23:28:33.000000000 +0900
+++ scsi-reqfn-export/drivers/scsi/scsi_error.c 2005-04-19 23:30:57.000000000 +0900
@@ -428,8 +428,10 @@ static int scsi_eh_completed_normally(st
* for some action to complete on the device. our only job is to
* record that it timed out, and to wake up the thread.
**/
-static void scsi_eh_times_out(struct scsi_cmnd *scmd)
+static void scsi_eh_times_out(unsigned long arg)
{
+ struct scsi_cmnd *scmd = (void *)arg;
+
scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
scmd));
@@ -448,9 +450,11 @@ static void scsi_eh_done(struct scsi_cmn
* if the timeout handler is already running, then just set the
* flag which says we finished late, and return. we have no
* way of stopping the timeout handler from running, so we must
- * always defer to it.
+ * always defer to it. note that by doing timer removal here
+ * the window in which normally completed commands are treated
+ * as timed out is kept at minimal level.
*/
- if (del_timer(&scmd->eh_timeout)) {
+ if (del_timer(scmd->device->host->eh_timeout)) {
scmd->request->rq_status = RQ_SCSI_DONE;
scmd->owner = SCSI_OWNER_ERROR_HANDLER;
@@ -478,6 +482,7 @@ static int scsi_send_eh_cmnd(struct scsi
{
struct scsi_device *sdev = scmd->device;
struct Scsi_Host *shost = sdev->host;
+ struct timer_list timer;
DECLARE_MUTEX_LOCKED(sem);
unsigned long flags;
int rtn = SUCCESS;
@@ -492,7 +497,15 @@ static int scsi_send_eh_cmnd(struct scsi
scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
(sdev->lun << 5 & 0xe0);
- scsi_add_timer(scmd, timeout, scsi_eh_times_out);
+ /*
+ * set up eh timer.
+ */
+ shost->eh_timeout = &timer;
+ init_timer(&timer);
+ timer.expires = jiffies + timeout;
+ timer.function = scsi_eh_times_out;
+ timer.data = (unsigned long)scmd;
+ add_timer(&timer);
/*
* set up the semaphore so we wait for the command to complete.
@@ -508,8 +521,6 @@ static int scsi_send_eh_cmnd(struct scsi
down(&sem);
scsi_log_completion(scmd, SUCCESS);
- shost->eh_action = NULL;
-
/*
* see if timeout. if so, tell the host to forget about it.
* in other words, we don't want a callback any more.
@@ -539,6 +550,9 @@ static int scsi_send_eh_cmnd(struct scsi
rtn = FAILED;
}
+ shost->eh_action = NULL;
+ shost->eh_timeout = NULL;
+
SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
__FUNCTION__, scmd, rtn));
Index: scsi-reqfn-export/include/scsi/scsi_host.h
===================================================================
--- scsi-reqfn-export.orig/include/scsi/scsi_host.h 2005-04-19 23:28:33.000000000 +0900
+++ scsi-reqfn-export/include/scsi/scsi_host.h 2005-04-19 23:30:57.000000000 +0900
@@ -442,6 +442,7 @@ struct Scsi_Host {
struct completion * eh_notify; /* wait for eh to begin or end */
struct semaphore * eh_action; /* Wait for specific actions on the
host. */
+ struct timer_list * eh_timeout; /* Used to timeout eh commands */
unsigned int eh_active:1; /* Indicates the eh thread is awake and active if
this is true. */
unsigned int eh_kill:1; /* set when killing the eh thread */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Index of Archives]
[Kernel Newbies]
[Netfilter]
[Bugtraq]
[Photo]
[Stuff]
[Gimp]
[Yosemite News]
[MIPS Linux]
[ARM Linux]
[Linux Security]
[Linux RAID]
[Video 4 Linux]
[Linux for the blind]
[Linux Resources]