Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 04 Aug 2004 15:33:34 +0200
From:      =?ISO-8859-1?Q?S=F8ren_Schmidt?= <sos@DeepCore.dk>
To:        Daniel Eriksson <daniel_k_eriksson@telia.com>
Cc:        'Ville-Pertti Keinonen' <will+freebsd-current@will.iki.fi>
Subject:   Re: ATA driver races with interrupts
Message-ID:  <4110E5AE.7050602@DeepCore.dk>
In-Reply-To: <!~!UENERkVCMDkAAQACAAAAAAAAAAAAAAAAABgAAAAAAAAA0VcX9IoJqUaXPS8MjT1PdsKAAAAQAAAAN7kcl8le1EWCe5i4TwG85QEAAAAA@telia.com>
References:  <!~!UENERkVCMDkAAQACAAAAAAAAAAAAAAAAABgAAAAAAAAA0VcX9IoJqUaXPS8MjT1PdsKAAAAQAAAAN7kcl8le1EWCe5i4TwG85QEAAAAA@telia.com>

Next in thread | Previous in thread | Raw E-Mail | Index | Archive | Help
This is a multi-part message in MIME format.
--------------010702090708090107090308
Content-Type: text/plain; charset=iso-8859-1; format=flowed
Content-Transfer-Encoding: 8bit

Daniel Eriksson wrote:
> Ville-Pertti Keinonen wrote:
> 
> 
>>The attached patch should enable serialization for the 
>>controller, which is the only completely reliable fix
>>(without chipset documentation) according to Søren.
>>Obviously it reduces performance since it doesn't 
>>allow both channels to operate simultaneously.
> 
> 
> After applying the patch to sources cvsuped 2004.08.04.01.00.00, everything
> seems to be working correctly. I've run a few stress-tests successfully, and
> I also started smartd (/usr/ports/sysutils/smartmontools/) which previously
> always managed to lock up one of the SATA channels.
> 
> I am going to run some more stress-tests later today, but it looks pretty
> promising.

OK, but that seriously hurts performance which I'd like to avoid if 
possible. Could you try the attached patch and let me know how that 
works out ? I does solve the problem here in a simulated setup, but as 
we all know simulated is just that :)


-- 
-Søren


--------------010702090708090107090308
Content-Type: text/plain;
 name="race-fix"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="race-fix"

Index: ata-all.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-all.c,v
retrieving revision 1.217
diff -u -r1.217 ata-all.c
--- ata-all.c	1 Aug 2004 12:31:38 -0000	1.217
+++ ata-all.c	3 Aug 2004 10:28:47 -0000
@@ -231,7 +231,6 @@
 int
 ata_reinit(struct ata_channel *ch)
 {
-    struct ata_request *request = ch->running;
     int devices, misdev, newdev;
 
     if (!ch->r_irq)
@@ -242,10 +241,8 @@
 	ata_printf(ch, -1, "reiniting channel ..\n");
     ATA_FORCELOCK_CH(ch);
     ch->flags |= ATA_IMMEDIATE_MODE;
-    ch->running = NULL;
     devices = ch->devices;
     ch->hw.reset(ch);
-    ATA_UNLOCK_CH(ch);
 
     if (bootverbose)
 	ata_printf(ch, -1, "resetting done ..\n");
@@ -254,10 +251,6 @@
     if ((misdev = devices & ~ch->devices)) {
 	if ((misdev & (ATA_ATA_MASTER | ATA_ATAPI_MASTER)) &&
 	    ch->device[MASTER].detach) {
-	    if (request && (request->device == &ch->device[MASTER])) {
-		request->result = ENXIO;
-		request->retries = 0;
-	    }
 	    ch->device[MASTER].detach(&ch->device[MASTER]);
 	    ata_fail_requests(ch, &ch->device[MASTER]);
 	    free(ch->device[MASTER].param, M_ATA);
@@ -265,10 +258,6 @@
 	}
 	if ((misdev & (ATA_ATA_SLAVE | ATA_ATAPI_SLAVE)) &&
 	    ch->device[SLAVE].detach) {
-	    if (request && (request->device == &ch->device[SLAVE])) {
-		request->result = ENXIO;
-		request->retries = 0;
-	    }
 	    ch->device[SLAVE].detach(&ch->device[SLAVE]);
 	    ata_fail_requests(ch, &ch->device[SLAVE]);
 	    free(ch->device[SLAVE].param, M_ATA);
@@ -276,6 +265,9 @@
 	}
     }
 
+    ch->running = NULL;
+    ATA_UNLOCK_CH(ch);
+
     /* identify what is present on the channel now */
     ata_identify_devices(ch);
 
Index: ata-all.h
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-all.h,v
retrieving revision 1.79
diff -u -r1.79 ata-all.h
--- ata-all.h	30 Apr 2004 16:21:34 -0000	1.79
+++ ata-all.h	3 Aug 2004 18:27:52 -0000
@@ -297,8 +297,9 @@
     u_int32_t			max_iosize;	/* DMA engine max IO size */
     u_int32_t			cur_iosize;	/* DMA engine current IO size */
     int				flags;
-#define ATA_DMA_ACTIVE			0x01	/* DMA transfer in progress */
-#define ATA_DMA_READ			0x02	/* transaction is a read */
+#define ATA_DMA_READ			0x01	/* transaction is a read */
+#define ATA_DMA_LOADED			0x02	/* DMA tables etc loaded */
+#define ATA_DMA_ACTIVE			0x04	/* DMA transfer in progress */
 
     void (*alloc)(struct ata_channel *ch);
     void (*free)(struct ata_channel *ch);
Index: ata-chipset.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-chipset.c,v
retrieving revision 1.77
diff -u -r1.77 ata-chipset.c
--- ata-chipset.c	30 Jul 2004 13:33:09 -0000	1.77
+++ ata-chipset.c	4 Aug 2004 15:12:22 -0000
@@ -1433,12 +1433,14 @@
 static int
 ata_promise_mio_dmastart(struct ata_channel *ch)
 {
+    ch->flags |= ATA_DMA_ACTIVE;
     return 0;
 }
 
 static int
 ata_promise_mio_dmastop(struct ata_channel *ch)
 {
+    ch->flags &= ~ATA_DMA_ACTIVE;
     /* get status XXX SOS */
     return 0;
 }
@@ -1777,6 +1779,7 @@
     ATA_IDX_OUTB(ch, ATA_BMCMD_PORT,
 		 ((ch->dma->flags & ATA_DMA_READ) ? ATA_BMCMD_WRITE_READ : 0) |
 		 ATA_BMCMD_START_STOP);
+    ch->flags |= ATA_DMA_ACTIVE;
     return 0;
 }
 
@@ -1795,6 +1798,7 @@
     error = ATA_IDX_INB(ch, ATA_BMSTAT_PORT);
     ATA_IDX_OUTB(ch, ATA_BMCMD_PORT,
 		 ATA_IDX_INB(ch, ATA_BMCMD_PORT) & ~ATA_BMCMD_START_STOP);
+    ch->flags &= ~ATA_DMA_ACTIVE;
     ATA_IDX_OUTB(ch, ATA_BMSTAT_PORT, ATA_BMSTAT_INTERRUPT | ATA_BMSTAT_ERROR); 
     return error;
 }
Index: ata-dma.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-dma.c,v
retrieving revision 1.126
diff -u -r1.126 ata-dma.c
--- ata-dma.c	13 Apr 2004 09:44:20 -0000	1.126
+++ ata-dma.c	3 Aug 2004 18:25:11 -0000
@@ -265,8 +265,7 @@
 		    dir ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
 
     ch->dma->cur_iosize = count;
-    ch->dma->flags = dir ? (ATA_DMA_ACTIVE | ATA_DMA_READ) : ATA_DMA_ACTIVE;
-
+    ch->dma->flags = dir ? (ATA_DMA_LOADED | ATA_DMA_READ) : ATA_DMA_LOADED;
     return 0;
 }
 
@@ -281,7 +280,6 @@
     bus_dmamap_unload(ch->dma->ddmatag, ch->dma->ddmamap);
 
     ch->dma->cur_iosize = 0;
-    ch->dma->flags = 0;
-
+    ch->dma->flags &= ~ATA_DMA_LOADED;
     return 0;
 }
Index: ata-lowlevel.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-lowlevel.c,v
retrieving revision 1.40
diff -u -r1.40 ata-lowlevel.c
--- ata-lowlevel.c	24 Jul 2004 19:03:28 -0000	1.40
+++ ata-lowlevel.c	3 Aug 2004 18:29:23 -0000
@@ -80,9 +80,6 @@
 	return ATA_OP_FINISHED;
     }
 
-    /* record the request as running */
-    ch->running = request;
-
     ATA_DEBUG_RQ(request, "transaction");
 
     /* disable ATAPI DMA writes if HW doesn't support it */
@@ -120,10 +117,8 @@
 		else
 		    printf("ATAPI_RESET timeout\n");
 
-		if (request->status & ATA_S_ERROR) {
+		if (request->status & ATA_S_ERROR)
 		    request->error = ATA_IDX_INB(ch, ATA_ERROR);
-		    //request->result = EIO;
-		}
 		break;
 	    }
 
@@ -139,7 +134,8 @@
 	    }
 	}
 	
-	/* return and wait for interrupt */
+	/* record the request as running and return for interrupt */
+	ch->running = request;
 	return ATA_OP_CONTINUES;
 
     /* ATA DMA data transfer commands */
@@ -168,7 +164,8 @@
 	    break;
 	}
 
-	/* return and wait for interrupt */
+	/* record the request as running and return for interrupt */
+	ch->running = request;
 	return ATA_OP_CONTINUES;
 
     /* ATAPI PIO commands */
@@ -192,8 +189,10 @@
 	}
 
 	/* command interrupt device ? just return and wait for interrupt */
-	if ((request->device->param->config & ATA_DRQ_MASK) == ATA_DRQ_INTR)
+	if ((request->device->param->config & ATA_DRQ_MASK) == ATA_DRQ_INTR) {
+	    ch->running = request;
 	    return ATA_OP_CONTINUES;
+	}
 
 	/* wait for ready to write ATAPI command block */
 	{
@@ -224,7 +223,8 @@
 			   (request->device->param->config & ATA_PROTO_MASK) ==
 			   ATA_PROTO_ATAPI_12 ? 6 : 8);
 
-	/* return and wait for interrupt */
+	/* record the request as running and return for interrupt */
+	ch->running = request;
 	return ATA_OP_CONTINUES;
 
     case ATA_R_ATAPI|ATA_R_DMA:
@@ -289,14 +289,14 @@
 	    break;
 	}
 
-	/* return and wait for interrupt */
+	/* record the request as running and return for interrupt */
+	ch->running = request;
 	return ATA_OP_CONTINUES;
     }
 
     /* request finish here */
-    if (ch->dma->flags & ATA_DMA_ACTIVE)
+    if (ch->dma->flags & ATA_DMA_LOADED)
 	ch->dma->unload(ch);
-    ch->running = NULL;
     return ATA_OP_FINISHED;
 }
 
Index: ata-pci.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-pci.c,v
retrieving revision 1.85
diff -u -r1.85 ata-pci.c
--- ata-pci.c	15 Jun 2004 11:02:09 -0000	1.85
+++ ata-pci.c	3 Aug 2004 18:21:50 -0000
@@ -454,6 +454,7 @@
 		 (ATA_IDX_INB(ch, ATA_BMCMD_PORT) & ~ATA_BMCMD_WRITE_READ) |
 		 ((ch->dma->flags & ATA_DMA_READ) ? ATA_BMCMD_WRITE_READ : 0) |
 		 ATA_BMCMD_START_STOP);
+    ch->dma->flags |= ATA_DMA_ACTIVE;
     return 0;
 }
 
@@ -465,6 +466,7 @@
     error = ATA_IDX_INB(ch, ATA_BMSTAT_PORT) & ATA_BMSTAT_MASK;
     ATA_IDX_OUTB(ch, ATA_BMCMD_PORT, 
 		 ATA_IDX_INB(ch, ATA_BMCMD_PORT) & ~ATA_BMCMD_START_STOP);
+    ch->dma->flags &= ~ATA_DMA_ACTIVE;
     ATA_IDX_OUTB(ch, ATA_BMSTAT_PORT, ATA_BMSTAT_INTERRUPT | ATA_BMSTAT_ERROR);
     return error;
 }
Index: ata-queue.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/ata/ata-queue.c,v
retrieving revision 1.29
diff -u -r1.29 ata-queue.c
--- ata-queue.c	1 Jun 2004 12:26:08 -0000	1.29
+++ ata-queue.c	1 Aug 2004 15:56:07 -0000
@@ -160,15 +160,8 @@
     if (ch->flags & ATA_IMMEDIATE_MODE)
 	return;
 
-    /* lock the ATA HW for this request */
-    mtx_lock(&ch->queue_mtx);
-    ch->locking(ch, ATA_LF_LOCK);
-    if (!ATA_LOCK_CH(ch)) {
-	mtx_unlock(&ch->queue_mtx);
-	return;
-    }
-
     /* if we dont have any work, ask the subdriver(s) */
+    mtx_lock(&ch->queue_mtx);
     if (TAILQ_EMPTY(&ch->ata_queue)) {
 	mtx_unlock(&ch->queue_mtx);
 	if (ch->device[MASTER].start)
@@ -177,7 +170,15 @@
 	    ch->device[SLAVE].start(&ch->device[SLAVE]);
 	mtx_lock(&ch->queue_mtx);
     }
+
+    /* if we have work todo, try to lock the ATA HW and start transaction */
     if ((request = TAILQ_FIRST(&ch->ata_queue))) {
+	ch->locking(ch, ATA_LF_LOCK);
+	if (!ATA_LOCK_CH(ch)) {
+	    mtx_unlock(&ch->queue_mtx);
+	    return;
+	}
+
 	TAILQ_REMOVE(&ch->ata_queue, request, chain);
 	mtx_unlock(&ch->queue_mtx);
 
@@ -192,15 +193,14 @@
 	/* kick HW into action and wait for interrupt if it flies*/
 	if (ch->hw.transaction(request) == ATA_OP_CONTINUES)
 	    return;
-    }
 
-    /* unlock ATA channel HW */
-    ATA_UNLOCK_CH(ch);
-    ch->locking(ch, ATA_LF_UNLOCK);
+	/* unlock ATA channel HW */
+	ATA_UNLOCK_CH(ch);
+	ch->locking(ch, ATA_LF_UNLOCK);
 
-    /* if we have a request here it failed and should be completed */
-    if (request)
+	/* finish up this (failed) request */
 	ata_finish(request);
+    }
     else
 	mtx_unlock(&ch->queue_mtx);
 }

--------------010702090708090107090308--



Want to link to this message? Use this URL: <http://docs.FreeBSD.org/cgi/mid.cgi?4110E5AE.7050602>