Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 02 Feb 2015 22:47:58 +0200
From:      Alexander Motin <mav@FreeBSD.org>
To:        Nathan Whitehorn <nwhitehorn@freebsd.org>,  src-committers@freebsd.org, svn-src-all@freebsd.org,  svn-src-head@freebsd.org
Subject:   Re: svn commit: r278111 - in head/sys/cam: . scsi
Message-ID:  <54CFE27E.8050602@FreeBSD.org>
In-Reply-To: <54CFDDFF.7060708@freebsd.org>
References:  <201502022023.t12KN6ir069698@svn.freebsd.org> <54CFDDFF.7060708@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On 02.02.2015 22:28, Nathan Whitehorn wrote:
> You may want to look at PR 195479 on this topic. Does this also turn off
> the infinite console spam?

This patch was supposed to only workaround issues I see when storage has
a problem. In that case hiding messages is a bad idea, because admin
should see that something goes wrong. So I didn't hide them.

I don't know why would VMware report BUSY status regularly for disks
under load, as mentioned in that PR. My patch should partially help in
that case by avoiding errors reported to applications. But still each
error like that freezes the device operation for one second before
retry, so real cause still must be fixed or performance will heavily suffer.

> On 02/02/15 12:23, Alexander Motin wrote:
>> Author: mav
>> Date: Mon Feb  2 20:23:05 2015
>> New Revision: 278111
>> URL: https://svnweb.freebsd.org/changeset/base/278111
>>
>> Log:
>>    Retry indefinitely on SCSI BUSY status from VMware disks and CDs.
>>
>>    VMware returns BUSY status when storage has transient connectivity
>> issues.
>>    It is often better to wait and let VM admin fix the problem then
>> crash.
>>
>>    Discussed with:    ken
>>    MFC after:    1 week
>>
>> Modified:
>>    head/sys/cam/cam.h
>>    head/sys/cam/cam_periph.c
>>    head/sys/cam/scsi/scsi_cd.c
>>    head/sys/cam/scsi/scsi_da.c
>>
>> Modified: head/sys/cam/cam.h
>> ==============================================================================
>>
>> --- head/sys/cam/cam.h    Mon Feb  2 20:18:47 2015    (r278110)
>> +++ head/sys/cam/cam.h    Mon Feb  2 20:23:05 2015    (r278111)
>> @@ -121,7 +121,8 @@ enum {
>>       SF_QUIET_IR        = 0x04,    /* Be quiet about Illegal Request
>> reponses */
>>       SF_PRINT_ALWAYS        = 0x08,    /* Always print error status. */
>>       SF_NO_RECOVERY        = 0x10,    /* Don't do active error
>> recovery. */
>> -    SF_NO_RETRY        = 0x20    /* Don't do any retries. */
>> +    SF_NO_RETRY        = 0x20,    /* Don't do any retries. */
>> +    SF_RETRY_BUSY        = 0x40    /* Retry BUSY status. */
>>   };
>>
>>   /* CAM  Status field values */
>>
>> Modified: head/sys/cam/cam_periph.c
>> ==============================================================================
>>
>> --- head/sys/cam/cam_periph.c    Mon Feb  2 20:18:47 2015    (r278110)
>> +++ head/sys/cam/cam_periph.c    Mon Feb  2 20:23:05 2015    (r278111)
>> @@ -1359,8 +1359,8 @@ camperiphscsistatuserror(union ccb *ccb,
>>            * Restart the queue after either another
>>            * command completes or a 1 second timeout.
>>            */
>> -         if (ccb->ccb_h.retry_count > 0) {
>> -             ccb->ccb_h.retry_count--;
>> +        if ((sense_flags & SF_RETRY_BUSY) != 0 ||
>> +            (ccb->ccb_h.retry_count--) > 0) {
>>               error = ERESTART;
>>               *relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT
>>                         | RELSIM_RELEASE_AFTER_CMDCMPLT;
>>
>> Modified: head/sys/cam/scsi/scsi_cd.c
>> ==============================================================================
>>
>> --- head/sys/cam/scsi/scsi_cd.c    Mon Feb  2 20:18:47 2015    (r278110)
>> +++ head/sys/cam/scsi/scsi_cd.c    Mon Feb  2 20:23:05 2015    (r278111)
>> @@ -87,14 +87,16 @@ typedef enum {
>>       CD_Q_NONE        = 0x00,
>>       CD_Q_NO_TOUCH        = 0x01,
>>       CD_Q_BCD_TRACKS        = 0x02,
>> -    CD_Q_10_BYTE_ONLY    = 0x10
>> +    CD_Q_10_BYTE_ONLY    = 0x10,
>> +    CD_Q_RETRY_BUSY        = 0x40
>>   } cd_quirks;
>>
>>   #define CD_Q_BIT_STRING        \
>>       "\020"            \
>>       "\001NO_TOUCH"        \
>>       "\002BCD_TRACKS"    \
>> -    "\00510_BYTE_ONLY"
>> +    "\00510_BYTE_ONLY"    \
>> +    "\007RETRY_BUSY"
>>
>>   typedef enum {
>>       CD_FLAG_INVALID        = 0x0001,
>> @@ -189,6 +191,14 @@ static struct cd_quirk_entry cd_quirk_ta
>>       {
>>           { T_CDROM, SIP_MEDIA_REMOVABLE, "CHINON", "CD-ROM
>> CDS-535","*"},
>>           /* quirks */ CD_Q_BCD_TRACKS
>> +    },
>> +    {
>> +        /*
>> +         * VMware returns BUSY status when storage has transient
>> +         * connectivity problems, so better wait.
>> +         */
>> +        {T_CDROM, SIP_MEDIA_REMOVABLE, "NECVMWar", "VMware IDE
>> CDR10", "*"},
>> +        /*quirks*/ CD_Q_RETRY_BUSY
>>       }
>>   };
>>
>> @@ -2581,6 +2591,9 @@ cderror(union ccb *ccb, u_int32_t cam_fl
>>        * don't treat UAs as errors.
>>        */
>>       sense_flags |= SF_RETRY_UA;
>> +
>> +    if (softc->quirks & CD_Q_RETRY_BUSY)
>> +        sense_flags |= SF_RETRY_BUSY;
>>       return (cam_periph_error(ccb, cam_flags, sense_flags,
>>                    &softc->saved_ccb));
>>   }
>>
>> Modified: head/sys/cam/scsi/scsi_da.c
>> ==============================================================================
>>
>> --- head/sys/cam/scsi/scsi_da.c    Mon Feb  2 20:18:47 2015    (r278110)
>> +++ head/sys/cam/scsi/scsi_da.c    Mon Feb  2 20:23:05 2015    (r278111)
>> @@ -101,7 +101,8 @@ typedef enum {
>>       DA_Q_NO_PREVENT        = 0x04,
>>       DA_Q_4K            = 0x08,
>>       DA_Q_NO_RC16        = 0x10,
>> -    DA_Q_NO_UNMAP        = 0x20
>> +    DA_Q_NO_UNMAP        = 0x20,
>> +    DA_Q_RETRY_BUSY        = 0x40
>>   } da_quirks;
>>
>>   #define DA_Q_BIT_STRING        \
>> @@ -110,7 +111,9 @@ typedef enum {
>>       "\002NO_6_BYTE"        \
>>       "\003NO_PREVENT"    \
>>       "\0044K"        \
>> -    "\005NO_RC16"
>> +    "\005NO_RC16"        \
>> +    "\006NO_UNMAP"        \
>> +    "\007RETRY_BUSY"
>>
>>   typedef enum {
>>       DA_CCB_PROBE_RC        = 0x01,
>> @@ -359,6 +362,14 @@ static struct da_quirk_entry da_quirk_ta
>>           {T_DIRECT, SIP_MEDIA_FIXED, "STEC", "*", "*"},
>>           /*quirks*/ DA_Q_NO_UNMAP
>>       },
>> +    {
>> +        /*
>> +         * VMware returns BUSY status when storage has transient
>> +         * connectivity problems, so better wait.
>> +         */
>> +        {T_DIRECT, SIP_MEDIA_FIXED, "VMware", "Virtual disk", "*"},
>> +        /*quirks*/ DA_Q_RETRY_BUSY
>> +    },
>>       /* USB mass storage devices supported by umass(4) */
>>       {
>>           /*
>> @@ -3630,6 +3641,9 @@ daerror(union ccb *ccb, u_int32_t cam_fl
>>        * don't treat UAs as errors.
>>        */
>>       sense_flags |= SF_RETRY_UA;
>> +
>> +    if (softc->quirks & DA_Q_RETRY_BUSY)
>> +        sense_flags |= SF_RETRY_BUSY;
>>       return(cam_periph_error(ccb, cam_flags, sense_flags,
>>                   &softc->saved_ccb));
>>   }
>>
> 


-- 
Alexander Motin



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?54CFE27E.8050602>