From 2e48075daee1b452302706b6fa85d8f477cfccb2 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Tue, 25 Feb 2020 00:40:05 +0000
Subject: [PATCH] New ioctl DIOCGSECTORALIGN returns sector alignment
 parameters:

	struct disk_sectoralign {
		/* First aligned sector number. */
		uint32_t dsa_firstaligned;

		/* Number of sectors per aligned unit. */
		uint32_t dsa_alignment;
	};

- Teach wd(4) to get it from ATA.
- Teach cgd(4) to pass it through from the underlying disk.
- Teach dk(4) to pass it through with adjustments.
- Teach zpool to take advantage of it.
  => XXX zpool doesn't seem to understand when the vdev's starting
     sector is misaligned.
---
 .../osnet/dist/uts/common/fs/zfs/vdev_disk.c  | 22 +++++++-
 sys/dev/ata/wd.c                              | 52 ++++++++++++++++++-
 sys/dev/ata/wdvar.h                           |  1 +
 sys/dev/cgd.c                                 | 29 +++++++++++
 sys/dev/dkwedge/dk.c                          | 19 +++++++
 sys/sys/disk.h                                | 10 ++++
 sys/sys/dkio.h                                |  3 ++
 7 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c b/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
index 000174281f46..144b4abcf193 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
@@ -151,6 +151,7 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	struct disk *pdk;
 	int error, cmd;
 	struct partinfo pinfo;
+	struct disk_sectoralign dsa;
 
 	/*
 	 * We must have a pathname, and it must be absolute.
@@ -292,7 +293,26 @@ skip_open:
 	*max_psize = *psize;
 
 	*ashift = highbit(MAX(pinfo.pi_secsize, SPA_MINBLOCKSIZE)) - 1;
-	*pashift = *ashift;
+
+	/*
+	 * Try to determine whether the disk has a preferred physical
+	 * sector size even if it can emulate a smaller logical sector
+	 * size with r/m/w cycles, e.g. a disk with 4096-byte sectors
+	 * that for compatibility claims to support 512-byte ones.  dsa
+	 * is filled in only when the ioctl succeeds, so it must not be
+	 * examined on the fallback path.
+	 */
+	if (VOP_IOCTL(vp, DIOCGSECTORALIGN, &dsa, FREAD, NOCRED) == 0) {
+		*pashift = highbit(dsa.dsa_alignment * pinfo.pi_secsize) - 1;
+		if (dsa.dsa_firstaligned % dsa.dsa_alignment)
+			printf("ZFS WARNING: vdev %s: sectors are misaligned"
+			    " (alignment=%"PRIu32", firstaligned=%"PRIu32")\n",
+			    vd->vdev_path,
+			    dsa.dsa_alignment, dsa.dsa_firstaligned);
+	} else {
+		*pashift = *ashift;
+	}
+
 	vd->vdev_wholedisk = (pinfo.pi_offset == 0); /* XXXNETBSD */
 
 	/*
diff --git a/sys/dev/ata/wd.c b/sys/dev/ata/wd.c
index 039fd844c0d5..be12f307ac5f 100644
--- a/sys/dev/ata/wd.c
+++ b/sys/dev/ata/wd.c
@@ -430,16 +430,43 @@ wdattach(device_t parent, device_t self, void *aux)
 	} else {
 		wd->sc_blksize = 512;
 	}
+	wd->sc_sectoralign.dsa_firstaligned = 0;
+	wd->sc_sectoralign.dsa_alignment = 1;
+	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
+	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LPS) != 0)) {
+		wd->sc_sectoralign.dsa_alignment = 1 <<
+		    (wd->sc_params.atap_secsz & ATA_SECSZ_LPS_SZMSK);
+		if ((wd->sc_params.atap_logical_align & ATA_LA_VALID_MASK) ==
+		    ATA_LA_VALID) {
+			/* Reduce so an offset of 0 yields 0, not alignment. */
+			wd->sc_sectoralign.dsa_firstaligned =
+			    (wd->sc_sectoralign.dsa_alignment -
+				(wd->sc_params.atap_logical_align &
+				    ATA_LA_MASK)) %
+			    wd->sc_sectoralign.dsa_alignment;
+		}
+	}
 	wd->sc_capacity512 = (wd->sc_capacity * wd->sc_blksize) / DEV_BSIZE;
 	format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * wd->sc_blksize);
 	aprint_normal_dev(self, "%s, %d cyl, %d head, %d sec, "
-	    "%d bytes/sect x %llu sectors\n",
+	    "%d bytes/sect x %llu sectors",
 	    pbuf,
 	    (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
 		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
 		wd->sc_params.atap_cylinders,
 	    wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
 	    wd->sc_blksize, (unsigned long long)wd->sc_capacity);
+	if (wd->sc_sectoralign.dsa_alignment != 1) {
+		/* Physical sector size = logical size * sectors per unit. */
+		aprint_normal(" (%d bytes/physsect",
+		    wd->sc_sectoralign.dsa_alignment * wd->sc_blksize);
+		if (wd->sc_sectoralign.dsa_firstaligned != 0) {
+			aprint_normal("; first aligned sector: %jd",
+			    (intmax_t)wd->sc_sectoralign.dsa_firstaligned);
+		}
+		aprint_normal(")");
+	}
+	aprint_normal("\n");
 
 	ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
 	    device_xname(self), wd->sc_params.atap_dmatiming_mimi,
@@ -1409,6 +1436,29 @@ wdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
 		return(error1);
 	}
 
+	case DIOCGSECTORALIGN: {
+		struct disk_sectoralign *dsa = addr;
+		int part = WDPART(dev);
+
+		*dsa = wd->sc_sectoralign;
+		if (part != RAW_PART) {
+			struct disklabel *lp = dksc->sc_dkdev.dk_label;
+			daddr_t offset = lp->d_partitions[part].p_offset;
+			uint32_t r = offset % dsa->dsa_alignment;
+
+			if (r < dsa->dsa_firstaligned)
+				dsa->dsa_firstaligned = dsa->dsa_firstaligned
+				    - r;
+			else
+				dsa->dsa_firstaligned = (dsa->dsa_firstaligned
+				    + dsa->dsa_alignment) - r;
+		}
+		/* r == firstaligned gives alignment; reduce it to 0. */
+		dsa->dsa_firstaligned %= dsa->dsa_alignment;
+
+		return 0;
+	}
+
 	default:
 		return dk_ioctl(dksc, dev, cmd, addr, flag, l);
 	}
diff --git a/sys/dev/ata/wdvar.h b/sys/dev/ata/wdvar.h
index 2f13ded15e38..461c08bdd023 100644
--- a/sys/dev/ata/wdvar.h
+++ b/sys/dev/ata/wdvar.h
@@ -59,6 +59,7 @@ struct wd_softc {
 	uint64_t sc_capacity512; /* ... in DEV_BSIZE blocks */
 	uint32_t sc_capacity28; /* capacity accessible with LBA28 commands */
 	uint32_t sc_blksize; /* logical block size, in bytes */
+	struct disk_sectoralign sc_sectoralign; /* sector alignment */
 
 #ifdef WD_SOFTBADSECT
 	SLIST_HEAD(, disk_badsectors) sc_bslist;
diff --git a/sys/dev/cgd.c b/sys/dev/cgd.c
index 4c7348c364d0..46512e0bbbf0 100644
--- a/sys/dev/cgd.c
+++ b/sys/dev/cgd.c
@@ -722,6 +722,35 @@ cgdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
 		 * We pass this call down to the underlying disk.
 		 */
 		return VOP_IOCTL(cs->sc_tvn, cmd, data, flag, l->l_cred);
+	case DIOCGSECTORALIGN: {
+		struct disk_sectoralign *dsa = data;
+		int error;
+
+		if (!DK_ATTACHED(dksc))
+			return ENOENT;
+
+		/* Get the underlying disk's sector alignment. */
+		error = VOP_IOCTL(cs->sc_tvn, cmd, data, flag, l->l_cred);
+		if (error)
+			return error;
+
+		/* Adjust for the disklabel partition if necessary. */
+		if (part != RAW_PART) {
+			struct disklabel *lp = dksc->sc_dkdev.dk_label;
+			daddr_t offset = lp->d_partitions[part].p_offset;
+			uint32_t r = offset % dsa->dsa_alignment;
+
+			if (r < dsa->dsa_firstaligned)
+				dsa->dsa_firstaligned = dsa->dsa_firstaligned
+				    - r;
+			else
+				dsa->dsa_firstaligned = (dsa->dsa_firstaligned
+				    + dsa->dsa_alignment) - r;
+		}
+		/* r == firstaligned gives alignment; reduce it to 0. */
+		dsa->dsa_firstaligned %= dsa->dsa_alignment;
+		return 0;
+	}
 	case DIOCGSTRATEGY:
 	case DIOCSSTRATEGY:
 		if (!DK_ATTACHED(dksc))
diff --git a/sys/dev/dkwedge/dk.c b/sys/dev/dkwedge/dk.c
index 30fc333d0b48..9492cf0d88ca 100644
--- a/sys/dev/dkwedge/dk.c
+++ b/sys/dev/dkwedge/dk.c
@@ -1501,7 +1501,26 @@ dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
 
 		break;
 	    }
+	case DIOCGSECTORALIGN:
+	    {
+		struct disk_sectoralign *dsa = data;
+		uint32_t r;
+
+		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
+		    l != NULL ? l->l_cred : NOCRED);
+		if (error)
+			break;
 
+		r = sc->sc_offset % dsa->dsa_alignment;
+		if (r < dsa->dsa_firstaligned)
+			dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
+		else
+			dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
+			    dsa->dsa_alignment) - r;
+		/* r == firstaligned gives alignment; reduce it to 0. */
+		dsa->dsa_firstaligned %= dsa->dsa_alignment;
+		break;
+	    }
 	default:
 		error = ENOTTY;
 	}
diff --git a/sys/sys/disk.h b/sys/sys/disk.h
index 100a44c3f880..ed25eb657017 100644
--- a/sys/sys/disk.h
+++ b/sys/sys/disk.h
@@ -300,6 +300,16 @@ struct disk_strategy {
 	size_t dks_paramlen;		/* notyet; should be 0 */
 };
 
+/*
+ * Sector alignment: logical sectors dsa_firstaligned + k*dsa_alignment
+ * (k = 0, 1, ...) begin an aligned unit.  0 <= dsa_firstaligned <
+ * dsa_alignment.
+ */
+struct disk_sectoralign {
+	uint32_t dsa_firstaligned;	/* first aligned sector # */
+	uint32_t dsa_alignment;		/* sectors per aligned unit */
+};
+
 #ifdef _KERNEL
 #include
 #include
diff --git a/sys/sys/dkio.h b/sys/sys/dkio.h
index 84046268d0a4..937df511d791 100644
--- a/sys/sys/dkio.h
+++ b/sys/sys/dkio.h
@@ -133,4 +133,7 @@
 		/* mass removal */
 #define	DIOCRMWEDGES	_IOR('d', 134, int)	/* remove all wedges */
 
+		/* sector alignment */
+#define	DIOCGSECTORALIGN	_IOR('d', 135, struct disk_sectoralign)
+
 #endif /* _SYS_DKIO_H_ */