Re: [Fwd: FIO suggestion]

From: Zhang, Yanmin <yanmin_zhang_at_linux.intel.com>
Date: Wed, 13 Feb 2008 17:15:24 +0800

On Mon, 2008-02-04 at 10:59 +0100, Jens Axboe wrote:
> On Mon, Feb 04 2008, Zhang, Yanmin wrote:
> > On Mon, 2008-02-04 at 09:24 +0100, Jens Axboe wrote:
> > > On Mon, Feb 04 2008, Zhang, Yanmin wrote:
> > > > iodepth will be set to 1 if the io engine is SYNC, as well as
> > > > iodepth_batch.
> > > >
> > > > Is it possible to use different values although it's SYNC? I mean, the
> > > > core could call writev/readv, so iodepth means the vector number in
> > > > writev/readv.
> > >
> > > OK, so for sequential IO, piece them together in an iovec and use readv
> > > or writev when we see a non-sequential bit?
> > Yes.
> >
> > > It's definitely possible,
> > > though I don't know if I'd call that queuing.
> > >
> > > I'll give it a go!
> > Thanks a lot!
>
> Something like this. It adds a new io engine called vsync that uses
> readv and writev to transfer the data, coalescing into the iovecs
> when possible.
>
> You need fio-git to apply it.
It does work. Thanks a lot!

-yanmin

>
> diff --git a/HOWTO b/HOWTO
> index a20f714..3d6b354 100644
> --- a/HOWTO
> +++ b/HOWTO
> @@ -344,6 +344,8 @@ ioengine=str Defines how the job issues io to the file. The following
>
> psync Basic pread(2) or pwrite(2) io.
>
> + vsync Basic readv(2) or writev(2) IO.
> +
> libaio Linux native asynchronous io.
>
> posixaio glibc posix asynchronous io.
> diff --git a/README b/README
> index 4c58d37..940839c 100644
> --- a/README
> +++ b/README
> @@ -136,14 +136,16 @@ The job file parameters are:
> size=x Set file size to x bytes (x string can include k/m/g)
> ioengine=x 'x' may be: aio/libaio/linuxaio for Linux aio,
> posixaio for POSIX aio, sync for regular read/write io,
> - psync for regular pread/pwrite io, mmap for mmap'ed io,
> - syslet-rw for syslet driven read/write, splice for using
> - splice/vmsplice, sgio for direct SG_IO io, net for
> - network io, or cpuio for a cycler burner load. sgio only
> - works on Linux on SCSI (or SCSI-like devices, such as
> - usb-storage or sata/libata driven) devices. Fio also has
> - a null io engine, which is mainly used for testing fio
> - itself.
> + psync for regular pread/pwrite io, vsync for regular
> + readv/writev (with queuing emulation), mmap for mmap'ed
> + io, syslet-rw for syslet driven read/write, splice for
> + using splice/vmsplice, sgio for direct SG_IO io, net
> + for network io, or cpuio for a cycle burner load. sgio
> + only works on Linux on SCSI (or SCSI-like devices, such
> + as usb-storage or sata/libata driven) devices. Fio also
> + has a null io engine, which is mainly used for testing
> + fio itself.
> +
> iodepth=x For async io, allow 'x' ios in flight
> overwrite=x If 'x', layout a write file first.
> nrfiles=x Spread io load over 'x' number of files per job,
> diff --git a/engines/sync.c b/engines/sync.c
> index 5e49429..505381d 100644
> --- a/engines/sync.c
> +++ b/engines/sync.c
> @@ -13,6 +13,17 @@
>
> #include "../fio.h"
>
> +struct syncio_data {
> + struct iovec *iovecs;
> + struct io_u **io_us;
> + unsigned int queued;
> + unsigned long queued_bytes;
> +
> + unsigned long long last_offset;
> + struct fio_file *last_file;
> + enum fio_ddir last_ddir;
> +};
> +
> static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
> {
> struct fio_file *f = io_u->file;
> @@ -81,6 +92,183 @@ static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
> return fio_io_end(td, io_u, ret);
> }
>
> +static int fio_vsyncio_getevents(struct thread_data *td, unsigned int min,
> + unsigned int max,
> + struct timespec fio_unused *t)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> + int ret;
> +
> + if (min) {
> + ret = sd->queued;
> + sd->queued = 0;
> + } else
> + ret = 0;
> +
> + dprint(FD_IO, "vsyncio_getevents: min=%d,max=%d: %d\n", min, max, ret);
> + return ret;
> +}
> +
> +static struct io_u *fio_vsyncio_event(struct thread_data *td, int event)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> +
> + return sd->io_us[event];
> +}
> +
> +static int fio_vsyncio_append(struct thread_data *td, struct io_u *io_u)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> +
> + if (io_u->ddir == DDIR_SYNC)
> + return 0;
> +
> + if (io_u->offset == sd->last_offset && io_u->file == sd->last_file &&
> + io_u->ddir == sd->last_ddir)
> + return 1;
> +
> + return 0;
> +}
> +
> +static void fio_vsyncio_set_iov(struct syncio_data *sd, struct io_u *io_u,
> + int index)
> +{
> + sd->io_us[index] = io_u;
> + sd->iovecs[index].iov_base = io_u->xfer_buf;
> + sd->iovecs[index].iov_len = io_u->xfer_buflen;
> + sd->last_offset = io_u->offset + io_u->xfer_buflen;
> + sd->last_file = io_u->file;
> + sd->last_ddir = io_u->ddir;
> + sd->queued_bytes += io_u->xfer_buflen;
> + sd->queued++;
> +}
> +
> +static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> +
> + fio_ro_check(td, io_u);
> +
> + if (!fio_vsyncio_append(td, io_u)) {
> + dprint(FD_IO, "vsyncio_queue: no append (%d)\n", sd->queued);
> + /*
> + * If we can't append and have stuff queued, tell fio to
> + * commit those first and then retry this io
> + */
> + if (sd->queued)
> + return FIO_Q_BUSY;
> +
> + sd->queued = 0;
> + sd->queued_bytes = 0;
> + fio_vsyncio_set_iov(sd, io_u, 0);
> + } else {
> + if (sd->queued == td->o.iodepth) {
> + dprint(FD_IO, "vsyncio_queue: max depth %d\n", sd->queued);
> + return FIO_Q_BUSY;
> + }
> +
> + dprint(FD_IO, "vsyncio_queue: append\n");
> + fio_vsyncio_set_iov(sd, io_u, sd->queued);
> + }
> +
> + dprint(FD_IO, "vsyncio_queue: depth now %d\n", sd->queued);
> + return FIO_Q_QUEUED;
> +}
> +
> +/*
> + * Check that we transferred all bytes, or saw an error, etc
> + */
> +static int fio_vsyncio_end(struct thread_data *td, ssize_t bytes)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> + struct io_u *io_u;
> + unsigned int i;
> + int err;
> +
> + /*
> + * transferred everything, perfect
> + */
> + if (bytes == sd->queued_bytes)
> + return 0;
> +
> + err = errno;
> + for (i = 0; i < sd->queued; i++) {
> + io_u = sd->io_us[i];
> +
> + if (bytes == -1) {
> + io_u->error = err;
> + } else {
> + unsigned int this_io;
> +
> + this_io = bytes;
> + if (this_io > io_u->xfer_buflen)
> + this_io = io_u->xfer_buflen;
> +
> + io_u->resid = io_u->xfer_buflen - this_io;
> + io_u->error = 0;
> + bytes -= this_io;
> + }
> + }
> +
> + if (bytes == -1) {
> + td_verror(td, err, "xfer vsync");
> + return -err;
> + }
> +
> + return 0;
> +}
> +
> +static int fio_vsyncio_commit(struct thread_data *td)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> + struct fio_file *f;
> + ssize_t ret;
> +
> + if (!sd->queued)
> + return 0;
> +
> + f = sd->last_file;
> +
> + if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) {
> + int err = -errno;
> +
> + td_verror(td, errno, "lseek");
> + return err;
> + }
> +
> + if (sd->last_ddir == DDIR_READ)
> + ret = readv(f->fd, sd->iovecs, sd->queued);
> + else
> + ret = writev(f->fd, sd->iovecs, sd->queued);
> +
> + dprint(FD_IO, "vsyncio_commit: %d\n", (int) ret);
> + return fio_vsyncio_end(td, ret);
> +}
> +
> +static int fio_vsyncio_init(struct thread_data *td)
> +{
> + struct syncio_data *sd;
> +
> + sd = malloc(sizeof(*sd));
> + memset(sd, 0, sizeof(*sd));
> + sd->last_offset = -1ULL;
> + sd->iovecs = malloc(td->o.iodepth * sizeof(struct iovec));
> + sd->io_us = malloc(td->o.iodepth * sizeof(struct io_u *));
> +
> + td->io_ops->data = sd;
> + return 0;
> +}
> +
> +static void fio_vsyncio_cleanup(struct thread_data *td)
> +{
> + struct syncio_data *sd = td->io_ops->data;
> +
> + free(sd->iovecs);
> + free(sd->io_us);
> + free(sd);
> + td->io_ops->data = NULL;
> +}
> +
> static struct ioengine_ops ioengine_rw = {
> .name = "sync",
> .version = FIO_IOOPS_VERSION,
> @@ -100,14 +288,30 @@ static struct ioengine_ops ioengine_prw = {
> .flags = FIO_SYNCIO,
> };
>
> +static struct ioengine_ops ioengine_vrw = {
> + .name = "vsync",
> + .version = FIO_IOOPS_VERSION,
> + .init = fio_vsyncio_init,
> + .cleanup = fio_vsyncio_cleanup,
> + .queue = fio_vsyncio_queue,
> + .commit = fio_vsyncio_commit,
> + .event = fio_vsyncio_event,
> + .getevents = fio_vsyncio_getevents,
> + .open_file = generic_open_file,
> + .close_file = generic_close_file,
> + .flags = FIO_SYNCIO,
> +};
> +
> static void fio_init fio_syncio_register(void)
> {
> register_ioengine(&ioengine_rw);
> register_ioengine(&ioengine_prw);
> + register_ioengine(&ioengine_vrw);
> }
>
> static void fio_exit fio_syncio_unregister(void)
> {
> unregister_ioengine(&ioengine_rw);
> unregister_ioengine(&ioengine_prw);
> + unregister_ioengine(&ioengine_vrw);
> }
> diff --git a/options.c b/options.c
> index 3de2ae6..1e99810 100644
> --- a/options.c
> +++ b/options.c
> @@ -440,6 +440,9 @@ static struct fio_option options[] = {
> { .ival = "psync",
> .help = "Use pread/pwrite",
> },
> + { .ival = "vsync",
> + .help = "Use readv/writev",
> + },
> #ifdef FIO_HAVE_LIBAIO
> { .ival = "libaio",
> .help = "Linux native asynchronous IO",
>
Received on Wed Feb 13 2008 - 10:15:24 CET

This archive was generated by hypermail 2.2.0 : Wed Feb 13 2008 - 10:30:01 CET