Re: [Fwd: FIO suggestion]

From: Jens Axboe <jens.axboe_at_oracle.com>
Date: Mon, 4 Feb 2008 10:59:27 +0100

On Mon, Feb 04 2008, Zhang, Yanmin wrote:
> On Mon, 2008-02-04 at 09:24 +0100, Jens Axboe wrote:
> > On Mon, Feb 04 2008, Zhang, Yanmin wrote:
> > > iodepth will be set to 1 if the io engine is SYNC, as well as
> > > iodepth_batch.
> > >
> > > Is it possible to use different values although it's SYNC? I mean, the
> > > core could call writev/readv, so iodepth means the vector number in
> > > writev/readv.
> >
> > OK, so for sequential IO, piece them together in an iovec and use readv
> > or writev when we see a non-sequential bit?
> Yes.
>
> > It's definitely possible,
> > though I don't know if I'd call that queuing.
> >
> > I'll give it a go!
> Thanks a lot!

Something like this. It adds a new io engine called vsync that uses
readv and writev to transfer the data, coalescing into the iovecs
when possible.

You need fio-git to apply it.

diff --git a/HOWTO b/HOWTO
index a20f714..3d6b354 100644
--- a/HOWTO
+++ b/HOWTO
@@ -344,6 +344,8 @@ ioengine=str Defines how the job issues io to the file. The following
 
                         psync Basic pread(2) or pwrite(2) io.
 
+ vsync Basic readv(2) or writev(2) IO.
+
                         libaio Linux native asynchronous io.
 
                         posixaio glibc posix asynchronous io.
diff --git a/README b/README
index 4c58d37..940839c 100644
--- a/README
+++ b/README
@@ -136,14 +136,16 @@ The job file parameters are:
         size=x Set file size to x bytes (x string can include k/m/g)
         ioengine=x 'x' may be: aio/libaio/linuxaio for Linux aio,
                         posixaio for POSIX aio, sync for regular read/write io,
- psync for regular pread/pwrite io, mmap for mmap'ed io,
- syslet-rw for syslet driven read/write, splice for using
- splice/vmsplice, sgio for direct SG_IO io, net for
- network io, or cpuio for a cycler burner load. sgio only
- works on Linux on SCSI (or SCSI-like devices, such as
- usb-storage or sata/libata driven) devices. Fio also has
- a null io engine, which is mainly used for testing fio
- itself.
+ psync for regular pread/pwrite io, vsync for regular
+ readv/writev (with queuing emulation), mmap for mmap'ed
+ io, syslet-rw for syslet driven read/write, splice for
+ using splice/vmsplice, sgio for direct SG_IO io, net
+ for network io, or cpuio for a cycle burner load. sgio
+ only works on Linux on SCSI (or SCSI-like devices, such
+ as usb-storage or sata/libata driven) devices. Fio also
+ has a null io engine, which is mainly used for testing
+ fio itself.
+
         iodepth=x For async io, allow 'x' ios in flight
         overwrite=x If 'x', layout a write file first.
         nrfiles=x Spread io load over 'x' number of files per job,
diff --git a/engines/sync.c b/engines/sync.c
index 5e49429..505381d 100644
--- a/engines/sync.c
+++ b/engines/sync.c
@@ -13,6 +13,17 @@
 
 #include "../fio.h"
 
+struct syncio_data {
+ struct iovec *iovecs;
+ struct io_u **io_us;
+ unsigned int queued;
+ unsigned long queued_bytes;
+
+ unsigned long long last_offset;
+ struct fio_file *last_file;
+ enum fio_ddir last_ddir;
+};
+
 static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
 {
         struct fio_file *f = io_u->file;
@@ -81,6 +92,183 @@ static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
         return fio_io_end(td, io_u, ret);
 }
 
+static int fio_vsyncio_getevents(struct thread_data *td, unsigned int min,
+ unsigned int max,
+ struct timespec fio_unused *t)
+{
+ struct syncio_data *sd = td->io_ops->data;
+ int ret;
+
+ if (min) {
+ ret = sd->queued;
+ sd->queued = 0;
+ } else
+ ret = 0;
+
+ dprint(FD_IO, "vsyncio_getevents: min=%d,max=%d: %d\n", min, max, ret);
+ return ret;
+}
+
+static struct io_u *fio_vsyncio_event(struct thread_data *td, int event)
+{
+ struct syncio_data *sd = td->io_ops->data;
+
+ return sd->io_us[event];
+}
+
+static int fio_vsyncio_append(struct thread_data *td, struct io_u *io_u)
+{
+ struct syncio_data *sd = td->io_ops->data;
+
+ if (io_u->ddir == DDIR_SYNC)
+ return 0;
+
+ if (io_u->offset == sd->last_offset && io_u->file == sd->last_file &&
+ io_u->ddir == sd->last_ddir)
+ return 1;
+
+ return 0;
+}
+
+static void fio_vsyncio_set_iov(struct syncio_data *sd, struct io_u *io_u,
+ int index)
+{
+ sd->io_us[index] = io_u;
+ sd->iovecs[index].iov_base = io_u->xfer_buf;
+ sd->iovecs[index].iov_len = io_u->xfer_buflen;
+ sd->last_offset = io_u->offset + io_u->xfer_buflen;
+ sd->last_file = io_u->file;
+ sd->last_ddir = io_u->ddir;
+ sd->queued_bytes += io_u->xfer_buflen;
+ sd->queued++;
+}
+
+static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u)
+{
+ struct syncio_data *sd = td->io_ops->data;
+
+ fio_ro_check(td, io_u);
+
+ if (!fio_vsyncio_append(td, io_u)) {
+ dprint(FD_IO, "vsyncio_queue: no append (%d)\n", sd->queued);
+ /*
+ * If we can't append and have stuff queued, tell fio to
+ * commit those first and then retry this io
+ */
+ if (sd->queued)
+ return FIO_Q_BUSY;
+
+ sd->queued = 0;
+ sd->queued_bytes = 0;
+ fio_vsyncio_set_iov(sd, io_u, 0);
+ } else {
+ if (sd->queued == td->o.iodepth) {
+ dprint(FD_IO, "vsyncio_queue: max depth %d\n", sd->queued);
+ return FIO_Q_BUSY;
+ }
+
+ dprint(FD_IO, "vsyncio_queue: append\n");
+ fio_vsyncio_set_iov(sd, io_u, sd->queued);
+ }
+
+ dprint(FD_IO, "vsyncio_queue: depth now %d\n", sd->queued);
+ return FIO_Q_QUEUED;
+}
+
+/*
+ * Check that we transferred all bytes, or saw an error, etc
+ */
+static int fio_vsyncio_end(struct thread_data *td, ssize_t bytes)
+{
+ struct syncio_data *sd = td->io_ops->data;
+ struct io_u *io_u;
+ unsigned int i;
+ int err;
+
+ /*
+ * transferred everything, perfect
+ */
+ if (bytes == sd->queued_bytes)
+ return 0;
+
+ err = errno;
+ for (i = 0; i < sd->queued; i++) {
+ io_u = sd->io_us[i];
+
+ if (bytes == -1) {
+ io_u->error = err;
+ } else {
+ unsigned int this_io;
+
+ this_io = bytes;
+ if (this_io > io_u->xfer_buflen)
+ this_io = io_u->xfer_buflen;
+
+ io_u->resid = io_u->xfer_buflen - this_io;
+ io_u->error = 0;
+ bytes -= this_io;
+ }
+ }
+
+ if (bytes == -1) {
+ td_verror(td, err, "xfer vsync");
+ return -err;
+ }
+
+ return 0;
+}
+
+static int fio_vsyncio_commit(struct thread_data *td)
+{
+ struct syncio_data *sd = td->io_ops->data;
+ struct fio_file *f;
+ ssize_t ret;
+
+ if (!sd->queued)
+ return 0;
+
+ f = sd->last_file;
+
+ if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) {
+ int err = -errno;
+
+ td_verror(td, errno, "lseek");
+ return err;
+ }
+
+ if (sd->last_ddir == DDIR_READ)
+ ret = readv(f->fd, sd->iovecs, sd->queued);
+ else
+ ret = writev(f->fd, sd->iovecs, sd->queued);
+
+ dprint(FD_IO, "vsyncio_commit: %d\n", (int) ret);
+ return fio_vsyncio_end(td, ret);
+}
+
+static int fio_vsyncio_init(struct thread_data *td)
+{
+ struct syncio_data *sd;
+
+ sd = malloc(sizeof(*sd));
+ memset(sd, 0, sizeof(*sd));
+ sd->last_offset = -1ULL;
+ sd->iovecs = malloc(td->o.iodepth * sizeof(struct iovec));
+ sd->io_us = malloc(td->o.iodepth * sizeof(struct io_u *));
+
+ td->io_ops->data = sd;
+ return 0;
+}
+
+static void fio_vsyncio_cleanup(struct thread_data *td)
+{
+ struct syncio_data *sd = td->io_ops->data;
+
+ free(sd->iovecs);
+ free(sd->io_us);
+ free(sd);
+ td->io_ops->data = NULL;
+}
+
 static struct ioengine_ops ioengine_rw = {
         .name = "sync",
         .version = FIO_IOOPS_VERSION,
@@ -100,14 +288,30 @@ static struct ioengine_ops ioengine_prw = {
         .flags = FIO_SYNCIO,
 };
 
+static struct ioengine_ops ioengine_vrw = {
+ .name = "vsync",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_vsyncio_init,
+ .cleanup = fio_vsyncio_cleanup,
+ .queue = fio_vsyncio_queue,
+ .commit = fio_vsyncio_commit,
+ .event = fio_vsyncio_event,
+ .getevents = fio_vsyncio_getevents,
+ .open_file = generic_open_file,
+ .close_file = generic_close_file,
+ .flags = FIO_SYNCIO,
+};
+
 static void fio_init fio_syncio_register(void)
 {
         register_ioengine(&ioengine_rw);
         register_ioengine(&ioengine_prw);
+ register_ioengine(&ioengine_vrw);
 }
 
 static void fio_exit fio_syncio_unregister(void)
 {
         unregister_ioengine(&ioengine_rw);
         unregister_ioengine(&ioengine_prw);
+ unregister_ioengine(&ioengine_vrw);
 }
diff --git a/options.c b/options.c
index 3de2ae6..1e99810 100644
--- a/options.c
+++ b/options.c
@@ -440,6 +440,9 @@ static struct fio_option options[] = {
                           { .ival = "psync",
                             .help = "Use pread/pwrite",
                           },
+ { .ival = "vsync",
+ .help = "Use readv/writev",
+ },
 #ifdef FIO_HAVE_LIBAIO
                           { .ival = "libaio",
                             .help = "Linux native asynchronous IO",

-- 
Jens Axboe
Received on Mon Feb 04 2008 - 10:59:27 CET

This archive was generated by hypermail 2.2.0 : Mon Feb 04 2008 - 11:00:03 CET