Re: FIO enter dead-loop to look for new file

From: Zhang, Yanmin <yanmin_zhang_at_linux.intel.com>
Date: Wed, 13 Feb 2008 16:58:45 +0800

On Tue, 2008-02-05 at 10:06 +0100, Jens Axboe wrote:
> On Tue, Feb 05 2008, Jens Axboe wrote:
> > On Tue, Feb 05 2008, Zhang, Yanmin wrote:
> > > On Mon, 2008-02-04 at 11:01 +0100, Jens Axboe wrote:
> > > > On Mon, Feb 04 2008, Zhang, Yanmin wrote:
> > > > > On Mon, 2008-02-04 at 17:03 +0800, Zhang, Yanmin wrote:
> > > > > > When I used the job file below to test, it hangs. I used gdb to check it and found
> > > > > > thread_main keeps calling clear_io_state over and over again. Every sub-process
> > > > > > has one file, but it doesn't finish its task after it finishes the file, so it
> > > > > > calls do_io again and again although it has no more files.
> > > > > >
> > > > > > If I change it to bsrange=4k-4k, it does work. If it's 2k-4k, it doesn't work either.
> > > > > If I use bs=2k to replace bsrange, it looks like it does work although
> > > > > my testing is still running.
> > > >
> > > > Can you try the current version, I fixed some bugs in this area on
> > > > friday? Either use git to download it, or just use
> > > >
> > > > http://brick.kernel.dk/snaps/fio-git-latest.tar.gz
> > > I tried it. With bsrange=2k-4k, it doesn't hang. However, there is
> > > another issue. I used 9 disks and every disk has a 1GB file. Every 2
> > > threads do I/O on one file, so there are 18 threads and 9 groups. With
> > > the new fio-git, the status shows there are just 4 threads working on
> > > I/O. The result also showed 5 groups have no result.
> >
> > OK, I'll play around with your job file and poke at it a bit!
>
> Two bugs there:
>
> - Parser doesn't set the 2nd bsrange like HOWTO says it does, so when
> you use bsrange=1k-4k you only get that range for reads, writes will
> use the default 4k.
> - The above exposed a bug in the offset retrieval for a random IO, where
> it generated an offset that was OK for the direction (say end - 1k),
> but failed because the blocksize was actually 4k for that IO.
>
> Patch below, checked in as two separate fixes. Please verify that it now
> works for you, thanks!
Sorry for replying late. I took 1 week leave because of holiday.

I verified it with the latest git tarball and it does fix the issue.

-yanmin

>
> diff --git a/io_u.c b/io_u.c
> index 511f2e9..8f10bfd 100644
> --- a/io_u.c
> +++ b/io_u.c
> @@ -73,11 +73,12 @@ static void mark_random_map(struct thread_data *td, struct io_u *io_u)
> }
>
> static inline unsigned long long last_block(struct thread_data *td,
> - struct fio_file *f)
> + struct fio_file *f,
> + enum fio_ddir ddir)
> {
> unsigned long long max_blocks;
>
> - max_blocks = f->io_size / td->o.rw_min_bs;
> + max_blocks = f->io_size / td->o.min_bs[ddir];
> if (!max_blocks)
> return 0;
>
> @@ -88,7 +89,7 @@ static inline unsigned long long last_block(struct thread_data *td,
> * Return the next free block in the map.
> */
> static int get_next_free_block(struct thread_data *td, struct fio_file *f,
> - unsigned long long *b)
> + enum fio_ddir ddir, unsigned long long *b)
> {
> int i;
>
> @@ -97,7 +98,7 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f,
> while ((*b) * td->o.rw_min_bs < f->real_file_size) {
> if (f->file_map[i] != -1UL) {
> *b += fio_ffz(f->file_map[i]);
> - if (*b > last_block(td, f))
> + if (*b > last_block(td, f, ddir))
> break;
> f->last_free_lookup = i;
> return 0;
> @@ -112,14 +113,14 @@ static int get_next_free_block(struct thread_data *td, struct fio_file *f,
> }
>
> static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
> - unsigned long long *b)
> + enum fio_ddir ddir, unsigned long long *b)
> {
> unsigned long long r, rb;
> int loops = 5;
>
> do {
> r = os_random_long(&td->random_state);
> - *b = last_block(td, f);
> + *b = last_block(td, f, ddir);
>
> /*
> * if we are not maintaining a random map, we are done.
> @@ -144,7 +145,7 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
> loops = 10;
> do {
> f->last_free_lookup = (f->num_maps - 1) * (r / (RAND_MAX+1.0));
> - if (!get_next_free_block(td, f, b))
> + if (!get_next_free_block(td, f, ddir, b))
> return 0;
>
> r = os_random_long(&td->random_state);
> @@ -154,7 +155,7 @@ static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
> * that didn't work either, try exhaustive search from the start
> */
> f->last_free_lookup = 0;
> - return get_next_free_block(td, f, b);
> + return get_next_free_block(td, f, ddir, b);
> }
>
> /*
> @@ -166,21 +167,23 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u)
> {
> struct fio_file *f = io_u->file;
> unsigned long long b;
> + enum fio_ddir ddir = io_u->ddir;
>
> if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
> td->ddir_nr = td->o.ddir_nr;
>
> - if (get_next_rand_offset(td, f, &b))
> + if (get_next_rand_offset(td, f, ddir, &b))
> return 1;
> } else {
> if (f->last_pos >= f->real_file_size) {
> - if (!td_random(td) || get_next_rand_offset(td, f, &b))
> + if (!td_random(td) ||
> + get_next_rand_offset(td, f, ddir, &b))
> return 1;
> } else
> - b = (f->last_pos - f->file_offset) / td->o.rw_min_bs;
> + b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
> }
>
> - io_u->offset = (b * td->o.rw_min_bs) + f->file_offset;
> + io_u->offset = (b * td->o.min_bs[ddir]) + f->file_offset;
> if (io_u->offset >= f->real_file_size) {
> dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
> io_u->offset, f->real_file_size);
> @@ -219,8 +222,11 @@ static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
> buflen = (buflen + td->o.min_bs[ddir] - 1) & ~(td->o.min_bs[ddir] - 1);
> }
>
> - if (io_u->offset + buflen > io_u->file->real_file_size)
> + if (io_u->offset + buflen > io_u->file->real_file_size) {
> + dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
> + td->o.min_bs[ddir], ddir);
> buflen = td->o.min_bs[ddir];
> + }
>
> return buflen;
> }
> @@ -385,6 +391,8 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
>
> if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
> dprint(FD_IO, "io_u %p, offset too large\n", io_u);
> + dprint(FD_IO, " off=%llu/%lu > %llu\n", io_u->offset,
> + io_u->buflen, io_u->file->real_file_size);
> return 1;
> }
>
> diff --git a/parse.c b/parse.c
> index f907a34..90874f1 100644
> --- a/parse.c
> +++ b/parse.c
> @@ -350,7 +350,7 @@ static int __handle_option(struct fio_option *o, const char *ptr, void *data,
> val_store(ilp, ul1, o->off1, data);
> val_store(ilp, ul2, o->off2, data);
> }
> - if (!more && o->off3 && o->off4) {
> + if (o->off3 && o->off4) {
> val_store(ilp, ul1, o->off3, data);
> val_store(ilp, ul2, o->off4, data);
> }
>
Received on Wed Feb 13 2008 - 09:58:45 CET

This archive was generated by hypermail 2.2.0 : Wed Feb 13 2008 - 10:30:01 CET