Re: Re: i686 hang on boot in userspace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 7/25/06, gmu 2k6 <[email protected]> wrote:
On 7/25/06, Jens Axboe <[email protected]> wrote:
> On Tue, Jul 25 2006, Jens Axboe wrote:
> > On Tue, Jul 25 2006, gmu 2k6 wrote:
> > > On 7/25/06, Jens Axboe <[email protected]> wrote:
> > > >On Tue, Jul 25 2006, gmu 2k6 wrote:
> > > >> ok, let's nail it to 2.6.17-git5 instead as it survived git status
> > > >> compared to -git6
> > > >> which seems to have correctly booted by accident the last time. timing
> > > >> issues, I guess.
> > > >
> > > >I will try and reproduce it here now. It seems to be in between commit
> > > >271f18f102c789f59644bb6c53a69da1df72b2f4 and commit
> > > >dd67d051529387f6e44d22d1d5540ef281965fdd where the first one could also
> > > >be bad.
> > > >
> > > >I'm assuming that acf421755593f7d7bd9352d57eda796c6eb4fa43 should be
> > > >good, so you can try and verify that
> > > >dd67d051529387f6e44d22d1d5540ef281965fdd is bad and bisect between the
> > > >two. It's only about 6 commits, so should be quick enough to do.
> > >
> > > 1) no luck with remote serial console
> > > 2) netconsole does not work, although connecting to the listener with
> > > netcat and sending strings works
> > > I'm gonna try via physical rs232 9pins and see how that works.
> > > afterwards I will try to bisect the revisions you mentioned.
> > >
> > > btw, the issue seems to come and go, or is timing-dependent, as I
> > > managed to boot and log into a .17-git6 kernel.
> >
> > I can reproduce it, you don't have to spend more time on bisecting or
> > testing. This should fix it:
> >
> > diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> > index 1c4df22..1eac041 100644
> > --- a/drivers/block/cciss.c
> > +++ b/drivers/block/cciss.c
> > @@ -1238,6 +1238,7 @@ static void cciss_softirq_done(struct re
> >       CommandList_struct *cmd = rq->completion_data;
> >       ctlr_info_t *h = hba[cmd->ctlr];
> >       unsigned long flags;
> > +     request_queue_t *q;
> >       u64bit temp64;
> >       int i, ddir;
> >
> > @@ -1260,10 +1261,13 @@ #ifdef CCISS_DEBUG
> >       printk("Done with %p\n", rq);
> >  #endif                               /* CCISS_DEBUG */
> >
> > +     q = rq->q;
> > +
> >       add_disk_randomness(rq->rq_disk);
> >       spin_lock_irqsave(&h->lock, flags);
> >       end_that_request_last(rq, rq->errors);
> >       cmd_free(h, cmd, 1);
> > +     blk_start_queue(q);
> >       spin_unlock_irqrestore(&h->lock, flags);
> >  }
> >
> >
> > A better fix would rework the start_queue logic entirely in the driver,
> > but the above should get you running for now. I'll take a further look.
>
> Something like this matches the current logic better. It's not very good
> from a cpu efficiency point of view, but it's better than what is there
> now since at least it's not in hard irq context.
>
> Not tested yet, will do so right now.
>
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index 1c4df22..a9e0510 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -1233,6 +1233,50 @@ static inline void complete_buffers(stru
>         }
>  }
>
> +static void cciss_check_queues(ctlr_info_t *h)
> +{
> +       int start_queue = h->next_to_run;
> +       int i;
> +
> +       /* check to see if we have maxed out the number of commands that can
> +        * be placed on the queue.  If so then exit.  We do this check here
> +        * in case the interrupt we serviced was from an ioctl and did not
> +        * free any new commands.
> +        */
> +       if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
> +               return;
> +
> +       /* We have room on the queue for more commands.  Now we need to queue
> +        * them up.  We will also keep track of the next queue to run so
> +        * that every queue gets a chance to be started first.
> +        */
> +       for (i = 0; i < h->highest_lun + 1; i++) {
> +               int curr_queue = (start_queue + i) % (h->highest_lun + 1);
> +               /* make sure the disk has been added and the drive is real
> +                * because this can be called from the middle of init_one.
> +                */
> +               if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
> +                       continue;
> +               blk_start_queue(h->gendisk[curr_queue]->queue);
> +
> +               /* check to see if we have maxed out the number of commands
> +                * that can be placed on the queue.
> +                */
> +               if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) {
> +                       if (curr_queue == start_queue) {
> +                               h->next_to_run =
> +                                   (start_queue + 1) % (h->highest_lun + 1);
> +                               break;
> +                       } else {
> +                               h->next_to_run = curr_queue;
> +                               break;
> +                       }
> +               } else {
> +                       curr_queue = (curr_queue + 1) % (h->highest_lun + 1);
> +               }
> +       }
> +}
> +
>  static void cciss_softirq_done(struct request *rq)
>  {
>         CommandList_struct *cmd = rq->completion_data;
> @@ -1264,6 +1308,7 @@ #endif                            /* CCISS_DEBUG */
>         spin_lock_irqsave(&h->lock, flags);
>         end_that_request_last(rq, rq->errors);
>         cmd_free(h, cmd, 1);
> +       cciss_check_queues(h);
>         spin_unlock_irqrestore(&h->lock, flags);
>  }
>
> @@ -2528,8 +2573,6 @@ static irqreturn_t do_cciss_intr(int irq
>         CommandList_struct *c;
>         unsigned long flags;
>         __u32 a, a1, a2;
> -       int j;
> -       int start_queue = h->next_to_run;
>
>         if (interrupt_not_for_us(h))
>                 return IRQ_NONE;
> @@ -2588,45 +2631,6 @@ #                                endif
>                 }
>         }
>
> -       /* check to see if we have maxed out the number of commands that can
> -        * be placed on the queue.  If so then exit.  We do this check here
> -        * in case the interrupt we serviced was from an ioctl and did not
> -        * free any new commands.
> -        */
> -       if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS)
> -               goto cleanup;
> -
> -       /* We have room on the queue for more commands.  Now we need to queue
> -        * them up.  We will also keep track of the next queue to run so
> -        * that every queue gets a chance to be started first.
> -        */
> -       for (j = 0; j < h->highest_lun + 1; j++) {
> -               int curr_queue = (start_queue + j) % (h->highest_lun + 1);
> -               /* make sure the disk has been added and the drive is real
> -                * because this can be called from the middle of init_one.
> -                */
> -               if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
> -                       continue;
> -               blk_start_queue(h->gendisk[curr_queue]->queue);
> -
> -               /* check to see if we have maxed out the number of commands
> -                * that can be placed on the queue.
> -                */
> -               if ((find_first_zero_bit(h->cmd_pool_bits, NR_CMDS)) == NR_CMDS) {
> -                       if (curr_queue == start_queue) {
> -                               h->next_to_run =
> -                                   (start_queue + 1) % (h->highest_lun + 1);
> -                               goto cleanup;
> -                       } else {
> -                               h->next_to_run = curr_queue;
> -                               goto cleanup;
> -                       }
> -               } else {
> -                       curr_queue = (curr_queue + 1) % (h->highest_lun + 1);
> -               }
> -       }
> -
> -      cleanup:
>         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
>         return IRQ_HANDLED;
>  }

this makes the cciss init hang.

I've patched Linus' trunk/HEAD with it, btw.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux