On Fri, Aug 26, 2005 at 09:38:30AM -0500, Shawn Iverson wrote: > I have come across an error during preparing the rebooting of my system > with an updated kernel. I am receiving a segmentation fault during the > setup phase of grub. I have RAID 1, so grub-install does not work, of > course. I have instead either done it by hand or used my own script > (see below). I have been able to perform this task successfully in the > past up to this point with prior kernels on this hardware. Below is the > output when doing it manually, with /boot on /dev/hda1 mirrored to > /dev/hdb1 on an ext3 volume (/dev/md0). My lvm volume is at /dev/hda2 > mirrored to /dev/hdb2 (/dev/md1). Both volumes are clean, and /dev/hdc1 > and /dev/hdc2 function as hotspares for their respective partitions. > > I have noted the thread "grub segmentation fault when trying to do > setup" on July 19th with an issue like this one. I was hoping that > someone would have some insight into this before I go clear across town > to boot into rescue mode to complete this task. After many trials with GRUB and RAID, including "corruption" due to out-of-sync RAID1 components, I've taken to doing the following: 1. Install ms-sys (http://ms-sys.sourceforge.net/) in the MBR of both disks. 2. Mark my boot partitions active, e.g., echo ',,,*' | sfdisk /dev/sda -N1 echo ',,,*' | sfdisk /dev/sdb -N1 3. Apply the attached patch from Sergey Vlasov <vsu@xxxxxxxxxxx> (which I've forward-ported to grub-0.97); it allows one to map partitions as well as drives in device.map: (fd0) /dev/fd0 (hd0) /dev/sda (hd1) /dev/sdb (hd0,0) /dev/md1 (hd1,0) /dev/md1 [You can probably find other versions of the patch in ALTLinux grub SRPMS.] 4. Set up GRUB with the stage1 in the boot partition's boot sector. Because /dev/sd[ab] is automatically redirected to /dev/md1, this updates both drives simultaneously and coherently. As long as I patch GRUB before updating to a later version, no worries. 
If one doesn't do this, one should be careful that the RAID1 doesn't get out of sync. When I have to use an unpatched GRUB without ms-sys, I do the following: 1. umount /boot 2. mdadm --stop /dev/md1 3. grub-install or equivalent for sda and sdb, to get the MBR in place. 4. mdadm -A /dev/md1 /dev/sda1 5. mdadm /dev/md1 -a /dev/sdb1 6. mount /boot There are circumstances under which the above will not work, but with modern disks with linear addressing, blah, blah, it works fine. Regards, Bill Rugolsky
--- grub-0.97/stage2/disk_io.c.alt-partition-map 2004-05-23 12:35:24.000000000 -0400 +++ grub-0.97/stage2/disk_io.c 2005-06-08 13:36:03.000000000 -0400 @@ -373,7 +373,8 @@ embed a Stage 1.5 into a partition instead of a MBR, use system calls directly instead of biosdisk, because of the bug in Linux. *sigh* */ - return write_to_partition (device_map, current_drive, current_partition, + return write_to_partition (device_map, partition_map, + current_drive, current_partition, sector, sector_count, buf); } else --- grub-0.97/stage2/shared.h.alt-partition-map 2005-06-08 13:36:03.000000000 -0400 +++ grub-0.97/stage2/shared.h 2005-06-08 13:36:03.000000000 -0400 @@ -581,6 +581,8 @@ extern char **device_map; /* The filename which stores the information about a device map. */ extern char *device_map_file; +/* The map between drive/partition numbers and UNIX device file names. */ +extern struct partition_map_entry *partition_map; /* The array of geometries. */ extern struct geometry *disks; /* Assign DRIVE to a device name DEVICE. */ --- grub-0.97/lib/device.c.alt-partition-map 2005-03-27 18:14:25.000000000 -0500 +++ grub-0.97/lib/device.c 2005-06-08 13:36:03.000000000 -0400 @@ -131,6 +131,122 @@ #include <shared.h> #include <device.h> +#if defined(__linux__) +/* The 2.6 kernel has removed all of the geometry handling for IDE drives + * that did fixups for LBA, etc. This means that the geometry we get + * with the ioctl has a good chance of being wrong. So, we get to + * also know about partition tables and try to read what the geometry + * is there. 
*grumble* Very closely based on code from cfdisk + */ +static void get_kernel_geometry(int fd, int *cyl, int *heads, int *sectors) { + struct hd_geometry hdg; + + if (ioctl (fd, HDIO_GETGEO, &hdg)) + return; + + *cyl = hdg.cylinders; + *heads = hdg.heads; + *sectors = hdg.sectors; +} + +struct partition { + unsigned char boot_ind; /* 0x80 - active */ + unsigned char head; /* starting head */ + unsigned char sector; /* starting sector */ + unsigned char cyl; /* starting cylinder */ + unsigned char sys_ind; /* What partition type */ + unsigned char end_head; /* end head */ + unsigned char end_sector; /* end sector */ + unsigned char end_cyl; /* end cylinder */ + unsigned char start4[4]; /* starting sector counting from 0 */ + unsigned char size4[4]; /* nr of sectors in partition */ +}; + +#define ALIGNMENT 2 +typedef union { + struct { + unsigned char align[ALIGNMENT]; + unsigned char b[SECTOR_SIZE]; + } c; + struct { + unsigned char align[ALIGNMENT]; + unsigned char buffer[0x1BE]; + struct partition part[4]; + unsigned char magicflag[2]; + } p; +} partition_table; + +#define PART_TABLE_FLAG0 0x55 +#define PART_TABLE_FLAG1 0xAA + +static void +get_partition_table_geometry(partition_table *bufp, int *cyl, int *heads, + int *sectors) { + struct partition *p; + int i,h,s,hh,ss; + int first = 1; + int bad = 0; + + if (bufp->p.magicflag[0] != PART_TABLE_FLAG0 || + bufp->p.magicflag[1] != PART_TABLE_FLAG1) { + /* Matthew Wilcox: slightly friendlier version of + fatal(_("Bad signature on partition table"), 3); + */ + fprintf(stderr, "Unknown partition table signature\n"); + return; + } + + hh = ss = 0; + for (i=0; i<4; i++) { + p = &(bufp->p.part[i]); + if (p->sys_ind != 0) { + h = p->end_head + 1; + s = (p->end_sector & 077); + if (first) { + hh = h; + ss = s; + first = 0; + } else if (hh != h || ss != s) + bad = 1; + } + } + + if (!first && !bad) { + *heads = hh; + *sectors = ss; + } +} + +static void get_linux_geometry (int fd, struct geometry *geom) { + int kern_cyl = 
0, kern_head = 0, kern_sectors = 0; + int pt_cyl = 0, pt_head = 0, pt_sectors = 0; + partition_table bufp; + + get_kernel_geometry(fd, &kern_cyl, &kern_head, &kern_sectors); + + if (read(fd, bufp.c.b, SECTOR_SIZE) == SECTOR_SIZE) { + get_partition_table_geometry(&bufp, &pt_cyl, &pt_head, &pt_sectors); + } else { + fprintf(stderr, "Unable to read partition table: %s\n", strerror(errno)); + } + + if (pt_head && pt_sectors) { + int cyl_size; + + geom->heads = pt_head; + geom->sectors = pt_sectors; + cyl_size = pt_head * pt_sectors; + geom->cylinders = geom->total_sectors/cyl_size; + } else { + geom->heads = kern_head; + geom->sectors = kern_sectors; + geom->cylinders = kern_cyl; + } + + return; +} +#endif + /* Get the geometry of a drive DRIVE. */ void get_drive_geometry (struct geometry *geom, char **map, int drive) @@ -151,20 +267,16 @@ #if defined(__linux__) /* Linux */ { - struct hd_geometry hdg; unsigned long nr; - if (ioctl (fd, HDIO_GETGEO, &hdg)) - goto fail; - if (ioctl (fd, BLKGETSIZE, &nr)) goto fail; - - /* Got the geometry, so save it. */ - geom->cylinders = hdg.cylinders; - geom->heads = hdg.heads; - geom->sectors = hdg.sectors; + geom->total_sectors = nr; + get_linux_geometry(fd, geom); + + if (!geom->heads && !geom->cylinders && !geom->sectors) + goto fail; goto success; } @@ -489,9 +601,27 @@ return 1; } +#ifdef __linux__ +/* Find device name for PARTITION on DRIVE in MAP. */ +static const char * +find_device_for_partition (struct partition_map_entry *map, + int drive, int partition) +{ + while (map) + { + if ((map->drive == drive) && (map->partition == partition)) + return map->device_name; + map = map->next; + } + + return NULL; +} +#endif /* __linux__ */ + /* Read mapping information from FP, and write it to MAP. 
*/ static int -read_device_map (FILE *fp, char **map, const char *map_file) +read_device_map (FILE *fp, char **map, const char *map_file, + struct partition_map_entry **partition_map) { auto void show_error (int no, const char *msg); auto void show_warning (int no, const char *msg, ...); @@ -521,6 +651,9 @@ char *ptr, *eptr; int drive; int is_floppy = 0; +#ifdef __linux__ + int partition = -1; +#endif /* __linux__ */ /* Increase the number of lines. */ line_number++; @@ -571,6 +704,27 @@ if (! is_floppy) drive += 0x80; + +#ifdef __linux__ + /* Check for a possible partition map entry. */ + if (*ptr == ',') + { + if (is_floppy) + { + show_error (line_number, + "Partitions on floppy drives are not allowed"); + return 0; + } + + ptr++; + partition = strtoul (ptr, &ptr, 10); + if (partition < 0 || partition > 255) /* XXX: max value? */ + { + show_error (line_number, "Bad partition number"); + return 0; + } + } +#endif /* __linux__ */ if (*ptr != ')') { @@ -595,6 +749,35 @@ eptr++; *eptr = 0; +#ifdef __linux__ + if (partition != -1) + { + struct partition_map_entry *new_entry; + /* Multiple entries for a given partition is not allowed. */ + if (find_device_for_partition (*partition_map, drive, partition)) + { + show_error (line_number, "Duplicated entry found"); + return 0; + } + + /* Allocate a new partition map entry. */ + new_entry = malloc (sizeof (struct partition_map_entry)); + assert (new_entry); + + /* Fill the entry. */ + new_entry->next = *partition_map; + new_entry->drive = drive; + new_entry->partition = partition; + new_entry->device_name = strdup (ptr); + assert (new_entry->device_name); + + /* Place the new entry at the beginning of the list. */ + *partition_map = new_entry; + + continue; + } +#endif + /* Multiple entries for a given drive is not allowed. */ if (map[drive]) { @@ -616,7 +799,8 @@ If it is zero, don't probe any floppy at all. If it is one, probe one floppy. If it is two, probe two floppies. And so on. 
*/ int -init_device_map (char ***map, const char *map_file, int floppy_disks) +init_device_map (char ***map, struct partition_map_entry **partition_map, + const char *map_file, int floppy_disks) { int i; int num_hd = 0; @@ -641,7 +825,7 @@ { int ret; - ret = read_device_map (fp, *map, map_file); + ret = read_device_map (fp, *map, map_file, partition_map); fclose (fp); return ret; } @@ -812,7 +996,7 @@ /* Restore the memory consumed for MAP. */ void -restore_device_map (char **map) +restore_device_map (char **map, struct partition_map_entry *partition_map) { int i; @@ -821,6 +1005,17 @@ free (map[i]); free (map); + +#ifdef __linux__ + while (partition_map) + { + struct partition_map_entry *next = partition_map->next; + if (partition_map->device_name) + free (partition_map->device_name); + free (partition_map); + partition_map = next; + } +#endif /* __linux__ */ } #ifdef __linux__ @@ -839,10 +1034,12 @@ } int -write_to_partition (char **map, int drive, int partition, +write_to_partition (char **map, struct partition_map_entry *partition_map, + int drive, int partition, int sector, int size, const char *buf) { char dev[PATH_MAX]; /* XXX */ + const char *partition_dev; int fd; if ((partition & 0x00FF00) != 0x00FF00) @@ -852,16 +1049,27 @@ errnum = ERR_DEV_VALUES; return 1; } - - assert (map[drive] != 0); - - strcpy (dev, map[drive]); - if (have_devfs ()) + + /* First try to find the entry in PARTITION_MAP. 
*/ + partition_dev = find_device_for_partition (partition_map, drive, + (partition >> 16) & 0xFF); + if (partition_dev) + { + strcpy (dev, partition_dev); /* XXX */ + } + else { - if (strcmp (dev + strlen(dev) - 5, "/disc") == 0) - strcpy (dev + strlen(dev) - 5, "/part"); + assert (map[drive] != 0); + + strcpy (dev, map[drive]); + if (have_devfs ()) + { + if (strcmp (dev + strlen(dev) - 5, "/disc") == 0) + strcpy (dev + strlen(dev) - 5, "/part"); + } + + sprintf (dev + strlen(dev), "%d", ((partition >> 16) & 0xFF) + 1); } - sprintf (dev + strlen(dev), "%d", ((partition >> 16) & 0xFF) + 1); /* Open the partition. */ fd = open (dev, O_RDWR); --- grub-0.97/lib/device.h.alt-partition-map 2004-05-23 12:35:00.000000000 -0400 +++ grub-0.97/lib/device.h 2005-06-08 13:37:58.000000000 -0400 @@ -32,17 +32,31 @@ #define DEFAULT_HD_HEADS 128 #define DEFAULT_HD_SECTORS 63 + +struct partition_map_entry +{ + struct partition_map_entry *next; + int drive; + int partition; + char *device_name; +}; + /* Function prototypes. 
*/ extern void get_drive_geometry (struct geometry *geom, char **map, int drive); extern int check_device (const char *device); -extern int init_device_map (char ***map, const char *map_file, +extern int init_device_map (char ***map, + struct partition_map_entry **part_map, + const char *map_file, int no_floppies); -extern void restore_device_map (char **map); +extern void restore_device_map (char **map, + struct partition_map_entry *part_map); #ifdef __linux__ extern int is_disk_device (char **map, int drive); -extern int write_to_partition (char **map, int drive, int partition, - int offset, int size, const char *buf); +extern int write_to_partition (char **map, + struct partition_map_entry *part_map, + int drive, int partition, + int offset, int size, const char *buf); #endif /* __linux__ */ #endif /* DEVICE_MAP_HEADER */ --- grub-0.97/grub/asmstub.c.alt-partition-map 2005-06-08 13:36:03.000000000 -0400 +++ grub-0.97/grub/asmstub.c 2005-06-08 13:36:03.000000000 -0400 @@ -86,6 +86,9 @@ /* The map between BIOS drives and UNIX device file names. */ char **device_map = 0; +/* The map between drive/partition numbers and UNIX device file names. */ +struct partition_map_entry *partition_map = 0; + /* The jump buffer for exiting correctly. */ static jmp_buf env_for_exit; @@ -155,7 +158,8 @@ for (i = 0; i < NUM_DISKS; i++) disks[i].flags = -1; - if (! init_device_map (&device_map, device_map_file, floppy_disks)) + if (! init_device_map (&device_map, &partition_map, device_map_file, + floppy_disks)) return 1; /* Check some invariants. */ @@ -213,8 +217,9 @@ close (serial_fd); /* Release memory. */ - restore_device_map (device_map); + restore_device_map (device_map, partition_map); device_map = 0; + partition_map = 0; free (disks); disks = 0; free (scratch);