Re: grub segmentation fault on RAID 1 lvm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Aug 26, 2005 at 09:38:30AM -0500, Shawn Iverson wrote:
> I have come across an error during preparing the rebooting of my system
> with an updated kernel.  I am receiving a segmentation fault during the
> setup phase of grub.  I have RAID 1, so grub-install does not work, of
> course.  I have instead either done it by hand or used my own script
> (see below).  I have been able to perform this task successfully in the
> past up to this point with prior kernels on this hardware.  Below is the
> output when doing it manually, with /boot on /dev/hda1 mirrored to
> /dev/hdb1 on an ext3 volume (/dev/md0).  My lvm volume is at /dev/hda2
> mirrored to /dev/hdb2 (/dev/md1).  Both volumes are clean, and /dev/hdc1
> and /dev/hdc2 function as hotspares for their respective partitions.
> 
> I have noted the thread "grub segmentation fault when trying to do
> setup" on July 19th with an issue like this one.  I was hoping that
> someone would have some insight into this before I go clear across town
> to boot into rescue mode to complete this task.
 
After many trials with GRUB and RAID, including "corruption" due to
out-of-sync RAID1 components, I've taken to doing the following:

1. Install ms-sys (http://ms-sys.sourceforge.net/) in the MBR
   of both disks.

2. Mark my boot partitions active, e.g.,

	echo ',,,*' | sfdisk /dev/sda -N1
	echo ',,,*' | sfdisk /dev/sdb -N1

3. Apply the attached patch from Sergey Vlasov <vsu@xxxxxxxxxxx>, which
   I've forward-ported to grub-0.97, that allows one to map partitions
   as well as drives in device.map:

   (fd0)     /dev/fd0
   (hd0)    /dev/sda
   (hd1)    /dev/sdb
   (hd0,0) /dev/md1
   (hd1,0) /dev/md1

   [You can probably find other versions of the patch in ALTLinux
    grub SRPMS.]

4. Set up grub with the stage1 in the boot partition bootsector,
   which, since /dev/sd[ab] is automatically redirected to /dev/md1,
   updates both drives simultaneously and coherently.  As long as I
   patch GRUB before updating to a later version, no worries.

If one doesn't do this, one should be careful that the RAID1 doesn't
get out of sync.  When I have to use an unpatched GRUB without ms-sys,
I do the following:

1. umount /boot

2. mdadm --stop /dev/md1

3. grub-install or equivalent for sda, sdb, to get the MBR in place.

4. mdadm -A /dev/md1 /dev/sda1

5. mdadm /dev/md1 -a /dev/sdb1

6. mount /boot

There are circumstances under which the above will not work, but with
modern disks with linear addressing, blah, blah, it works fine.

Regards,

	Bill Rugolsky
--- grub-0.97/stage2/disk_io.c.alt-partition-map	2004-05-23 12:35:24.000000000 -0400
+++ grub-0.97/stage2/disk_io.c	2005-06-08 13:36:03.000000000 -0400
@@ -373,7 +373,8 @@
 	 embed a Stage 1.5 into a partition instead of a MBR, use system
 	 calls directly instead of biosdisk, because of the bug in
 	 Linux. *sigh*  */
-      return write_to_partition (device_map, current_drive, current_partition,
+      return write_to_partition (device_map, partition_map,
+				 current_drive, current_partition,
 				 sector, sector_count, buf);
     }
   else
--- grub-0.97/stage2/shared.h.alt-partition-map	2005-06-08 13:36:03.000000000 -0400
+++ grub-0.97/stage2/shared.h	2005-06-08 13:36:03.000000000 -0400
@@ -581,6 +581,8 @@
 extern char **device_map;
 /* The filename which stores the information about a device map.  */
 extern char *device_map_file;
+/* The map between drive/partition numbers and UNIX device file names.  */
+extern struct partition_map_entry *partition_map;
 /* The array of geometries.  */
 extern struct geometry *disks;
 /* Assign DRIVE to a device name DEVICE.  */
--- grub-0.97/lib/device.c.alt-partition-map	2005-03-27 18:14:25.000000000 -0500
+++ grub-0.97/lib/device.c	2005-06-08 13:36:03.000000000 -0400
@@ -131,6 +131,122 @@
 #include <shared.h>
 #include <device.h>
 
+#if defined(__linux__)
+/* The 2.6 kernel has removed all of the geometry handling for IDE drives
+ * that did fixups for LBA, etc.  This means that the geometry we get
+ * with the ioctl has a good chance of being wrong.  So, we get to 
+ * also know about partition tables and try to read what the geometry
+ * is there. *grumble*   Very closely based on code from cfdisk
+ */
+static void get_kernel_geometry(int fd, int *cyl, int *heads, int *sectors) {
+    struct hd_geometry hdg;
+    /* Ask the kernel (HDIO_GETGEO) for the geometry of the device open on FD.  */
+    if (ioctl (fd, HDIO_GETGEO, &hdg))
+        return; /* nonzero ioctl result: *cyl, *heads, *sectors are left untouched */
+    /* The ioctl returned 0: copy the reported values out to the caller.  */
+    *cyl = hdg.cylinders;
+    *heads = hdg.heads;
+    *sectors = hdg.sectors;
+}
+
+struct partition { /* one 16-byte entry of the partition table held in partition_table */
+        unsigned char boot_ind;         /* 0x80 - active */
+        unsigned char head;             /* starting head */
+        unsigned char sector;           /* starting sector */
+        unsigned char cyl;              /* starting cylinder */
+        unsigned char sys_ind;          /* What partition type; 0 is treated as an unused entry */
+        unsigned char end_head;         /* end head; end_head + 1 is used as the head count */
+        unsigned char end_sector;       /* end sector; only the low 6 bits (& 077) are used */
+        unsigned char end_cyl;          /* end cylinder */
+        unsigned char start4[4];        /* starting sector counting from 0 */
+        unsigned char size4[4];         /* nr of sectors in partition */
+};
+
+#define ALIGNMENT 2
+typedef union { /* one sector, viewed either raw (c) or as a partition table (p) */
+    struct {
+	unsigned char align[ALIGNMENT]; /* leading padding; not accessed by the code in this patch */
+	unsigned char b[SECTOR_SIZE]; /* raw sector bytes; this is the buffer passed to read() */
+    } c;
+    struct {
+	unsigned char align[ALIGNMENT]; /* same padding as in c, so both views line up */
+	unsigned char buffer[0x1BE]; /* sector bytes that precede the partition entries */
+	struct partition part[4]; /* the four partition entries */
+	unsigned char magicflag[2]; /* signature; compared with PART_TABLE_FLAG0/PART_TABLE_FLAG1 */
+    } p;
+} partition_table;
+
+#define PART_TABLE_FLAG0 0x55
+#define PART_TABLE_FLAG1 0xAA
+
+static void
+get_partition_table_geometry(partition_table *bufp, int *cyl, int *heads, 
+                             int *sectors) {
+    struct partition *p;
+    int i,h,s,hh,ss;
+    int first = 1;
+    int bad = 0;
+    /* Derive heads/sectors from the end CHS values of the used entries in BUFP.  */
+    if (bufp->p.magicflag[0] != PART_TABLE_FLAG0 ||
+	bufp->p.magicflag[1] != PART_TABLE_FLAG1) {
+	    /* Matthew Wilcox: slightly friendlier version of
+	       fatal(_("Bad signature on partition table"), 3);
+	    */
+            fprintf(stderr, "Unknown partition table signature\n");
+	    return; /* outputs are left untouched */
+    }
+    /* Every used entry (sys_ind != 0) must imply the same (heads, sectors) pair.  */
+    hh = ss = 0;
+    for (i=0; i<4; i++) {
+	p = &(bufp->p.part[i]);
+	if (p->sys_ind != 0) {
+	    h = p->end_head + 1;
+	    s = (p->end_sector & 077);
+	    if (first) {
+		hh = h;
+		ss = s;
+		first = 0;
+	    } else if (hh != h || ss != s)
+		bad = 1; /* entries disagree: nothing will be reported */
+	}
+    }
+    /* Report only when at least one entry was used and all agreed; *cyl is never written.  */
+    if (!first && !bad) {
+	*heads = hh;
+	*sectors = ss;
+    }
+}
+
+static void get_linux_geometry (int fd, struct geometry *geom) {
+    int kern_cyl = 0, kern_head = 0, kern_sectors = 0;
+    int pt_cyl = 0, pt_head = 0, pt_sectors = 0;
+    partition_table bufp;
+    /* Fill GEOM's cylinders/heads/sectors, preferring the partition table over the kernel.  */
+    get_kernel_geometry(fd, &kern_cyl, &kern_head, &kern_sectors);
+    /* Read SECTOR_SIZE bytes from FD's current offset and parse them as a partition table.  */
+    if (read(fd, bufp.c.b, SECTOR_SIZE) == SECTOR_SIZE) {
+        get_partition_table_geometry(&bufp, &pt_cyl, &pt_head, &pt_sectors);
+    } else {
+        fprintf(stderr, "Unable to read partition table: %s\n", strerror(errno));
+    }
+    /* pt_head/pt_sectors stay 0 unless the table yielded a consistent answer.  */
+    if (pt_head && pt_sectors) {
+        int cyl_size;
+
+        geom->heads = pt_head;
+        geom->sectors = pt_sectors;
+        cyl_size = pt_head * pt_sectors;
+        geom->cylinders = geom->total_sectors/cyl_size; /* reads total_sectors: must be set before the call */
+    } else {
+        geom->heads = kern_head; /* kern_* are still 0 if HDIO_GETGEO failed */
+        geom->sectors = kern_sectors;
+        geom->cylinders = kern_cyl;
+    }
+
+    return;
+}
+#endif
+
 /* Get the geometry of a drive DRIVE.  */
 void
 get_drive_geometry (struct geometry *geom, char **map, int drive)
@@ -151,20 +267,16 @@
 #if defined(__linux__)
   /* Linux */
   {
-    struct hd_geometry hdg;
     unsigned long nr;
     
-    if (ioctl (fd, HDIO_GETGEO, &hdg))
-      goto fail;
-
     if (ioctl (fd, BLKGETSIZE, &nr))
       goto fail;
-    
-    /* Got the geometry, so save it. */
-    geom->cylinders = hdg.cylinders;
-    geom->heads = hdg.heads;
-    geom->sectors = hdg.sectors;
+
     geom->total_sectors = nr;
+    get_linux_geometry(fd, geom);
+
+    if (!geom->heads && !geom->cylinders && !geom->sectors)
+        goto fail;
     
     goto success;
   }
@@ -489,9 +601,27 @@
   return 1;
 }
 
+#ifdef __linux__
+/* Find device name for PARTITION on DRIVE in the list MAP; NULL if none.  */
+static const char *
+find_device_for_partition (struct partition_map_entry *map,
+			   int drive, int partition)
+{
+  while (map) /* walk the singly linked list; an empty (null) MAP falls through */
+    {
+      if ((map->drive == drive) && (map->partition == partition))
+	return map->device_name; /* the entry's own string, not a copy */
+      map = map->next;
+    }
+  /* Reached the end of the list without a match.  */
+  return NULL;
+}
+#endif /* __linux__ */
+
 /* Read mapping information from FP, and write it to MAP.  */
 static int
-read_device_map (FILE *fp, char **map, const char *map_file)
+read_device_map (FILE *fp, char **map, const char *map_file,
+		 struct partition_map_entry **partition_map)
 {
   auto void show_error (int no, const char *msg);
   auto void show_warning (int no, const char *msg, ...);
@@ -521,6 +651,9 @@
       char *ptr, *eptr;
       int drive;
       int is_floppy = 0;
+#ifdef __linux__
+      int partition = -1;
+#endif /* __linux__ */
       
       /* Increase the number of lines.  */
       line_number++;
@@ -571,6 +704,27 @@
       
       if (! is_floppy)
 	drive += 0x80;
+
+#ifdef __linux__
+      /* Check for a possible partition map entry.  */
+      if (*ptr == ',')
+	{
+	  if (is_floppy)
+	    {
+	      show_error (line_number,
+			  "Partitions on floppy drives are not allowed");
+	      return 0;
+	    }
+
+	  ptr++;
+	  partition = strtoul (ptr, &ptr, 10);
+	  if (partition < 0 || partition > 255) /* XXX: max value? */
+	    {
+	      show_error (line_number, "Bad partition number");
+	      return 0;
+	    }
+	}
+#endif /* __linux__ */
       
       if (*ptr != ')')
 	{
@@ -595,6 +749,35 @@
 	eptr++;
       *eptr = 0;
 
+#ifdef __linux__
+      if (partition != -1)
+	{
+	  struct partition_map_entry *new_entry;
+	  /* Multiple entries for a given partition is not allowed.  */
+	  if (find_device_for_partition (*partition_map, drive, partition))
+	    {
+	      show_error (line_number, "Duplicated entry found");
+	      return 0;
+	    }
+      
+	  /* Allocate a new partition map entry.  */
+	  new_entry = malloc (sizeof (struct partition_map_entry));
+	  assert (new_entry);
+      
+	  /* Fill the entry.  */
+	  new_entry->next = *partition_map;
+	  new_entry->drive = drive;
+	  new_entry->partition = partition;
+	  new_entry->device_name = strdup (ptr);
+	  assert (new_entry->device_name);
+      
+	  /* Place the new entry at the beginning of the list.  */
+	  *partition_map = new_entry;
+
+	  continue;
+	}
+#endif
+
       /* Multiple entries for a given drive is not allowed.  */
       if (map[drive])
 	{
@@ -616,7 +799,8 @@
    If it is zero, don't probe any floppy at all. If it is one, probe one
    floppy. If it is two, probe two floppies. And so on.  */
 int
-init_device_map (char ***map, const char *map_file, int floppy_disks)
+init_device_map (char ***map, struct partition_map_entry **partition_map,
+		 const char *map_file, int floppy_disks)
 {
   int i;
   int num_hd = 0;
@@ -641,7 +825,7 @@
 	{
 	  int ret;
 
-	  ret = read_device_map (fp, *map, map_file);
+	  ret = read_device_map (fp, *map, map_file, partition_map);
 	  fclose (fp);
 	  return ret;
 	}
@@ -812,7 +996,7 @@
 
 /* Restore the memory consumed for MAP.  */
 void
-restore_device_map (char **map)
+restore_device_map (char **map, struct partition_map_entry *partition_map)
 {
   int i;
 
@@ -821,6 +1005,17 @@
       free (map[i]);
 
   free (map);
+
+#ifdef __linux__
+  while (partition_map)
+    {
+      struct partition_map_entry *next = partition_map->next;
+      if (partition_map->device_name)
+	free (partition_map->device_name);
+      free (partition_map);
+      partition_map = next;
+    }
+#endif /* __linux__ */
 }
 
 #ifdef __linux__
@@ -839,10 +1034,12 @@
 }
 
 int
-write_to_partition (char **map, int drive, int partition,
+write_to_partition (char **map, struct partition_map_entry *partition_map,
+		    int drive, int partition,
 		    int sector, int size, const char *buf)
 {
   char dev[PATH_MAX];	/* XXX */
+  const char *partition_dev;
   int fd;
   
   if ((partition & 0x00FF00) != 0x00FF00)
@@ -852,16 +1049,27 @@
       errnum = ERR_DEV_VALUES;
       return 1;
     }
-  
-  assert (map[drive] != 0);
-  
-  strcpy (dev, map[drive]);
-  if (have_devfs ())
+
+  /* First try to find the entry in PARTITION_MAP.  */
+  partition_dev = find_device_for_partition (partition_map, drive,
+					     (partition >> 16) & 0xFF);
+  if (partition_dev)
+     {
+      strcpy (dev, partition_dev); /* XXX */
+     }
+  else
     {
-      if (strcmp (dev + strlen(dev) - 5, "/disc") == 0)
-	strcpy (dev + strlen(dev) - 5, "/part");
+      assert (map[drive] != 0);
+ 
+      strcpy (dev, map[drive]);
+      if (have_devfs ())
+	{
+ 	  if (strcmp (dev + strlen(dev) - 5, "/disc") == 0)
+ 	    strcpy (dev + strlen(dev) - 5, "/part");
+	}
+   
+        sprintf (dev + strlen(dev), "%d", ((partition >> 16) & 0xFF) + 1);
     }
-  sprintf (dev + strlen(dev), "%d", ((partition >> 16) & 0xFF) + 1);
   
   /* Open the partition.  */
   fd = open (dev, O_RDWR);
--- grub-0.97/lib/device.h.alt-partition-map	2004-05-23 12:35:00.000000000 -0400
+++ grub-0.97/lib/device.h	2005-06-08 13:37:58.000000000 -0400
@@ -32,17 +32,31 @@
 #define DEFAULT_HD_HEADS	128
 #define DEFAULT_HD_SECTORS	63
 
+
+struct partition_map_entry /* one "(hdN,P) device" mapping; node of a singly linked list */
+{
+  struct partition_map_entry *next; /* next entry, or null at the end of the list */
+  int drive; /* drive number as stored by read_device_map (0x80 added for non-floppy drives) */
+  int partition; /* partition number as written in the device map entry */
+  char *device_name; /* strdup'ed device file name; freed by restore_device_map */
+};
+
 /* Function prototypes.  */
 extern void get_drive_geometry (struct geometry *geom, char **map, int drive);
 extern int check_device (const char *device);
-extern int init_device_map (char ***map, const char *map_file,
+extern int init_device_map (char ***map,
+			    struct partition_map_entry **part_map,
+			    const char *map_file,
 			    int no_floppies);
-extern void restore_device_map (char **map);
+extern void restore_device_map (char **map,
+				struct partition_map_entry *part_map);
 
 #ifdef __linux__
 extern int is_disk_device (char **map, int drive);
-extern int write_to_partition (char **map, int drive, int partition,
-			       int offset, int size, const char *buf);
+extern int write_to_partition (char **map,
+			       struct partition_map_entry *part_map,
+			       int drive, int partition,
+ 			       int offset, int size, const char *buf);
 #endif /* __linux__ */
 			       
 #endif /* DEVICE_MAP_HEADER */
--- grub-0.97/grub/asmstub.c.alt-partition-map	2005-06-08 13:36:03.000000000 -0400
+++ grub-0.97/grub/asmstub.c	2005-06-08 13:36:03.000000000 -0400
@@ -86,6 +86,9 @@
 /* The map between BIOS drives and UNIX device file names.  */
 char **device_map = 0;
 
+/* The map between drive/partition numbers and UNIX device file names.  */
+struct partition_map_entry *partition_map = 0;
+
 /* The jump buffer for exiting correctly.  */
 static jmp_buf env_for_exit;
 
@@ -155,7 +158,8 @@
   for (i = 0; i < NUM_DISKS; i++)
     disks[i].flags = -1;
 
-  if (! init_device_map (&device_map, device_map_file, floppy_disks))
+  if (! init_device_map (&device_map, &partition_map, device_map_file,
+			 floppy_disks))
     return 1;
   
   /* Check some invariants. */
@@ -213,8 +217,9 @@
     close (serial_fd);
   
   /* Release memory. */
-  restore_device_map (device_map);
+  restore_device_map (device_map, partition_map);
   device_map = 0;
+  partition_map = 0;
   free (disks);
   disks = 0;
   free (scratch);

[Index of Archives]     [Current Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [Yosemite Photos]     [KDE Users]     [Fedora Tools]     [Fedora Docs]

  Powered by Linux