Re: [PATCH] Fix console utf8 composing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jan Engelhardt wrote:
Hi,


the following patch had been posted around spring 2005, but has not since been included. It is about that hitting the keys <Compose><o><e> for example
did not produce the utf8 sequence for o-with-diaeresis, but the ASCII/ISO8859-1
char for o-w-diaeresis, as I seem to recall.
http://groups.google.de/group/linux.kernel/browse_thread/thread/e60f286969e83d99/55688a5aab326aa7?tvc=2&q=chris+(+%22utf-8%22+OR+%22utf8%22+)+console#55688a5aab326aa7

I am posting an updated version that I hope applies to 2.6.15-rc6.
Not sure what the correct X-Y-by: procedure is for this, as it originally
is not my work (see URL).

Looks like I did the same backporting independently. Please look at http://www.linuxfromscratch.org/~alexander/patches/linux-2.6.14-utf8_input-1.patch (that does apply to the latest kernels if you use patch -Np1 -l -i linux-2.6.14-utf8_input-1.patch). Differences between our versions are described below.

diff -Ppru linux-2.6.15-rc6-20051219230006/drivers/char/consolemap.c linux-2.6-AS22/drivers/char/consolemap.c
--- linux-2.6.15-rc6-20051219230006/drivers/char/consolemap.c	2005-12-11 13:42:23.000000000 +0100
+++ linux-2.6-AS22/drivers/char/consolemap.c	2005-12-19 21:53:25.000000000 +0100
@@ -178,6 +178,7 @@ struct uni_pagedir {
 	unsigned long	refcount;
 	unsigned long	sum;
 	unsigned char	*inverse_translations[4];
+	u16		*inverse_trans_unicode;
 	int		readonly;
 };
@@ -208,6 +209,40 @@ static void set_inverse_transl(struct vc
 	}
 }
+static void set_inverse_trans_unicode(struct vc_data *conp, + struct uni_pagedir *p)
+{
+	int i, j, k, glyph;
+	u16 **p1, *p2;
+	u16 *q;
+	

I'd insert the "if (!p) return;" statement here, for consistency with non-Unicode path.

+	q = p->inverse_trans_unicode;
+	if (!q) {
+		q = p->inverse_trans_unicode =
+			kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
+		if (!q)
+			return;
+	}
+	memset(q, 0, MAX_GLYPH * sizeof(u16));
+
+	for (i = 0; i < 32; i++) {
+		p1 = p->uni_pgdir[i];
+		if (!p1)
+			continue;
+		for (j = 0; j < 32; j++) {
+			p2 = p1[j];
+			if (!p2)
+				continue;
+			for (k = 0; k < 64; k++) {
+				glyph = p2[k];
+ if (glyph >= 0 && glyph < MAX_GLYPH + && q[glyph] < 32)
+		  			q[glyph] = (i << 11) + (j << 6) + k;
+			}
+		}
+	}
+}
+
 unsigned short *set_translate(int m, struct vc_data *vc)
 {
 	inv_translate[vc->vc_num] = m;
@@ -218,19 +253,29 @@ unsigned short *set_translate(int m, str
  * Inverse translation is impossible for several reasons:
  * 1. The font<->character maps are not 1-1.
  * 2. The text may have been written while a different translation map
- *    was active, or using Unicode.
+ *    was active.
  * Still, it is now possible to a certain extent to cut and paste non-ASCII.
  */
-unsigned char inverse_translate(struct vc_data *conp, int glyph)
+u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
 {
 	struct uni_pagedir *p;
+	int m;
 	if (glyph < 0 || glyph >= MAX_GLYPH)
 		return 0;
-	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) ||
-		 !p->inverse_translations[inv_translate[conp->vc_num]])
+	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
 		return glyph;
-	else
-		return p->inverse_translations[inv_translate[conp->vc_num]][glyph];
+	else if (use_unicode) {
+		if (!p->inverse_trans_unicode)
+			return glyph;
+		else
+			return p->inverse_trans_unicode[glyph];
+	} else {
+		m = inv_translate[conp->vc_num];
+		if (!p->inverse_translations[m])
+			return glyph;
+		else
+			return p->inverse_translations[m][glyph];
+	}
 }
static void update_user_maps(void)

There is probably a missed set_inverse_trans_unicode(vc_cons[i].d, p); in update_user_maps() just after set_inverse_transl(vc_cons[i].d, p, USER_MAP);

@@ -354,6 +399,10 @@ static void con_release_unimap(struct un
 		kfree(p->inverse_translations[i]);
 		p->inverse_translations[i] = NULL;
 	}
+	if (p->inverse_trans_unicode) {
+		kfree(p->inverse_trans_unicode);
+		p->inverse_trans_unicode = NULL;
+	}
 }
void con_free_unimap(struct vc_data *vc)
@@ -512,6 +561,7 @@ int con_set_unimap(struct vc_data *vc, u
for (i = 0; i <= 3; i++)
 		set_inverse_transl(vc, p, i); /* Update all inverse translations */
+	set_inverse_trans_unicode(vc, p);
return err;
 }
@@ -562,6 +612,7 @@ int con_set_default_unimap(struct vc_dat
for (i = 0; i <= 3; i++)
 		set_inverse_transl(vc, p, i);	/* Update all inverse translations */
+	set_inverse_trans_unicode(vc, p);
 	dflt = p;
 	return err;
 }
@@ -618,6 +669,19 @@ void con_protect_unimap(struct vc_data *
 		p->readonly = rdonly;
 }
+/* may be called during an interrupt */
+u32 conv_8bit_to_uni(unsigned char c)
+{
+ /* + * Always use USER_MAP. This function is used by the keyboard,
+	 * which shouldn't be affected by G0/G1 switching, etc.
+ * If the user map still contains default values, i.e. the + * direct-to-font mapping, then assume user is using Latin1.
+	 */
+	unsigned short uni = translations[USER_MAP][c];
+	return uni == (0xf000 | c) ? c : uni;
+}
+
 int
conv_uni_to_pc(struct vc_data *conp, long ucs) {
diff -Ppru linux-2.6.15-rc6-20051219230006/drivers/char/keyboard.c linux-2.6-AS22/drivers/char/keyboard.c
--- linux-2.6.15-rc6-20051219230006/drivers/char/keyboard.c	2005-12-19 21:53:05.000000000 +0100
+++ linux-2.6-AS22/drivers/char/keyboard.c	2005-12-19 21:53:25.000000000 +0100
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/consolemap.h>
 #include <linux/kbd_kern.h>
 #include <linux/kbd_diacr.h>
 #include <linux/vt_kern.h>
@@ -353,6 +354,15 @@ static void to_utf8(struct vc_data *vc,
My version of to_utf8() takes uint as a second argument and handles values beyonf 0xffff.

 	}
 }
+static void put_8bit(struct vc_data *vc, u8 c)
+{
+ if (kbd->kbdmode != VC_UNICODE || c < 32 || c == 127) + /* Don't translate control chars */
+		put_queue(vc, c);
+	else
+		to_utf8(vc, conv_8bit_to_uni(c));
+}
+
 /*
  * Called after returning from RAW mode or when changing consoles - recompute
  * shift_down[] and shift_state from key_down[] maybe called when keymap is
@@ -413,7 +423,7 @@ static unsigned char handle_diacr(struct
 	if (ch == ' ' || ch == d)
 		return d;
- put_queue(vc, d);
+	put_8bit(vc, d);
 	return ch;
 }
@@ -423,7 +433,7 @@ static unsigned char handle_diacr(struct
 static void fn_enter(struct vc_data *vc, struct pt_regs *regs)
 {
 	if (diacr) {
-		put_queue(vc, diacr);
+		put_8bit(vc, diacr);
 		diacr = 0;
 	}
 	put_queue(vc, 13);
@@ -632,7 +642,7 @@ static void k_self(struct vc_data *vc, u
 		diacr = value;
 		return;
 	}
-	put_queue(vc, value);
+	put_8bit(vc, value);
 }
/*

In k_shift(), there is a difference in the second argument of to_utf8(). My version doesn't have "& 0xffff" because of the abovementioned changes to this function.

diff -Ppru linux-2.6.15-rc6-20051219230006/drivers/char/selection.c linux-2.6-AS22/drivers/char/selection.c
--- linux-2.6.15-rc6-20051219230006/drivers/char/selection.c	2005-12-11 13:42:24.000000000 +0100
+++ linux-2.6-AS22/drivers/char/selection.c	2005-12-19 21:53:25.000000000 +0100
@@ -20,6 +20,7 @@
#include <asm/uaccess.h> +#include <linux/kbd_kern.h>
 #include <linux/vt_kern.h>
 #include <linux/consolemap.h>
 #include <linux/selection.h>
@@ -34,6 +35,7 @@ extern void poke_blanked_console(void);
 /* Variables for selection control. */
 /* Use a dynamic buffer, instead of static (Dec 1994) */
 struct vc_data *sel_cons;		/* must not be disallocated */
+static int use_unicode;
 static volatile int sel_start = -1; 	/* cleared by clear_selection */
 static int sel_end;
 static int sel_buffer_lth;
@@ -54,10 +56,8 @@ static inline void highlight_pointer(con
 	complement_pos(sel_cons, where);
 }
-static unsigned char
-sel_pos(int n)
-{
-	return inverse_translate(sel_cons, screen_glyph(sel_cons, n));
+static u16 sel_pos(int n) {
+    return inverse_translate(sel_cons, screen_glyph(sel_cons, n), use_unicode);
 }
/* remove the current selection highlight, if any,
@@ -86,8 +86,8 @@ static u32 inwordLut[8]={
   0xFF7FFFFF  /* latin-1 accented letters, not division sign */
 };
-static inline int inword(const unsigned char c) {
-	return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1;
+static inline int inword(const u16 c) {
+    return c > 0xff || ((inwordLut[c >> 5] >> (c & 0x1F)) & 1);
 }
/* set inwordLut contents. Invoked by ioctl(). */
@@ -108,13 +108,35 @@ static inline unsigned short limit(const
 	return (v > u) ? u : v;
 }
+/* stores the char in UTF8 and returns the number of bytes used (1-3) */
+int store_utf8(u16 c, char *p)  {
+	if (c < 0x80) {
+		/* 0******* */
+		p[0] = c;
+		return 1;
+	} else if (c < 0x800) {
+		/* 110***** 10****** */
+		p[0] = 0xc0 | (c >> 6);
+		p[1] = 0x80 | (c & 0x3f);
+		return 2;
+ 	} else {
+		/* 1110**** 10****** 10****** */
+		p[0] = 0xe0 | (c >> 12);
+		p[1] = 0x80 | ((c >> 6) & 0x3f);
+		p[2] = 0x80 | (c & 0x3f);
+		return 3;
+ 	}
+}
+
 /* set the current selection. Invoked by ioctl() or by kernel code. */
 int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty)
 {
 	struct vc_data *vc = vc_cons[fg_console].d;
 	int sel_mode, new_sel_start, new_sel_end, spc;
 	char *bp, *obp;
-	int i, ps, pe;
+        int i, ps, pe, multiplier;
+        u16 c;
+        struct kbd_struct *kbd = kbd_table + fg_console;
poke_blanked_console(); @@ -158,6 +180,7 @@ int set_selection(const struct tiocl_sel
 		clear_selection();
 		sel_cons = vc_cons[fg_console].d;
 	}
+	use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
switch (sel_mode)
 	{
@@ -240,7 +263,8 @@ int set_selection(const struct tiocl_sel
 	sel_end = new_sel_end;
/* Allocate a new buffer before freeing the old one ... */
-	bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL);
+	multiplier = use_unicode ? 3 : 1;  /* chars can take up to 3 bytes */
+	bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
 	if (!bp) {
 		printk(KERN_WARNING "selection: kmalloc() failed\n");
 		clear_selection();
@@ -251,8 +275,12 @@ int set_selection(const struct tiocl_sel
obp = bp;
 	for (i = sel_start; i <= sel_end; i += 2) {
-		*bp = sel_pos(i);
-		if (!isspace(*bp++))
+		c = sel_pos(i);
+		if (use_unicode)
+			bp += store_utf8(c, bp);
+		else
+			*bp++ = c;
+		if (!isspace(c))
 			obp = bp;
 		if (! ((i + 2) % vc->vc_size_row)) {
 			/* strip trailing blanks from line and add newline,
diff -Ppru linux-2.6.15-rc6-20051219230006/include/linux/consolemap.h linux-2.6-AS22/include/linux/consolemap.h
--- linux-2.6.15-rc6-20051219230006/include/linux/consolemap.h	2005-10-28 02:02:08.000000000 +0200
+++ linux-2.6-AS22/include/linux/consolemap.h	2005-12-19 21:53:25.000000000 +0100
@@ -10,6 +10,7 @@
struct vc_data; -extern unsigned char inverse_translate(struct vc_data *conp, int glyph);
+extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
 extern unsigned short *set_translate(int m, struct vc_data *vc);
 extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
+extern u32 conv_8bit_to_uni(unsigned char c);
#eof



Jan Engelhardt

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[Index of Archives]     [Kernel Newbies]     [Netfilter]     [Bugtraq]     [Photo]     [Stuff]     [Gimp]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Video 4 Linux]     [Linux for the blind]     [Linux Resources]
  Powered by Linux