Blob Blame History Raw
---
 bashintl.h          |    3 +++
 builtins/printf.def |   21 +++++++++++++++++++++
 builtins/read.def   |   52 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/sh/strtrans.c   |   21 +++++++++++++++++++++
 locale.c            |   26 ++++++++++++++++++++++++++
 parse.y             |   42 +++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 163 insertions(+), 2 deletions(-)

--- bashintl.h
+++ bashintl.h	2018-11-29 08:14:56.165762022 +0000
@@ -43,6 +43,9 @@
 #  undef HAVE_SETLOCALE
 #endif
 
+int bash_mbcs_non_utf8;
+int bash_mb_cur_max;
+
 #if !defined (HAVE_SETLOCALE)
 #  define setlocale(cat, loc)
 #endif
--- builtins/printf.def
+++ builtins/printf.def	2018-11-29 08:18:21.445909015 +0000
@@ -247,6 +247,8 @@ printf_builtin (list)
 #if defined (HANDLE_MULTIBYTE)
   char mbch[25];		/* 25 > MB_LEN_MAX, plus can handle 4-byte UTF-8 and large Unicode characters*/
   int mbind, mblen;
+  size_t charlen;
+  mbstate_t mbs;
 #endif
 #if defined (ARRAY_VARS)
   int arrayflags;
@@ -334,6 +336,25 @@ printf_builtin (list)
 	  precision = fieldwidth = 0;
 	  have_fieldwidth = have_precision = 0;
 
+#if defined (HANDLE_MULTIBYTE)
+	  if (bash_mbcs_non_utf8)
+	    {
+	      memset (&mbs, 0, sizeof (mbs));
+	      charlen = mbrlen (fmt, bash_mb_cur_max, &mbs);
+	      if (charlen > 1 && charlen != (size_t)-2 && charlen != (size_t)-1)
+	        {
+	          while (charlen > 0)
+	            {
+	              PC (*fmt);
+	              fmt++;
+	              charlen--;
+	            }
+	          fmt--;
+	          continue;
+	        }
+	    }
+#endif
+
 	  if (*fmt == '\\')
 	    {
 	      fmt++;
--- builtins/read.def
+++ builtins/read.def	2018-11-29 08:14:56.165762022 +0000
@@ -188,6 +188,12 @@ read_builtin (list)
   char c;
   char *input_string, *orig_input_string, *ifs_chars, *prompt, *arrayname;
   char *e, *t, *t1, *ps2, *tofree;
+#if defined (HANDLE_MULTIBYTE)
+  static char mbch[25];
+  size_t charlen;
+  int mblen;
+  mbstate_t mbs;
+#endif
   struct stat tsb;
   SHELL_VAR *var;
   TTYSTRUCT ttattrs, ttset;
@@ -548,6 +554,7 @@ read_builtin (list)
 #endif
 
   ps2 = 0;
+  charlen = 0;
   for (print_ps2 = eof = retval = 0;;)
     {
       CHECK_ALRM;
@@ -589,14 +596,40 @@ read_builtin (list)
 
       reading = 1;
       CHECK_ALRM;
+#if defined (HANDLE_MULTIBYTE)
+      charlen = 0;
+      mblen = 0;
+      if (bash_mbcs_non_utf8)
+	{
+	  do
+	   {
+	     if (unbuffered_read)
+	       retval = posixly_correct ? zreadintr (fd, &c, 1) : zread (fd, &c, 1);
+	     else
+	       retval = posixly_correct ? zreadcintr (fd, &c) : zreadc (fd, &c);
+
+	     if (retval <= 0)
+	        break;
+
+	     mbch[mblen++] = c;
+	     memset (&mbs, 0, sizeof (mbs));
+	     charlen = mbrlen (mbch, mblen, &mbs);
+	   }
+	  while (charlen == (size_t)-2 && mblen < bash_mb_cur_max);
+	}
+      else
+	{
+#endif
       if (unbuffered_read == 2)
 	retval = posixly_correct ? zreadintr (fd, &c, 1) : zreadn (fd, &c, nchars - nr);
       else if (unbuffered_read)
 	retval = posixly_correct ? zreadintr (fd, &c, 1) : zread (fd, &c, 1);
       else
 	retval = posixly_correct ? zreadcintr (fd, &c) : zreadc (fd, &c);
+#if defined (HANDLE_MULTIBYTE)
+	}
+#endif
       reading = 0;
-
       if (retval <= 0)
 	{
 	  if (retval < 0 && errno == EINTR)
@@ -639,6 +672,11 @@ read_builtin (list)
 	    }
 	}
 
+#if defined (HANDLE_MULTIBYTE)
+      if (bash_mbcs_non_utf8 && charlen > 1)
+	goto add_char;
+#endif
+
       /* If the next character is to be accepted verbatim, a backslash
 	 newline pair still disappears from the input. */
       if (pass_next)
@@ -681,7 +719,19 @@ read_builtin (list)
 	}
 
 add_char:
+#if defined (HANDLE_MULTIBYTE)
+      if (bash_mbcs_non_utf8 && charlen > 1)
+	{
+	   for (mblen = 0; mblen < charlen; mblen++)
+	     input_string[i++] = mbch[mblen];
+	}
+      else
+	{
+#endif
       input_string[i++] = c;
+#if defined (HANDLE_MULTIBYTE)
+	}
+#endif
       CHECK_ALRM;
 
 #if defined (HANDLE_MULTIBYTE)
--- lib/sh/strtrans.c
+++ lib/sh/strtrans.c	2018-11-29 08:14:56.165762022 +0000
@@ -29,6 +29,7 @@
 #include <chartypes.h>
 
 #include "shell.h"
+#include "bashintl.h"
 
 #include "shmbchar.h"
 #include "shmbutil.h"
@@ -55,6 +56,10 @@ ansicstr (string, len, flags, sawc, rlen
   int c, temp;
   char *ret, *r, *s;
   unsigned long v;
+#if defined (HANDLE_MULTIBYTE)
+  size_t charlen;
+  mbstate_t mbs;
+#endif
 
   if (string == 0 || *string == '\0')
     return ((char *)NULL);
@@ -69,6 +74,22 @@ ansicstr (string, len, flags, sawc, rlen
 #endif
   for (r = ret, s = string; s && *s; )
     {
+#if defined (HANDLE_MULTIBYTE)
+      if (bash_mbcs_non_utf8)
+        {
+          memset (&mbs, 0, sizeof (mbs));
+          charlen = mbrlen (s, bash_mb_cur_max, &mbs);
+          if (charlen > 1 && charlen != (size_t)-2 && charlen != (size_t)-1)
+            {
+              while (charlen > 0)
+                {
+                  *r++ = *s++;
+                  charlen--;
+                }
+              continue;
+            }
+        }
+#endif
       c = *s++;
       if (c != '\\' || *s == '\0')
 	*r++ = c;
--- locale.c
+++ locale.c	2018-11-29 08:14:56.165762022 +0000
@@ -52,6 +52,7 @@ int locale_mb_cur_max;	/* value of MB_CU
 int locale_shiftstates;
 
 extern int dump_translatable_strings, dump_po_strings;
+extern int bash_mb_cur_max, bash_mbcs_non_utf8;
 
 /* The current locale when the program begins */
 static char *default_locale;
@@ -76,6 +77,26 @@ static int reset_locale_vars __P((void))
 static void locale_setblanks __P((void));
 static int locale_isutf8 __P((char *));
 
+static void
+set_mbcs_values ()
+{
+  /* Enhance the performance. */
+#if defined (HANDLE_MULTIBYTE)
+  bash_mb_cur_max = MB_CUR_MAX;
+#  if defined (HAVE_LANGINFO_CODESET)
+  if (strcasestr (nl_langinfo (CODESET), "JIS") != 0 && bash_mb_cur_max == 2)
+    bash_mbcs_non_utf8 = 1;
+  else
+    bash_mbcs_non_utf8 = 0;
+#  else
+    bash_mbcs_non_utf8 = 0;
+#  endif
+#else
+  bash_mb_cur_max = 1;
+  bash_mbcs_non_utf8 = 0;
+#endif
+}
+
 /* Set the value of default_locale and make the current locale the
    system default locale.  This should be called very early in main(). */
 void
@@ -94,6 +115,8 @@ set_default_locale ()
   default_locale = setlocale (LC_ALL, "");
   if (default_locale)
     default_locale = savestring (default_locale);
+
+  set_mbcs_values ();
 #endif /* HAVE_SETLOCALE */
   bindtextdomain (PACKAGE, LOCALEDIR);
   textdomain (PACKAGE);
@@ -150,6 +173,7 @@ set_default_locale_vars ()
     setlocale (LC_TIME, lc_all);
 #  endif /* LC_TIME */
 
+  set_mbcs_values ();
 #endif /* HAVE_SETLOCALE */
 
   val = get_string_value ("TEXTDOMAIN");
@@ -240,6 +264,7 @@ set_locale_var (var, value)
       if (lc_all == 0 || *lc_all == '\0')
 	{
 	  x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE"));
+	  set_mbcs_values ();
 	  locale_setblanks ();
 	  locale_mb_cur_max = MB_CUR_MAX;
 	  /* if setlocale() returns NULL, the locale is not changed */
@@ -388,6 +413,7 @@ reset_locale_vars ()
   t = setlocale (LC_TIME, get_locale_var ("LC_TIME"));
 #  endif
 
+  set_mbcs_values ();
   locale_setblanks ();  
   locale_mb_cur_max = MB_CUR_MAX;
   if (x)
--- parse.y
+++ parse.y	2018-11-29 08:14:56.165762022 +0000
@@ -2005,7 +2005,12 @@ read_a_line (remove_quoted_newline)
 {
   static char *line_buffer = (char *)NULL;
   static int buffer_size = 0;
-  int indx, c, peekc, pass_next;
+  int indx, c, peekc, pass_next, chari;
+#if defined (HANDLE_MULTIBYTE)
+  size_t charlen;
+  mbstate_t mbs;
+  static char char_buffer[10];
+#endif
 
 #if defined (READLINE)
   if (no_line_editing && SHOULD_PROMPT ())
@@ -2020,7 +2025,42 @@ read_a_line (remove_quoted_newline)
       /* Allow immediate exit if interrupted during input. */
       QUIT;
 
+#if defined (HANDLE_MULTIBYTE)
+      if (!bash_mbcs_non_utf8)
+        c = yy_getc ();
+      else
+        {
+          chari = 0;
+          charlen = 0;
+          while (chari < 10)
+            {
+              char_buffer[chari++] = c = yy_getc ();
+
+              if (c == EOF)
+                break;
+
+              memset (&mbs, 0, sizeof (mbs));
+              charlen = mbrlen (char_buffer, chari, &mbs);
+
+              if ((size_t)charlen != (size_t)-2 && (size_t)charlen != (size_t)-1)
+                break;
+            }
+
+          if (charlen != 1)
+            {
+              charlen = chari;
+              for (chari = 0; chari < charlen && char_buffer[chari] != EOF; chari++)
+                {
+	          RESIZE_MALLOCED_BUFFER (line_buffer, indx, 2, buffer_size, 128);
+	          line_buffer[indx++] = char_buffer[chari];
+                }
+              if (chari == charlen)
+                continue;
+            }
+        }
+#else
       c = yy_getc ();
+#endif
 
       /* Ignore null bytes in input. */
       if (c == 0)