Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 19 Jan 2012 23:58:25 +0400 (MSK)
From:      Dmitry Marakasov <amdmi3@FreeBSD.org>
To:        FreeBSD-gnats-submit@FreeBSD.org
Subject:   bin/164317: [patch] sbin/write: add multibyte character support
Message-ID:  <20120119195825.3233CA2D@hades.panopticon>
Resent-Message-ID: <201201192020.q0JKK8OT087773@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         164317
>Category:       bin
>Synopsis:       [patch] sbin/write: add multibyte character support
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Thu Jan 19 20:20:08 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Dmitry Marakasov
>Release:        FreeBSD 9.0-RC2 amd64
>Organization:
>Environment:
System: FreeBSD hades.panopticon 9.0-RC2 FreeBSD 9.0-RC2 #0: Tue Nov 29 07:18:03 MSK 2011 root@hades.panopticon:/usr/work/usr/src/sys/HADES amd64


>Description:
Currently write(1) doesn't handle UTF-8 locales at all:

    (this is Russian)
# echo "Проверка" | write amdmi3 pts/29

Message from amdmi3@hades.panopticon on pts/29 at 23:31 ...
M-PM-^_M-QM-^@M-PM->M-PM-2M-PM-5M-QM-^@M-PM-:M-PM-0
EOF

The checks used in the character-printing routine (((*s & 0x80) && *s < 0xA0)) seem to assume a specific encoding (for example, CP866 has letters in 0x80-0xA0), so this will not work correctly even for 8-bit locales.

However, the utility is easily convertible to wchar_t, which should handle them all; a patch for it is attached.

% (echo "Проверка"; echo "Some control characters: \b\t^[[D^[[C^[[A^[[B^[") | ./write amdmi3 pts/29

Message from amdmi3@hades.panopticon on pts/29 at 23:43 ...
Проверка
Some control characters: <0x8>  <0x1B>[D<0x1B>[C<0x1B>[A<0x1B>[B<0x1B>
EOF

The way of displaying non-printable characters is debatable, but since one can assume neither that the locale is UTF-based nor that the wchar_t value is somehow linked to a code point, it would be inappropriate to use notations like U+%X or \u%X or &#%d; and/or modify the wchar_t with bitwise operations. A notation like <0x%X>, however, is charset-agnostic and pretty readable, so I think it's quite suitable here.

>How-To-Repeat:
>Fix:
Index: write.1
===================================================================
--- write.1	(revision 230334)
+++ write.1	(working copy)
@@ -107,7 +107,3 @@
 terminal, not the receiver's (which
 .Nm
 has no way of knowing).
-.Pp
-The
-.Nm
-utility does not recognize multibyte characters.
Index: write.c
===================================================================
--- write.c	(revision 230334)
+++ write.c	(working copy)
@@ -60,12 +60,14 @@
 #include <string.h>
 #include <unistd.h>
 #include <utmpx.h>
+#include <wchar.h>
+#include <wctype.h>
 
 void done(int);
 void do_write(char *, char *, uid_t);
 static void usage(void);
 int term_chk(char *, int *, time_t *, int);
-void wr_fputs(unsigned char *s);
+void wr_fputs(wchar_t *s);
 void search_utmp(char *, char *, char *, uid_t);
 int utmp_chk(char *, char *);
 
@@ -243,7 +245,8 @@
 	char *nows;
 	struct passwd *pwd;
 	time_t now;
-	char path[MAXPATHLEN], host[MAXHOSTNAMELEN], line[512];
+	char path[MAXPATHLEN], host[MAXHOSTNAMELEN];
+	wchar_t line[512];
 
 	/* Determine our login name before we reopen() stdout */
 	if ((login = getlogin()) == NULL) {
@@ -269,7 +272,7 @@
 	(void)printf("\r\n\007\007\007Message from %s@%s on %s at %s ...\r\n",
 	    login, host, mytty, nows + 11);
 
-	while (fgets(line, sizeof(line), stdin) != NULL)
+	while (fgetws(line, sizeof(line)/sizeof(wchar_t), stdin) != NULL)
 		wr_fputs(line);
 }
 
@@ -288,30 +291,20 @@
  *     turns \n into \r\n
  */
 void
-wr_fputs(unsigned char *s)
+wr_fputs(wchar_t *s)
 {
 
-#define	PUTC(c)	if (putchar(c) == EOF) err(1, NULL);
+#define	PUTC(c)	if (putwchar(c) == WEOF) err(1, NULL);
 
-	for (; *s != '\0'; ++s) {
-		if (*s == '\n') {
-			PUTC('\r');
-		} else if (((*s & 0x80) && *s < 0xA0) ||
-			   /* disable upper controls */
-			   (!isprint(*s) && !isspace(*s) &&
-			    *s != '\a' && *s != '\b')
-			  ) {
-			if (*s & 0x80) {
-				*s &= ~0x80;
-				PUTC('M');
-				PUTC('-');
-			}
-			if (iscntrl(*s)) {
-				*s ^= 0x40;
-				PUTC('^');
-			}
+	for (; *s != L'\0'; ++s) {
+		if (*s == L'\n') {
+			PUTC(L'\r');
+			PUTC(L'\n');
+		} else if (iswprint(*s) || iswspace(*s)) {
+			PUTC(*s);
+		} else {
+			wprintf(L"<0x%X>", *s);
 		}
-		PUTC(*s);
 	}
 	return;
 #undef PUTC
>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20120119195825.3233CA2D>