/* $TOG: imConv.c /main/20 1998/06/17 15:46:41 kaleb $ */ /****************************************************************** Copyright 1991, 1992 by Fuji Xerox Co.,Ltd. Copyright 1993, 1994 by FUJITSU LIMITED Permission to use, copy, modify, distribute, and sell this software and its documentation for any purpose is hereby granted without fee, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of Fuji Xerox Co.,Ltd. , and that the name of FUJITSU LIMITED not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. Fuji Xerox Co.,Ltd. , and FUJITSU LIMITED makes no representations about the suitability of this software for any purpose. It is provided "as is" without express or implied warranty. FUJI XEROX CO.,LTD. AND FUJITSU LIMITED DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL FUJI XEROX CO.,LTD. AND FUJITSU LIMITED BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. Auther: Kazunori Nishihara, Fuji Xerox Co.,Ltd. kaz@ssdev.ksp.fujixerox.co.jp Modifier: Takashi Fujiwara FUJITSU LIMITED fujiwara@a80.tech.yk.fujitsu.co.jp ******************************************************************/ /* $XFree86: xc/lib/X11/imConv.c,v 1.5.2.7 1999/04/10 05:44:38 dawes Exp $ */ #define NEED_EVENTS #include #include "Xlibint.h" #include "Xlcint.h" #include "Ximint.h" #include "XlcPubI.h" #define XK_PUBLISHING #include "X11/keysym.h" #ifdef XKB /* * rather than just call _XLookupString (i.e. the pre-XKB XLookupString) * do this because with XKB the event may have some funky modifiers that * _XLookupString doesn't grok. */ #include "XKBlib.h" #define XLOOKUPSTRING lookup_string #else #define XLOOKUPSTRING XLookupString #endif /* bit (1< 0 && keysym < 0x100) return keysym; else if (keysym > 0x1a0 && keysym < 0x200) return keysym_to_unicode_1a1_1ff[keysym - 0x1a1]; else if (keysym > 0x2a0 && keysym < 0x2ff) return keysym_to_unicode_2a1_2fe[keysym - 0x2a1]; else if (keysym > 0x3a1 && keysym < 0x3ff) return keysym_to_unicode_3a2_3fe[keysym - 0x3a2]; else if (keysym > 0x4a0 && keysym < 0x4e0) return keysym_to_unicode_4a1_4df[keysym - 0x4a1]; else if (keysym > 0x5ab && keysym < 0x5f3) return keysym_to_unicode_5ac_5f2[keysym - 0x5ac]; else if (keysym > 0x6a0 && keysym < 0x700) return keysym_to_unicode_6a1_6ff[keysym - 0x6a1]; else if (keysym > 0x7a0 && keysym < 0x7fa) return keysym_to_unicode_7a1_7f9[keysym - 0x7a1]; else if (keysym > 0x8a3 && keysym < 0x8ff) return keysym_to_unicode_8a4_8fe[keysym - 0x8a4]; else if (keysym > 0x9de && keysym < 0x9f9) return keysym_to_unicode_9df_9f8[keysym - 0x9df]; else if (keysym > 0xaa0 && keysym < 0xaff) return keysym_to_unicode_aa1_afe[keysym - 0xaa1]; else if (keysym > 0xcde && keysym < 0xcfb) return keysym_to_unicode_cdf_cfa[keysym - 0xcdf]; else if (keysym > 0xda0 && keysym < 0xdfa) return keysym_to_unicode_da1_df9[keysym - 0xda1]; else if (keysym > 0xe9f && keysym < 0xf00) return keysym_to_unicode_ea0_eff[keysym - 0xea0]; else if (keysym > 0x13bb && keysym < 0x13bf) return keysym_to_unicode_13bc_13be[keysym - 0x13bc]; else if (keysym > 0x209f && keysym < 0x20ad) return keysym_to_unicode_20a0_20ac[keysym - 0x20a0]; else return 0; } struct CodesetRec { unsigned long locale_code; char* locale_name; char* escape_seq; }; #define sLatin1 0L #define sLatin2 1L #define sLatin3 2L #define sLatin4 3L #define sKana 4L #define sX0201 0x01000004L #define sArabic 5L #define sCyrillic 6L #define sKoi8 0x01000006L #define sGreek 7L #define sHebrew 12L #define sThai 13L #define sKorean 14L #define sLatin5 15L #define sLatin6 16L #define sLatin7 17L #define sLatin8 18L #define sLatin9 19L #define sCurrency 32L #define sUTF8 0x02000000L static struct CodesetRec CodesetTable[] = { {sLatin1, "ISO8859-1", "\033-A"}, {sLatin2, "ISO8859-2", "\033-B"}, {sLatin3, "ISO8859-3", "\033-C"}, {sLatin4, "ISO8859-4", "\033-D"}, {sCyrillic, "ISO8859-5", "\033-L"}, {sArabic, "ISO8859-6", "\033-G"}, {sGreek, "ISO8859-7", "\033-F"}, {sHebrew, "ISO8859-8", "\033-H"}, {sLatin5, "ISO8859-9", "\033-M"}, {sLatin6, "ISO8859-10", "\033-V"}, {sThai, "TACTIS", "\033-T"}, {sKorean, "ko.euc", "\033$(C"}, {sThai, "ISO8859-11", "\033-T"}, #if 0 {sLatin8, "ISO8859-12", "\033-?"},/* Celtic, superceded by -14 */ {sLatin7, "ISO8859-13", "\033-?"},/* Baltic Rim */ {sLatin8 "ISO8859-14", "\033-?"},/* Celtic */ #endif {sUTF8, "utf8", "\033%B"}, /* Non-standard */ {sKoi8, "KOI8-R", "\033%/1\200\210koi8-r\002"}, {sLatin9, "ISO8859-15", "\033%/1\200\213iso8859-15\002"},/* a.k.a. Latin-0 */ }; #define NUM_CODESETS sizeof CodesetTable / sizeof CodesetTable[0] #ifndef XK_emdash #define XK_emdash 0xaa9 #endif /* ================================================================ */ /* File: ConvertUTF.C Author: Mark E. Davis Copyright (C) 1994 Taligent, Inc. All rights reserved. This code is copyrighted. Under the copyright laws, this code may not be copied, in whole or part, without prior written consent of Taligent. Taligent grants the right to use or reprint this code as long as this ENTIRE copyright notice is reproduced in the code or reproduction. The code is provided AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY NOT APPLY TO YOU. RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the government is subject to restrictions as set forth in subparagraph (c)(l)(ii) of the Rights in Technical Data and Computer Software clause at DFARS 252.227-7013 and FAR 52.227-19. This code may be protected by one or more U.S. and International Patents. TRADEMARKS: Taligent and the Taligent Design Mark are registered trademarks of Taligent, Inc. */ /* ================================================================ */ #define kReplacementCharacter 0x0000FFFDUL #define kMaximumUCS2 0x0000FFFFUL #define kMaximumUCS4 0x7FFFFFFFUL typedef enum { ok, /* conversion successful */ sourceExhausted, /* partial character in source, but hit end */ targetExhausted /* insuff. room in target for conversion */ } ConversionResult; #define halfShift 10 #define halfBase 0x0010000UL #define halfMask 0x3FFUL #define kSurrogateHighStart 0xD800UL #define kSurrogateHighEnd 0xDBFFUL #define kSurrogateLowStart 0xDC00UL #define kSurrogateLowEnd 0xDFFFUL typedef unsigned int UCS4; /* wchar_t, but on AIX, SunOS wchar_t is 16 bits */ typedef unsigned char UTF8; static UCS4 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; static char bytesFromUTF8[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; static UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static ConversionResult ConvertUCS4toUTF8 ( UCS4** sourceStart, UCS4* sourceEnd, UTF8** targetStart, UTF8* targetEnd) { ConversionResult result = ok; register UCS4* source = *sourceStart; register UTF8* target = *targetStart; while (source < sourceEnd) { register UCS4 ch; register unsigned short bytesToWrite = 0; register const UCS4 byteMask = 0xBF; register const UCS4 byteMark = 0x80; ch = *source++; if (ch >= kSurrogateHighStart && ch <= kSurrogateHighEnd && source < sourceEnd) { register UCS4 ch2 = *source; if (ch2 >= kSurrogateLowStart && ch2 <= kSurrogateLowEnd) { ch = ((ch - kSurrogateHighStart) << halfShift) + (ch2 - kSurrogateLowStart) + halfBase; ++source; }; }; if (ch < 0x80) { bytesToWrite = 1; } else if (ch < 0x800) { bytesToWrite = 2; } else if (ch < 0x10000) { bytesToWrite = 3; } else if (ch < 0x200000) { bytesToWrite = 4; } else if (ch < 0x4000000) { bytesToWrite = 5; } else if (ch <= kMaximumUCS4){ bytesToWrite = 6; } else { bytesToWrite = 2; ch = kReplacementCharacter; }; /* I wish there were a smart way to avoid this conditional */ target += bytesToWrite; if (target > targetEnd) { target -= bytesToWrite; result = targetExhausted; break; }; switch (bytesToWrite) { /* note: code falls through cases! */ case 6: *--target = (ch | byteMark) & byteMask; ch >>= 6; case 5: *--target = (ch | byteMark) & byteMask; ch >>= 6; case 4: *--target = (ch | byteMark) & byteMask; ch >>= 6; case 3: *--target = (ch | byteMark) & byteMask; ch >>= 6; case 2: *--target = (ch | byteMark) & byteMask; ch >>= 6; case 1: *--target = ch | firstByteMark[bytesToWrite]; }; target += bytesToWrite; }; *sourceStart = source; *targetStart = target; return result; } /*ARGSUSED*/ int #if NeedFunctionPrototypes _XGetCharCode ( unsigned long locale_code, KeySym keysym, unsigned char* buf, int nbytes) #else _XGetCharCode (locale_code, keysym, buf, nbytes) unsigned long locale_code; KeySym keysym; unsigned char *buf; int nbytes; #endif { unsigned long kset; int count,isLatin1; if (locale_code == sUTF8) { unsigned int ucs4[2]; unsigned int* ucs4vec[1]; unsigned char* utf8vec[1]; ucs4[0] = keysym_to_ucs4 (keysym); ucs4[1] = 0; ucs4vec[0] = ucs4; utf8vec[0] = buf; (void) ConvertUCS4toUTF8 (ucs4vec, &ucs4[1], utf8vec, &buf[nbytes]); return (strlen ((char*) buf)); } kset = locale_code&0xffffff; isLatin1 = ((keysym&0xffffff00)==0); count = 0; if ( keysym == NoSymbol ) return 0; else if ((keysym >> 8) == kset) { count = 1; switch (kset) { case sKana: *buf = (unsigned char)(keysym & 0xff); if (buf[0] == 0x7e) count = 0; break; case sCyrillic: if (locale_code == sKoi8) *buf = _Xkoi8[keysym & 0x7f]; else *buf = _Xcyrillic[keysym & 0x7f]; break; case sGreek: *buf = _Xgreek[keysym & 0x7f]; if (!buf[0]) count = 0; break; default: *buf = (unsigned char)(keysym & 0xff); break; } } else if ((locale_code != 0) && (isLatin1) && (keysym & 0x80)) { if (_Xlatin1[keysym & 0x7f] & (1 << kset)) { /* Most non-latin1 locales use some latin-1 upper half keysyms as defined by bitpatterns in array latin1. Enforce it. */ *buf = (unsigned char)(keysym & 0xff); count = 1; } else { count= 1; if ((locale_code == sHebrew) && (keysym == XK_multiply)) *buf = (unsigned char)0xaa; else if ((locale_code == sHebrew) && (keysym == XK_division)) *buf = (unsigned char)0xba; else if ((locale_code == sCyrillic) && (keysym == XK_section)) *buf = (unsigned char)0xfd; else if ((locale_code == sX0201) && (keysym == XK_yen)) *buf = (unsigned char)0x5c; else count = 0; } } else if (isLatin1) { if ((locale_code == sX0201) && ((keysym == XK_backslash) || (keysym == XK_asciitilde))) count = 0; if ( (keysym&0x80)==0 ) { *buf = (unsigned char)(keysym&0x7f); count = 1; } } else if ((keysym >> 8) == sLatin2) { count = 1; if ((keysym & 0x80) && (_Xlatin2[keysym & 0x7f] & (1 << kset))) *buf = (unsigned char)(keysym & 0xff); else if (locale_code == sLatin5) { if (keysym == XK_Scedilla) *buf = (unsigned char)0xde; else if (keysym == XK_scedilla) *buf = (unsigned char)0xfe; else count = 0; } else if (locale_code == sLatin9) { if (keysym == XK_Scaron) *buf = (unsigned char)0xa6; else if (keysym == XK_scaron) *buf = (unsigned char)0xa8; else if (keysym == XK_Zcaron) *buf = (unsigned char)0xb4; else if (keysym == XK_zcaron) *buf = (unsigned char)0xb8; else count = 0; } else count = 0; } else if ((keysym >> 8) == sLatin3) { if (locale_code == sLatin5) { count = 1; switch (keysym) { case XK_Gbreve: *buf = (unsigned char)0xd0; break; case XK_gbreve: *buf = (unsigned char)0xf0; break; case XK_Scedilla: *buf = (unsigned char)0xde; break; case XK_scedilla: *buf = (unsigned char)0xfe; break; case XK_Iabovedot: *buf = (unsigned char)0xdd; break; case XK_idotless: *buf = (unsigned char)0xfd; break; default: count = 0; } } } else if ((keysym >> 8) == sLatin4) { if (locale_code == sLatin6) { count = 1; switch (keysym) { case XK_Emacron: *buf = (unsigned char)0xa2; break; case XK_Gcedilla: *buf = (unsigned char)0xa3; break; case XK_Imacron: *buf = (unsigned char)0xa4; break; case XK_Lcedilla: *buf = (unsigned char) 0xa8; break; case XK_Dstroke: *buf = (unsigned char)0xa9; break; case XK_Scaron: *buf = (unsigned char)0xaa; break; case XK_Tslash: *buf = (unsigned char)0xab; break; case XK_Zcaron: *buf = (unsigned char)0xac; break; case XK_Umacron: *buf = (unsigned char)0xae; break; case XK_Utilde: *buf = (unsigned char)0xd7; break; case XK_ENG: *buf = (unsigned char)0xaf; break; case XK_emacron: *buf = (unsigned char)0xb2; break; case XK_gcedilla: *buf = (unsigned char)0xb3; break; case XK_imacron: *buf = (unsigned char)0xb4; break; case XK_lcedilla: *buf = (unsigned char) 0xb8; break; case XK_dstroke: *buf = (unsigned char)0xb9; break; case XK_scaron: *buf = (unsigned char)0xba; break; case XK_tslash: *buf = (unsigned char)0xbb; break; case XK_zcaron: *buf = (unsigned char)0xbc; break; case XK_umacron: *buf = (unsigned char)0xbe; break; case XK_utilde: *buf = (unsigned char)0xf7; break; case XK_eng: *buf = (unsigned char)0xbf; break; case XK_kra: *buf = (unsigned char)0xff; break; case XK_Itilde: case XK_Kcedilla: case XK_Iogonek: case XK_Ncedilla: case XK_Omacron: case XK_Uogonek: case XK_itilde: case XK_kcedilla: case XK_iogonek: case XK_ncedilla: case XK_omacron: case XK_uogonek: *buf = (unsigned char)(keysym & 0xff); break; default: count = 0; } } } else if (locale_code == sLatin9 && keysym == XK_EuroSign) { count = 1; *buf = (unsigned char)0xa4; } else if ((locale_code == sGreek) && ((keysym == XK_leftsinglequotemark) || (keysym == XK_rightsinglequotemark))) { *buf = (unsigned char)(keysym - (XK_leftsinglequotemark - 0xa1)); count = 1; } if (count>nbytes) return nbytes; if (countdisplay); XkbSetXlibControls (event->display, XkbLC_ForceLatin1Lookup, XkbLC_ForceLatin1Lookup); ret = XLookupString(event, (char *)buffer, nbytes, keysym, status); XkbSetXlibControls (event->display, ctrls, ctrls); return ret; } #endif #define BUF_SIZE (20) int _XimLookupMBText(ic, event, buffer, nbytes, keysym, status) Xic ic; XKeyEvent* event; char* buffer; int nbytes; KeySym* keysym; XComposeStatus* status; { int count, local_count; KeySym symbol; struct CodesetRec *cset; int i; unsigned char c; Status dummy; Xim im = (Xim)ic->core.im; XLCd lcd = im->core.lcd; unsigned char local_buf[BUF_SIZE]; unsigned char look[BUF_SIZE]; /* force a latin-1 lookup for compatibility */ count = XLOOKUPSTRING(event, (char *)buffer, nbytes, &symbol, status); if (keysym != NULL) *keysym = symbol; if ((nbytes == 0) || (symbol == NoSymbol)) return count; for (cset = NULL, i = 0; i < NUM_CODESETS; i++) { if (strcmp (XLC_PUBLIC(lcd,encoding_name), CodesetTable[i].locale_name) == 0) { cset = &CodesetTable[i]; break; } } if (count == 0 && cset != NULL || (count == 1 && (symbol > 0x7f && symbol < 0xff00) && cset != NULL && cset->locale_code != 0)) { if ((count = _XGetCharCode(cset->locale_code, symbol, look, sizeof look))) { strcpy((char*) local_buf, cset->escape_seq); local_count = strlen(cset->escape_seq); local_buf[local_count] = look[0]; local_count++; local_buf[local_count] = '\0'; if ((count = im->methods->ctstombs(ic->core.im, (char*) local_buf, local_count, (char *)buffer, nbytes, &dummy)) < 0) { count = 0; } } } else if (count > 1) { /* not ASCII Encoding */ memcpy(look, (char *)buffer,count); look[count] = '\0'; if ((count = im->methods->ctstombs(ic->core.im, (char*) look, count, buffer, nbytes, &dummy)) < 0) { count = 0; } } /* * we should make sure that if the character is a Latin1 character * and it's on the right side, and we're in a non-Latin1 locale * that this is a valid Latin1 character for this locale. */ return count; } int _XimLookupWCText(ic, event, buffer, nbytes, keysym, status) Xic ic; XKeyEvent* event; wchar_t* buffer; int nbytes; KeySym* keysym; XComposeStatus* status; { int count, local_count; KeySym symbol; struct CodesetRec *cset; int i; unsigned char c; Status dummy; Xim im = (Xim)ic->core.im; XLCd lcd = im->core.lcd; unsigned char local_buf[BUF_SIZE]; unsigned char look[BUF_SIZE]; /* force a latin-1 lookup for compatibility */ count = XLOOKUPSTRING(event, (char *)look, nbytes, &symbol, status); if (keysym != NULL) *keysym = symbol; if ((nbytes == 0) || (symbol == NoSymbol)) return count; for (cset = NULL, i = 0; i < NUM_CODESETS; i++) { if (strcmp (XLC_PUBLIC(lcd,encoding_name), CodesetTable[i].locale_name) == 0) { cset = &CodesetTable[i]; break; } } if (count == 0 && cset != NULL || (count == 1 && (symbol > 0x7f && symbol < 0xff00) && cset != NULL && cset->locale_code != 0)) { if ((count = _XGetCharCode(cset->locale_code, symbol, look, sizeof look))) { strcpy((char*) local_buf, cset->escape_seq); local_count = strlen(cset->escape_seq); local_buf[local_count] = look[0]; local_count++; local_buf[local_count] = '\0'; if ((count = im->methods->ctstowcs(ic->core.im, (char*) local_buf, local_count, buffer, nbytes, &dummy)) < 0) { count = 0; } } } else if (count > 1) { if ((count = im->methods->ctstowcs(ic->core.im, (char*) look, count, buffer, nbytes, &dummy)) < 0) { count = 0; } } else /* * we should make sure that if the character is a Latin1 character * and it's on the right side, and we're in a non-Latin1 locale * that this is a valid Latin1 character for this locale. */ buffer[0] = look[0]; return count; }