utf8.c   utf8.c 
/* ======================================================================== /* ========================================================================
* Copyright 1988-2007 University of Washington * Copyright 1988-2008 University of Washington
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* *
* ======================================================================== * ========================================================================
*/ */
skipping to change at line 26 skipping to change at line 26
* *
* Author: Mark Crispin * Author: Mark Crispin
* Networks and Distributed Computing * Networks and Distributed Computing
* Computing & Communications * Computing & Communications
* University of Washington * University of Washington
* Administration Building, AG-44 * Administration Building, AG-44
* Seattle, WA 98195 * Seattle, WA 98195
* Internet: MRC@CAC.Washington.EDU * Internet: MRC@CAC.Washington.EDU
* *
* Date: 11 June 1997 * Date: 11 June 1997
* Last Edited: 14 November 2007 * Last Edited: 17 January 2008
*/ */
#include <stdio.h> #include <stdio.h>
#include <ctype.h> #include <ctype.h>
#include "c-client.h" #include "c-client.h"
/* *** IMPORTANT *** /* *** IMPORTANT ***
* *
* There is a very important difference between "character set" and "charset", * There is a very important difference between "character set" and "charset",
* and the comments in this file reflect these differences. A "character set" * and the comments in this file reflect these differences. A "character set"
* (also known as "coded character set") is a mapping between codepoints and * (also known as "coded character set") is a mapping between codepoints and
skipping to change at line 105 skipping to change at line 105
#endif #endif
#ifdef KSCTOUNICODE /* Korean */ #ifdef KSCTOUNICODE /* Korean */
static const struct utf8_eucparam ksc_param = { static const struct utf8_eucparam ksc_param = {
BASE_KSC5601_KU,BASE_KSC5601_TEN,MAX_KSC5601_KU,MAX_KSC5601_TEN, BASE_KSC5601_KU,BASE_KSC5601_TEN,MAX_KSC5601_KU,MAX_KSC5601_TEN,
(void *) ksc5601tab}; (void *) ksc5601tab};
#endif #endif
/* List of supported charsets */ /* List of supported charsets */
static const CHARSET utf8_csvalid[] = { static const CHARSET utf8_csvalid[] = {
{"US-ASCII",CT_ASCII,NIL,NIL,NIL}, {"US-ASCII",CT_ASCII,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"UTF-8",CT_UTF8,NIL,SC_UNICODE,NIL}, NIL,NIL,NIL},
{"UTF-7",CT_UTF7,NIL,SC_UNICODE,"UTF-8"}, {"UTF-8",CT_UTF8,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-1",CT_1BYTE0,NIL,SC_LATIN_1,NIL}, NIL,SC_UNICODE,NIL},
{"ISO-8859-2",CT_1BYTE,(void *) iso8859_2tab,SC_LATIN_2,NIL}, {"UTF-7",CT_UTF7,CF_PRIMARY | CF_POSTING | CF_UNSUPRT,
{"ISO-8859-3",CT_1BYTE,(void *) iso8859_3tab,SC_LATIN_3,NIL}, NIL,SC_UNICODE,"UTF-8"},
{"ISO-8859-4",CT_1BYTE,(void *) iso8859_4tab,SC_LATIN_4,NIL}, {"ISO-8859-1",CT_1BYTE0,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-5",CT_1BYTE,(void *) iso8859_5tab,SC_CYRILLIC,"KOI8-R"}, NIL,SC_LATIN_1,NIL},
{"ISO-8859-6",CT_1BYTE,(void *) iso8859_6tab,SC_ARABIC,NIL}, {"ISO-8859-2",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-7",CT_1BYTE,(void *) iso8859_7tab,SC_GREEK,NIL}, (void *) iso8859_2tab,SC_LATIN_2,NIL},
{"ISO-8859-8",CT_1BYTE,(void *) iso8859_8tab,SC_HEBREW,NIL}, {"ISO-8859-3",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-9",CT_1BYTE,(void *) iso8859_9tab,SC_LATIN_5,NIL}, (void *) iso8859_3tab,SC_LATIN_3,NIL},
{"ISO-8859-10",CT_1BYTE,(void *) iso8859_10tab,SC_LATIN_6,NIL}, {"ISO-8859-4",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-11",CT_1BYTE,(void *) iso8859_11tab,SC_THAI,NIL}, (void *) iso8859_4tab,SC_LATIN_4,NIL},
{"ISO-8859-5",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_5tab,SC_CYRILLIC,"KOI8-R"},
{"ISO-8859-6",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_6tab,SC_ARABIC,NIL},
{"ISO-8859-7",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_7tab,SC_GREEK,NIL},
{"ISO-8859-8",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_8tab,SC_HEBREW,NIL},
{"ISO-8859-9",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_9tab,SC_LATIN_5,NIL},
{"ISO-8859-10",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_10tab,SC_LATIN_6,NIL},
{"ISO-8859-11",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_11tab,SC_THAI,NIL},
#if 0 /* ISO 8859-12 reserved for ISCII(?) */ #if 0 /* ISO 8859-12 reserved for ISCII(?) */
{"ISO-8859-12",CT_1BYTE,(void *) iso8859_12tab,NIL,NIL}, {"ISO-8859-12",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) iso8859_12tab,NIL,NIL},
#endif #endif
{"ISO-8859-13",CT_1BYTE,(void *) iso8859_13tab,SC_LATIN_7,NIL}, {"ISO-8859-13",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-14",CT_1BYTE,(void *) iso8859_14tab,SC_LATIN_8,NIL}, (void *) iso8859_13tab,SC_LATIN_7,NIL},
{"ISO-8859-15",CT_1BYTE,(void *) iso8859_15tab,SC_LATIN_9,NIL}, {"ISO-8859-14",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"ISO-8859-16",CT_1BYTE,(void *) iso8859_16tab,SC_LATIN_10,NIL}, (void *) iso8859_14tab,SC_LATIN_8,NIL},
{"KOI8-R",CT_1BYTE,(void *) koi8rtab,SC_CYRILLIC,NIL}, {"ISO-8859-15",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"KOI8-U",CT_1BYTE,(void *) koi8utab,SC_CYRILLIC | SC_UKRANIAN,NIL}, (void *) iso8859_15tab,SC_LATIN_9,NIL},
{"KOI8-RU",CT_1BYTE,(void *) koi8utab,SC_CYRILLIC | SC_UKRANIAN,"KOI8-U"} {"ISO-8859-16",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
, (void *) iso8859_16tab,SC_LATIN_10,NIL},
{"TIS-620",CT_1BYTE,(void *) tis620tab,SC_THAI,"ISO-8859-11"}, {"KOI8-R",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"VISCII",CT_1BYTE8,(void *) visciitab,SC_VIETNAMESE,NIL}, (void *) koi8rtab,SC_CYRILLIC,NIL},
{"KOI8-U",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) koi8utab,SC_CYRILLIC | SC_UKRANIAN,NIL},
{"KOI8-RU",CT_1BYTE,CF_DISPLAY,
(void *) koi8utab,SC_CYRILLIC | SC_UKRANIAN,"KOI8-U"},
{"TIS-620",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) tis620tab,SC_THAI,"ISO-8859-11"},
{"VISCII",CT_1BYTE8,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) visciitab,SC_VIETNAMESE,NIL},
#ifdef GBTOUNICODE #ifdef GBTOUNICODE
{"GBK",CT_DBYTE,(void *) &gb_param,SC_CHINESE_SIMPLIFIED,NIL}, {"GBK",CT_DBYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"GB2312",CT_DBYTE,(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"}, (void *) &gb_param,SC_CHINESE_SIMPLIFIED,NIL},
{"CN-GB",CT_DBYTE,(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"}, {"GB2312",CT_DBYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
{"CN-GB",CT_DBYTE,CF_DISPLAY,
(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
#ifdef CNS1TOUNICODE #ifdef CNS1TOUNICODE
{"ISO-2022-CN",CT_2022,NIL,SC_CHINESE_SIMPLIFIED | SC_CHINESE_TRADITIONAL {"ISO-2022-CN",CT_2022,CF_PRIMARY | CF_UNSUPRT,
, NIL,SC_CHINESE_SIMPLIFIED | SC_CHINESE_TRADITIONAL,
NIL}, NIL},
#endif #endif
#endif #endif
#ifdef GB12345TOUNICODE #ifdef GB12345TOUNICODE
{"CN-GB-12345",CT_DBYTE,(void *) &gbt_param,SC_CHINESE_TRADITIONAL,"BIG5" {"CN-GB-12345",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
}, (void *) &gbt_param,SC_CHINESE_TRADITIONAL,"BIG5"},
#endif #endif
#ifdef BIG5TOUNICODE #ifdef BIG5TOUNICODE
{"BIG5",CT_DBYTE2,(void *) big5_param,SC_CHINESE_TRADITIONAL,NIL}, {"BIG5",CT_DBYTE2,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"CN-BIG5",CT_DBYTE2,(void *) big5_param,SC_CHINESE_TRADITIONAL,"BIG5"}, (void *) big5_param,SC_CHINESE_TRADITIONAL,NIL},
{"BIG-5",CT_DBYTE2,(void *) big5_param,SC_CHINESE_TRADITIONAL,"BIG5"}, {"CN-BIG5",CT_DBYTE2,CF_DISPLAY,
(void *) big5_param,SC_CHINESE_TRADITIONAL,"BIG5"},
{"BIG-5",CT_DBYTE2,CF_DISPLAY,
(void *) big5_param,SC_CHINESE_TRADITIONAL,"BIG5"},
#endif #endif
#ifdef JISTOUNICODE #ifdef JISTOUNICODE
{"ISO-2022-JP",CT_2022,NIL,SC_JAPANESE,NIL}, {"ISO-2022-JP",CT_2022,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"EUC-JP",CT_EUC,(void *) jis_param,SC_JAPANESE,"ISO-2022-JP"}, NIL,SC_JAPANESE,NIL},
{"SHIFT_JIS",CT_SJIS,NIL,SC_JAPANESE,"ISO-2022-JP"}, {"EUC-JP",CT_EUC,CF_PRIMARY | CF_DISPLAY,
{"SHIFT-JIS",CT_SJIS,NIL,SC_JAPANESE,"ISO-2022-JP"}, (void *) jis_param,SC_JAPANESE,"ISO-2022-JP"},
{"SHIFT_JIS",CT_SJIS,CF_PRIMARY | CF_DISPLAY,
NIL,SC_JAPANESE,"ISO-2022-JP"},
{"SHIFT-JIS",CT_SJIS,CF_PRIMARY | CF_DISPLAY,
NIL,SC_JAPANESE,"ISO-2022-JP"},
#ifdef JIS0212TOUNICODE #ifdef JIS0212TOUNICODE
{"ISO-2022-JP-1",CT_2022,NIL,SC_JAPANESE,"ISO-2022-JP"}, {"ISO-2022-JP-1",CT_2022,CF_UNSUPRT,
NIL,SC_JAPANESE,"ISO-2022-JP"},
#ifdef GBTOUNICODE #ifdef GBTOUNICODE
#ifdef KSCTOUNICODE #ifdef KSCTOUNICODE
{"ISO-2022-JP-2",CT_2022,NIL, {"ISO-2022-JP-2",CT_2022,CF_UNSUPRT,
NIL,
SC_LATIN_1 | SC_LATIN_2 | SC_LATIN_3 | SC_LATIN_4 | SC_LATIN_5 | SC_LATIN_1 | SC_LATIN_2 | SC_LATIN_3 | SC_LATIN_4 | SC_LATIN_5 |
SC_LATIN_6 | SC_LATIN_7 | SC_LATIN_8 | SC_LATIN_9 | SC_LATIN_10 | SC_LATIN_6 | SC_LATIN_7 | SC_LATIN_8 | SC_LATIN_9 | SC_LATIN_10 |
SC_ARABIC | SC_CYRILLIC | SC_GREEK | SC_HEBREW | SC_THAI | SC_ARABIC | SC_CYRILLIC | SC_GREEK | SC_HEBREW | SC_THAI |
SC_VIETNAMESE | SC_CHINESE_SIMPLIFIED | SC_JAPANESE | SC_KOREAN SC_VIETNAMESE | SC_CHINESE_SIMPLIFIED | SC_JAPANESE | SC_KOREAN
#ifdef CNS1TOUNICODE #ifdef CNS1TOUNICODE
| SC_CHINESE_TRADITIONAL | SC_CHINESE_TRADITIONAL
#endif #endif
,"UTF-8"}, ,"UTF-8"},
#endif #endif
#endif #endif
#endif #endif
#endif #endif
#ifdef KSCTOUNICODE #ifdef KSCTOUNICODE
{"ISO-2022-KR",CT_2022,NIL,SC_KOREAN,"EUC-KR"}, {"ISO-2022-KR",CT_2022,CF_PRIMARY | CF_DISPLAY | CF_UNSUPRT,
{"EUC-KR",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,NIL}, NIL,SC_KOREAN,"EUC-KR"},
{"KSC5601",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, {"EUC-KR",CT_DBYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"KSC_5601",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, (void *) &ksc_param,SC_KOREAN,NIL},
{"KS_C_5601-1987",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, {"KSC5601",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
{"KS_C_5601-1989",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"KS_C_5601-1992",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, {"KSC_5601",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
{"KS_C_5601-1997",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"KS_C_5601-1987",CT_DBYTE,CF_DISPLAY,
(void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"KS_C_5601-1989",CT_DBYTE,CF_DISPLAY,
(void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"KS_C_5601-1992",CT_DBYTE,CF_DISPLAY,
(void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"KS_C_5601-1997",CT_DBYTE,CF_DISPLAY,
(void *) &ksc_param,SC_KOREAN,"EUC-KR"},
#endif #endif
/* deep sigh */ /* deep sigh */
{"WINDOWS-874",CT_1BYTE,(void *) windows_874tab,SC_THAI,"ISO-8859-11"}, {"WINDOWS-874",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"CP874",CT_1BYTE,(void *) windows_874tab,SC_THAI,"ISO-8859-11"}, (void *) windows_874tab,SC_THAI,"ISO-8859-11"},
{"CP874",CT_1BYTE,CF_DISPLAY,
(void *) windows_874tab,SC_THAI,"ISO-8859-11"},
#ifdef GBTOUNICODE #ifdef GBTOUNICODE
{"WINDOWS-936",CT_DBYTE,(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"}, {"WINDOWS-936",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
{"CP936",CT_DBYTE,(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"}, (void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
{"CP936",CT_DBYTE,CF_DISPLAY,
(void *) &gb_param,SC_CHINESE_SIMPLIFIED,"GBK"},
#endif #endif
#ifdef KSCTOUNICODE #ifdef KSCTOUNICODE
{"WINDOWS-949",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, {"WINDOWS-949",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
{"CP949",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"X-WINDOWS-949",CT_DBYTE,(void *) &ksc_param,SC_KOREAN,"EUC-KR"}, {"CP949",CT_DBYTE,CF_DISPLAY,
#endif (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"WINDOWS-1250",CT_1BYTE,(void *) windows_1250tab,SC_LATIN_2,"ISO-8859-2" {"X-WINDOWS-949",CT_DBYTE,CF_PRIMARY | CF_DISPLAY,
}, (void *) &ksc_param,SC_KOREAN,"EUC-KR"},
{"CP1250",CT_1BYTE,(void *) windows_1250tab,SC_LATIN_2,"ISO-8859-2"}, #endif
{"WINDOWS-1251",CT_1BYTE,(void *) windows_1251tab,SC_CYRILLIC,"KOI8-R"}, {"WINDOWS-1250",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"CP1251",CT_1BYTE,(void *) windows_1251tab,SC_CYRILLIC,"KOI8-R"}, (void *) windows_1250tab,SC_LATIN_2,"ISO-8859-2"},
{"WINDOWS-1252",CT_1BYTE,(void *) windows_1252tab,SC_LATIN_1,"ISO-8859-1" {"CP1250",CT_1BYTE,CF_DISPLAY,
}, (void *) windows_1250tab,SC_LATIN_2,"ISO-8859-2"},
{"CP1252",CT_1BYTE,(void *) windows_1252tab,SC_LATIN_1,"ISO-8859-1"}, {"WINDOWS-1251",CT_1BYTE,CF_PRIMARY | CF_DISPLAY | CF_POSTING,
{"WINDOWS-1253",CT_1BYTE,(void *) windows_1253tab,SC_GREEK,"ISO-8859-7"}, (void *) windows_1251tab,SC_CYRILLIC,"KOI8-R"},
{"CP1253",CT_1BYTE,(void *) windows_1253tab,SC_GREEK,"ISO-8859-7"}, {"CP1251",CT_1BYTE,CF_DISPLAY,
{"WINDOWS-1254",CT_1BYTE,(void *) windows_1254tab,SC_LATIN_5,"ISO-8859-9" (void *) windows_1251tab,SC_CYRILLIC,"KOI8-R"},
}, {"WINDOWS-1252",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"CP1254",CT_1BYTE,(void *) windows_1254tab,SC_LATIN_5,"ISO-8859-9"}, (void *) windows_1252tab,SC_LATIN_1,"ISO-8859-1"},
{"WINDOWS-1255",CT_1BYTE,(void *) windows_1255tab,SC_HEBREW,"ISO-8859-8"} {"CP1252",CT_1BYTE,CF_DISPLAY,
, (void *) windows_1252tab,SC_LATIN_1,"ISO-8859-1"},
{"CP1255",CT_1BYTE,(void *) windows_1255tab,SC_HEBREW,"ISO-8859-8"}, {"WINDOWS-1253",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"WINDOWS-1256",CT_1BYTE,(void *) windows_1256tab,SC_ARABIC,"ISO-8859-6"} (void *) windows_1253tab,SC_GREEK,"ISO-8859-7"},
, {"CP1253",CT_1BYTE,CF_DISPLAY,
{"CP1256",CT_1BYTE,(void *) windows_1256tab,SC_ARABIC,"ISO-8859-6"}, (void *) windows_1253tab,SC_GREEK,"ISO-8859-7"},
{"WINDOWS-1257",CT_1BYTE,(void *) windows_1257tab,SC_LATIN_7,"ISO-8859-13 {"WINDOWS-1254",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
"}, (void *) windows_1254tab,SC_LATIN_5,"ISO-8859-9"},
{"CP1257",CT_1BYTE,(void *) windows_1257tab,SC_LATIN_7,"ISO-8859-13"}, {"CP1254",CT_1BYTE,CF_DISPLAY,
{"WINDOWS-1258",CT_1BYTE,(void *) windows_1258tab,SC_VIETNAMESE,"VISCII"} (void *) windows_1254tab,SC_LATIN_5,"ISO-8859-9"},
, {"WINDOWS-1255",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"CP1258",CT_1BYTE,(void *) windows_1258tab,SC_VIETNAMESE,"VISCII"}, (void *) windows_1255tab,SC_HEBREW,"ISO-8859-8"},
{"CP1255",CT_1BYTE,CF_DISPLAY,
(void *) windows_1255tab,SC_HEBREW,"ISO-8859-8"},
{"WINDOWS-1256",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) windows_1256tab,SC_ARABIC,"ISO-8859-6"},
{"CP1256",CT_1BYTE,CF_DISPLAY,
(void *) windows_1256tab,SC_ARABIC,"ISO-8859-6"},
{"WINDOWS-1257",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) windows_1257tab,SC_LATIN_7,"ISO-8859-13"},
{"CP1257",CT_1BYTE,CF_DISPLAY,
(void *) windows_1257tab,SC_LATIN_7,"ISO-8859-13"},
{"WINDOWS-1258",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) windows_1258tab,SC_VIETNAMESE,"VISCII"},
{"CP1258",CT_1BYTE,CF_DISPLAY,
(void *) windows_1258tab,SC_VIETNAMESE,"VISCII"},
/* deeper sigh */ /* deeper sigh */
{"IBM367",CT_ASCII,NIL,NIL,"US-ASCII"}, {"IBM367",CT_ASCII,CF_PRIMARY | CF_DISPLAY,
{"IBM437",CT_1BYTE,(void *) ibm_437tab,SC_LATIN_1,"ISO-8859-1"}, NIL,NIL,"US-ASCII"},
{"IBM737",CT_1BYTE,(void *) ibm_737tab,SC_GREEK,"ISO-8859-7"}, {"IBM437",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM775",CT_1BYTE,(void *) ibm_775tab,SC_LATIN_7,"ISO-8859-13"}, (void *) ibm_437tab,SC_LATIN_1,"ISO-8859-1"},
{"IBM850",CT_1BYTE,(void *) ibm_850tab,SC_LATIN_1,"ISO-8859-1"}, {"IBM737",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM852",CT_1BYTE,(void *) ibm_852tab,SC_LATIN_2,"ISO-8859-2"}, (void *) ibm_737tab,SC_GREEK,"ISO-8859-7"},
{"IBM855",CT_1BYTE,(void *) ibm_855tab,SC_CYRILLIC,"ISO-8859-5"}, {"IBM775",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM857",CT_1BYTE,(void *) ibm_857tab,SC_LATIN_5,"ISO-8859-9"}, (void *) ibm_775tab,SC_LATIN_7,"ISO-8859-13"},
{"IBM860",CT_1BYTE,(void *) ibm_860tab,SC_LATIN_1,"ISO-8859-1"}, {"IBM850",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM861",CT_1BYTE,(void *) ibm_861tab,SC_LATIN_6,"ISO-8859-10"}, (void *) ibm_850tab,SC_LATIN_1,"ISO-8859-1"},
{"IBM862",CT_1BYTE,(void *) ibm_862tab,SC_HEBREW,"ISO-8859-8"}, {"IBM852",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM863",CT_1BYTE,(void *) ibm_863tab,SC_LATIN_1,"ISO-8859-1"}, (void *) ibm_852tab,SC_LATIN_2,"ISO-8859-2"},
{"IBM864",CT_1BYTE,(void *) ibm_864tab,SC_ARABIC,"ISO-8859-6"}, {"IBM855",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM865",CT_1BYTE,(void *) ibm_865tab,SC_LATIN_6,"ISO-8859-10"}, (void *) ibm_855tab,SC_CYRILLIC,"ISO-8859-5"},
{"IBM866",CT_1BYTE,(void *) ibm_866tab,SC_CYRILLIC,"KOI8-R"}, {"IBM857",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
{"IBM869",CT_1BYTE,(void *) ibm_869tab,SC_GREEK,"ISO-8859-7"}, (void *) ibm_857tab,SC_LATIN_5,"ISO-8859-9"},
{"IBM874",CT_1BYTE,(void *) ibm_874tab,SC_THAI,"ISO-8859-11"}, {"IBM860",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_860tab,SC_LATIN_1,"ISO-8859-1"},
{"IBM861",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_861tab,SC_LATIN_6,"ISO-8859-10"},
{"IBM862",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_862tab,SC_HEBREW,"ISO-8859-8"},
{"IBM863",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_863tab,SC_LATIN_1,"ISO-8859-1"},
{"IBM864",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_864tab,SC_ARABIC,"ISO-8859-6"},
{"IBM865",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_865tab,SC_LATIN_6,"ISO-8859-10"},
{"IBM866",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_866tab,SC_CYRILLIC,"KOI8-R"},
{"IBM869",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_869tab,SC_GREEK,"ISO-8859-7"},
{"IBM874",CT_1BYTE,CF_PRIMARY | CF_DISPLAY,
(void *) ibm_874tab,SC_THAI,"ISO-8859-11"},
/* deepest sigh */ /* deepest sigh */
{"ANSI_X3.4-1968",CT_ASCII,NIL,NIL,"US-ASCII"}, {"ANSI_X3.4-1968",CT_ASCII,CF_DISPLAY,
{"UNICODE-1-1-UTF-7",CT_UTF7,NIL,SC_UNICODE,"UTF-8"}, NIL,NIL,"US-ASCII"},
{"UNICODE-1-1-UTF-7",CT_UTF7,CF_UNSUPRT,
NIL,SC_UNICODE,"UTF-8"},
/* these should never appear in email */ /* these should never appear in email */
{"UCS-2",CT_UCS2,NIL,SC_UNICODE,"UTF-8"}, {"UCS-2",CT_UCS2,CF_PRIMARY | CF_DISPLAY | CF_NOEMAIL,
{"UCS-4",CT_UCS4,NIL,SC_UNICODE,"UTF-8"}, NIL,SC_UNICODE,"UTF-8"},
{"UTF-16",CT_UTF16,NIL,SC_UNICODE,"UTF-8"}, {"UCS-4",CT_UCS4,CF_PRIMARY | CF_DISPLAY | CF_NOEMAIL,
NIL,SC_UNICODE,"UTF-8"},
{"UTF-16",CT_UTF16,CF_PRIMARY | CF_DISPLAY | CF_NOEMAIL,
NIL,SC_UNICODE,"UTF-8"},
NIL NIL
}; };
/* Non-Unicode Script table */ /* Non-Unicode Script table */
static const SCRIPT utf8_scvalid[] = { static const SCRIPT utf8_scvalid[] = {
{"Arabic",NIL,SC_ARABIC}, {"Arabic",NIL,SC_ARABIC},
{"Chinese Simplified","China, Singapore",SC_CHINESE_SIMPLIFIED}, {"Chinese Simplified","China, Singapore",SC_CHINESE_SIMPLIFIED},
{"Chinese Traditional","Taiwan, Hong Kong, Macao",SC_CHINESE_TRADITIONAL} , {"Chinese Traditional","Taiwan, Hong Kong, Macao",SC_CHINESE_TRADITIONAL} ,
{"Cyrillic",NIL,SC_CYRILLIC}, {"Cyrillic",NIL,SC_CYRILLIC},
{"Cyrillic Ukranian",NIL,SC_UKRANIAN}, {"Cyrillic Ukranian",NIL,SC_UKRANIAN},
 End of changes. 19 change blocks. 
106 lines changed or deleted 189 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/