utf8.h   utf8.h 
skipping to change at line 26 skipping to change at line 26
* *
* Author: Mark Crispin * Author: Mark Crispin
* Networks and Distributed Computing * Networks and Distributed Computing
* Computing & Communications * Computing & Communications
* University of Washington * University of Washington
* Administration Building, AG-44 * Administration Building, AG-44
* Seattle, WA 98195 * Seattle, WA 98195
* Internet: MRC@CAC.Washington.EDU * Internet: MRC@CAC.Washington.EDU
* *
* Date: 11 June 1997 * Date: 11 June 1997
* Last Edited: 1 March 2007 * Last Edited: 16 March 2007
*/ */
/* UTF-8 size and conversion routines from UCS-2 values (thus in the BMP). /* UTF-8 size and conversion routines from UCS-2 values (thus in the BMP).
* Don't use these if UTF-16 data (surrogate pairs) are an issue. * Don't use these if UTF-16 data (surrogate pairs) are an issue.
* For UCS-4 values, use the utf8_size() and utf8_put() functions. * For UCS-4 values, use the utf8_size() and utf8_put() functions.
*/ */
#define UTF8_SIZE_BMP(c) ((c & 0xff80) ? ((c & 0xf800) ? 3 : 2) : 1) #define UTF8_SIZE_BMP(c) ((c & 0xff80) ? ((c & 0xf800) ? 3 : 2) : 1)
#define UTF8_PUT_BMP(b,c) { \ #define UTF8_PUT_BMP(b,c) { \
if (c & 0xff80) { /* non-ASCII? */ \ if (c & 0xff80) { /* non-ASCII? */ \
skipping to change at line 66 skipping to change at line 66
/* 0x0000 - 0xffff BMP plane */ /* 0x0000 - 0xffff BMP plane */
#define U8GM_NONBMP 0xffff0000 /* mask for non-BMP values */ #define U8GM_NONBMP 0xffff0000 /* mask for non-BMP values */
/* 0x10000 - 0x10ffff extended planes */ /* 0x10000 - 0x10ffff extended planes */
/* 0x110000 - 0x7ffffff non-Unicode */ /* 0x110000 - 0x7ffffff non-Unicode */
#define U8G_ERROR 0x80000000 /* error flag */ #define U8G_ERROR 0x80000000 /* error flag */
#define U8G_BADCONT U8G_ERROR+1 /* continuation when not in progress */ #define U8G_BADCONT U8G_ERROR+1 /* continuation when not in progress */
#define U8G_INCMPLT U8G_ERROR+2 /* incomplete UTF-8 character */ #define U8G_INCMPLT U8G_ERROR+2 /* incomplete UTF-8 character */
#define U8G_NOTUTF8 U8G_ERROR+3 /* not a valid UTF-8 octet */ #define U8G_NOTUTF8 U8G_ERROR+3 /* not a valid UTF-8 octet */
#define U8G_ENDSTRG U8G_ERROR+4 /* end of string */ #define U8G_ENDSTRG U8G_ERROR+4 /* end of string */
#define U8G_ENDSTRI U8G_ERROR+5 /* end of string w/ incomplete UTF-8 char */ #define U8G_ENDSTRI U8G_ERROR+5 /* end of string w/ incomplete UTF-8 char */
#define U8G_SURROGA U8G_ERROR+6 /* surrogate codepoint */
#define U8G_NOTUNIC U8G_ERROR+7 /* non-Unicode codepoint */
/* ucs4_width() return values */ /* ucs4_width() return values */
#define U4W_ERROR 0x80000000 /* error flags */ #define U4W_ERROR 0x80000000 /* error flags */
#define U4W_NOTUNCD U4W_ERROR+1 /* not a Unicode char */ #define U4W_NOTUNCD U4W_ERROR+1 /* not a Unicode char */
#define U4W_PRIVATE U4W_ERROR+2 /* private-space plane */ #define U4W_PRIVATE U4W_ERROR+2 /* private-space plane */
#define U4W_SSPCHAR U4W_ERROR+3 /* Supplementary Special-purpose Plane */ #define U4W_SSPCHAR U4W_ERROR+3 /* Supplementary Special-purpose Plane */
#define U4W_UNASSGN U4W_ERROR+4 /* unassigned space plane */ #define U4W_UNASSGN U4W_ERROR+4 /* unassigned space plane */
#define U4W_CONTROL U4W_ERROR+5 /* C0/C1 control */ #define U4W_CONTROL U4W_ERROR+5 /* C0/C1 control */
#define U4W_CTLSRGT U4W_CONTROL /* in case legacy code references this */ #define U4W_CTLSRGT U4W_CONTROL /* in case legacy code references this */
skipping to change at line 362 skipping to change at line 364
/* UBOGON is used to represent a codepoint in a character set which does not /* UBOGON is used to represent a codepoint in a character set which does not
* map to Unicode. It is also used for mapping failures, e.g. incomplete * map to Unicode. It is also used for mapping failures, e.g. incomplete
* shift sequences. NOCHAR is used to represent a codepoint in Unicode * shift sequences. NOCHAR is used to represent a codepoint in Unicode
* which does not map to the target character set. Note that these names * which does not map to the target character set. Note that these names
* have the same text width as 0x????, for convenience in the mapping tables. * have the same text width as 0x????, for convenience in the mapping tables.
*/ */
#define UBOGON UCS2_BOGON #define UBOGON UCS2_BOGON
#define NOCHAR 0xffff #define NOCHAR 0xffff
/* Non-Unicode codepoints */ /* Codepoints in non-Unicode character sets */
/* Codepoints in ISO 646 character sets */ /* Codepoints in ISO 646 character sets */
/* British ASCII codepoints */ /* British ASCII codepoints */
#define BRITISH_POUNDSTERLING 0x23 #define BRITISH_POUNDSTERLING 0x23
/* JIS Roman codepoints */ /* JIS Roman codepoints */
#define JISROMAN_YEN 0x5c #define JISROMAN_YEN 0x5c
skipping to change at line 509 skipping to change at line 511
unsigned long errch,long iso2022jp); unsigned long errch,long iso2022jp);
unsigned long utf8_rmapsize (SIZEDTEXT *text,unsigned short *rmap, unsigned long utf8_rmapsize (SIZEDTEXT *text,unsigned short *rmap,
unsigned long errch,long iso2022jp); unsigned long errch,long iso2022jp);
long ucs4_rmaptext (unsigned long *ucs4,unsigned long len,unsigned short *rmap, long ucs4_rmaptext (unsigned long *ucs4,unsigned long len,unsigned short *rmap,
SIZEDTEXT *ret,unsigned long errch); SIZEDTEXT *ret,unsigned long errch);
long ucs4_rmaplen (unsigned long *ucs4,unsigned long len,unsigned short *rmap, long ucs4_rmaplen (unsigned long *ucs4,unsigned long len,unsigned short *rmap,
unsigned long errch); unsigned long errch);
long ucs4_rmapbuf (unsigned char *t,unsigned long *ucs4,unsigned long len, long ucs4_rmapbuf (unsigned char *t,unsigned long *ucs4,unsigned long len,
unsigned short *rmap,unsigned long errch); unsigned short *rmap,unsigned long errch);
unsigned long utf8_get (unsigned char **s,unsigned long *i); unsigned long utf8_get (unsigned char **s,unsigned long *i);
unsigned long utf8_get_raw (unsigned char **s,unsigned long *i);
unsigned long ucs4_cs_get (CHARSET *cs,unsigned char **s,unsigned long *i); unsigned long ucs4_cs_get (CHARSET *cs,unsigned char **s,unsigned long *i);
const CHARSET *utf8_infercharset (SIZEDTEXT *src); const CHARSET *utf8_infercharset (SIZEDTEXT *src);
long utf8_validate (unsigned char *s,unsigned long i); long utf8_validate (unsigned char *s,unsigned long i);
void utf8_text_1byte0 (SIZEDTEXT *text,SIZEDTEXT *ret,ucs4cn_t cv,ucs4de_t de); void utf8_text_1byte0 (SIZEDTEXT *text,SIZEDTEXT *ret,ucs4cn_t cv,ucs4de_t de);
void utf8_text_1byte (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, void utf8_text_1byte (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv,
ucs4de_t de); ucs4de_t de);
void utf8_text_1byte8 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, void utf8_text_1byte8 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv,
ucs4de_t de); ucs4de_t de);
void utf8_text_euc (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, void utf8_text_euc (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv,
ucs4de_t de); ucs4de_t de);
 End of changes. 4 change blocks. 
2 lines changed or deleted 5 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/