utf8.h | utf8.h | |||
---|---|---|---|---|
skipping to change at line 26 | skipping to change at line 26 | |||
* | * | |||
* Author: Mark Crispin | * Author: Mark Crispin | |||
* Networks and Distributed Computing | * Networks and Distributed Computing | |||
* Computing & Communications | * Computing & Communications | |||
* University of Washington | * University of Washington | |||
* Administration Building, AG-44 | * Administration Building, AG-44 | |||
* Seattle, WA 98195 | * Seattle, WA 98195 | |||
* Internet: MRC@CAC.Washington.EDU | * Internet: MRC@CAC.Washington.EDU | |||
* | * | |||
* Date: 11 June 1997 | * Date: 11 June 1997 | |||
* Last Edited: 1 March 2007 | * Last Edited: 16 March 2007 | |||
*/ | */ | |||
/* UTF-8 size and conversion routines from UCS-2 values (thus in the BMP). | /* UTF-8 size and conversion routines from UCS-2 values (thus in the BMP). | |||
* Don't use these if UTF-16 data (surrogate pairs) are an issue. | * Don't use these if UTF-16 data (surrogate pairs) are an issue. | |||
* For UCS-4 values, use the utf8_size() and utf8_put() functions. | * For UCS-4 values, use the utf8_size() and utf8_put() functions. | |||
*/ | */ | |||
#define UTF8_SIZE_BMP(c) ((c & 0xff80) ? ((c & 0xf800) ? 3 : 2) : 1) | #define UTF8_SIZE_BMP(c) ((c & 0xff80) ? ((c & 0xf800) ? 3 : 2) : 1) | |||
#define UTF8_PUT_BMP(b,c) { \ | #define UTF8_PUT_BMP(b,c) { \ | |||
if (c & 0xff80) { /* non-ASCII? */ \ | if (c & 0xff80) { /* non-ASCII? */ \ | |||
skipping to change at line 66 | skipping to change at line 66 | |||
/* 0x0000 - 0xffff BMP plane */ | /* 0x0000 - 0xffff BMP plane */ | |||
#define U8GM_NONBMP 0xffff0000 /* mask for non-BMP values */ | #define U8GM_NONBMP 0xffff0000 /* mask for non-BMP values */ | |||
/* 0x10000 - 0x10ffff extended planes */ | /* 0x10000 - 0x10ffff extended planes */ | |||
/* 0x110000 - 0x7ffffff non-Unicode */ | /* 0x110000 - 0x7ffffff non-Unicode */ | |||
#define U8G_ERROR 0x80000000 /* error flag */ | #define U8G_ERROR 0x80000000 /* error flag */ | |||
#define U8G_BADCONT U8G_ERROR+1 /* continuation when not in progress */ | #define U8G_BADCONT U8G_ERROR+1 /* continuation when not in progress */ | |||
#define U8G_INCMPLT U8G_ERROR+2 /* incomplete UTF-8 character */ | #define U8G_INCMPLT U8G_ERROR+2 /* incomplete UTF-8 character */ | |||
#define U8G_NOTUTF8 U8G_ERROR+3 /* not a valid UTF-8 octet */ | #define U8G_NOTUTF8 U8G_ERROR+3 /* not a valid UTF-8 octet */ | |||
#define U8G_ENDSTRG U8G_ERROR+4 /* end of string */ | #define U8G_ENDSTRG U8G_ERROR+4 /* end of string */ | |||
#define U8G_ENDSTRI U8G_ERROR+5 /* end of string w/ incomplete UTF-8 char */ | #define U8G_ENDSTRI U8G_ERROR+5 /* end of string w/ incomplete UTF-8 char */ | |||
#define U8G_SURROGA U8G_ERROR+6 /* surrogate codepoint */ | ||||
#define U8G_NOTUNIC U8G_ERROR+7 /* non-Unicode codepoint */ | ||||
/* ucs4_width() return values */ | /* ucs4_width() return values */ | |||
#define U4W_ERROR 0x80000000 /* error flags */ | #define U4W_ERROR 0x80000000 /* error flags */ | |||
#define U4W_NOTUNCD U4W_ERROR+1 /* not a Unicode char */ | #define U4W_NOTUNCD U4W_ERROR+1 /* not a Unicode char */ | |||
#define U4W_PRIVATE U4W_ERROR+2 /* private-space plane */ | #define U4W_PRIVATE U4W_ERROR+2 /* private-space plane */ | |||
#define U4W_SSPCHAR U4W_ERROR+3 /* Supplementary Special-purpose Plane */ | #define U4W_SSPCHAR U4W_ERROR+3 /* Supplementary Special-purpose Plane */ | |||
#define U4W_UNASSGN U4W_ERROR+4 /* unassigned space plane */ | #define U4W_UNASSGN U4W_ERROR+4 /* unassigned space plane */ | |||
#define U4W_CONTROL U4W_ERROR+5 /* C0/C1 control */ | #define U4W_CONTROL U4W_ERROR+5 /* C0/C1 control */ | |||
#define U4W_CTLSRGT U4W_CONTROL /* in case legacy code references this */ | #define U4W_CTLSRGT U4W_CONTROL /* in case legacy code references this */ | |||
skipping to change at line 362 | skipping to change at line 364 | |||
/* UBOGON is used to represent a codepoint in a character set which does not | /* UBOGON is used to represent a codepoint in a character set which does not | |||
* map to Unicode. It is also used for mapping failures, e.g. incomplete | * map to Unicode. It is also used for mapping failures, e.g. incomplete | |||
* shift sequences. NOCHAR is used to represent a codepoint in Unicode | * shift sequences. NOCHAR is used to represent a codepoint in Unicode | |||
* which does not map to the target character set. Note that these names | * which does not map to the target character set. Note that these names | |||
* have the same text width as 0x????, for convenience in the mapping tables. | * have the same text width as 0x????, for convenience in the mapping tables. | |||
*/ | */ | |||
#define UBOGON UCS2_BOGON | #define UBOGON UCS2_BOGON | |||
#define NOCHAR 0xffff | #define NOCHAR 0xffff | |||
/* Non-Unicode codepoints */ | /* Codepoints in non-Unicode character sets */ | |||
/* Codepoints in ISO 646 character sets */ | /* Codepoints in ISO 646 character sets */ | |||
/* British ASCII codepoints */ | /* British ASCII codepoints */ | |||
#define BRITISH_POUNDSTERLING 0x23 | #define BRITISH_POUNDSTERLING 0x23 | |||
/* JIS Roman codepoints */ | /* JIS Roman codepoints */ | |||
#define JISROMAN_YEN 0x5c | #define JISROMAN_YEN 0x5c | |||
skipping to change at line 509 | skipping to change at line 511 | |||
unsigned long errch,long iso2022jp); | unsigned long errch,long iso2022jp); | |||
unsigned long utf8_rmapsize (SIZEDTEXT *text,unsigned short *rmap, | unsigned long utf8_rmapsize (SIZEDTEXT *text,unsigned short *rmap, | |||
unsigned long errch,long iso2022jp); | unsigned long errch,long iso2022jp); | |||
long ucs4_rmaptext (unsigned long *ucs4,unsigned long len,unsigned short *rmap, | long ucs4_rmaptext (unsigned long *ucs4,unsigned long len,unsigned short *rmap, | |||
SIZEDTEXT *ret,unsigned long errch); | SIZEDTEXT *ret,unsigned long errch); | |||
long ucs4_rmaplen (unsigned long *ucs4,unsigned long len,unsigned short *rmap, | long ucs4_rmaplen (unsigned long *ucs4,unsigned long len,unsigned short *rmap, | |||
unsigned long errch); | unsigned long errch); | |||
long ucs4_rmapbuf (unsigned char *t,unsigned long *ucs4,unsigned long len, | long ucs4_rmapbuf (unsigned char *t,unsigned long *ucs4,unsigned long len, | |||
unsigned short *rmap,unsigned long errch); | unsigned short *rmap,unsigned long errch); | |||
unsigned long utf8_get (unsigned char **s,unsigned long *i); | unsigned long utf8_get (unsigned char **s,unsigned long *i); | |||
unsigned long utf8_get_raw (unsigned char **s,unsigned long *i); | ||||
unsigned long ucs4_cs_get (CHARSET *cs,unsigned char **s,unsigned long *i); | unsigned long ucs4_cs_get (CHARSET *cs,unsigned char **s,unsigned long *i); | |||
const CHARSET *utf8_infercharset (SIZEDTEXT *src); | const CHARSET *utf8_infercharset (SIZEDTEXT *src); | |||
long utf8_validate (unsigned char *s,unsigned long i); | long utf8_validate (unsigned char *s,unsigned long i); | |||
void utf8_text_1byte0 (SIZEDTEXT *text,SIZEDTEXT *ret,ucs4cn_t cv,ucs4de_t de); | void utf8_text_1byte0 (SIZEDTEXT *text,SIZEDTEXT *ret,ucs4cn_t cv,ucs4de_t de); | |||
void utf8_text_1byte (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, | void utf8_text_1byte (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, | |||
ucs4de_t de); | ucs4de_t de); | |||
void utf8_text_1byte8 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, | void utf8_text_1byte8 (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, | |||
ucs4de_t de); | ucs4de_t de); | |||
void utf8_text_euc (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, | void utf8_text_euc (SIZEDTEXT *text,SIZEDTEXT *ret,void *tab,ucs4cn_t cv, | |||
ucs4de_t de); | ucs4de_t de); | |||
End of changes. 4 change blocks. | ||||
2 lines changed or deleted | 5 lines changed or added | |||