common.h | common.h | |||
---|---|---|---|---|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | ||||
#ifndef _COMMON_H_ | #ifndef _COMMON_H_ | |||
#define _COMMON_H_ | #define _COMMON_H_ | |||
/** | /** | |||
* common.h | * common.h | |||
* | * | |||
* Copyright (C) 2003 WiseGuys Internet B.V. | * Copyright (C) 2003 WiseGuys Internet B.V. | |||
* | * | |||
* THE BSD LICENSE | * THE BSD LICENSE | |||
* | * | |||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | |||
skipping to change at line 41 | skipping to change at line 42 | |||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | */ | |||
#include <stdio.h> | #include <stdio.h> | |||
#include <time.h> | #include <time.h> | |||
#include <stdlib.h> | ||||
#ifdef __cplusplus | #ifdef __cplusplus | |||
extern "C" { | extern "C" | |||
{ | ||||
#endif | #endif | |||
#ifdef _MSC_VER | #ifdef _MSC_VER | |||
#include <winsock2.h> | typedef __int8 int8_t; | |||
typedef unsigned __int8 uint8_t; | ||||
typedef __int8 int8_t; | typedef __int16 int16_t; | |||
typedef unsigned __int8 uint8_t; | typedef unsigned __int16 uint16_t; | |||
typedef __int16 int16_t; | typedef __int32 int32_t; | |||
typedef unsigned __int16 uint16_t; | typedef unsigned __int32 uint32_t; | |||
typedef __int32 int32_t; | typedef __int64 int64_t; | |||
typedef unsigned __int32 uint32_t; | typedef unsigned __int64 uint64_t; | |||
typedef __int64 int64_t; | ||||
typedef unsigned __int64 uint64_t; | ||||
#else | #else | |||
#include <sys/time.h> | # include <stdint.h> | |||
#include <stdint.h> | ||||
#endif | #endif | |||
typedef uint32_t uint4; | typedef uint32_t uint4; | |||
typedef uint16_t uint2; | typedef uint16_t uint2; | |||
typedef uint8_t uchar; | typedef uint8_t uchar; | |||
typedef int32_t sint4; | typedef int32_t sint4; | |||
typedef int16_t sint2; | typedef int16_t sint2; | |||
typedef int8_t schar; | typedef int8_t schar; | |||
typedef int8_t boole; | typedef int8_t boole; | |||
typedef struct wgtimer_s { | extern void *wg_zalloc(size_t size); | |||
struct timeval start; | ||||
struct timeval stop; | extern char *wg_getline(char *line, int size, FILE * fp); | |||
} wgtimer_t; | ||||
extern unsigned int wg_split(char **result, char *dest, char *src, | ||||
extern void *wg_malloc( size_t size ); | int maxsegments); | |||
extern void *wg_calloc( size_t nmemb, size_t size ); | extern char *wg_strgmov(char *dest, const char *src, | |||
extern void *wg_zalloc( size_t size ); | const char *destlimit); | |||
extern char* wg_strdup( const char *s ); | extern char *wg_trim(char *dest, const char *src); | |||
extern void* wg_realloc( void *ptr, size_t size ) ; | ||||
extern void wg_free( void *mem ); | ||||
extern char *wg_getline( char *line, int size, FILE *fp ); | ||||
extern void wg_timerstart(wgtimer_t *t); | ||||
extern uint4 wg_timerstop(wgtimer_t *t); | ||||
extern unsigned int wg_split( char **result, char *dest, char *src, int maxsegments ); | ||||
extern char *wg_strgmov( char *dest, const char *src, const char *destlimit ); | ||||
extern char *wg_trim( char *dest, const char *src ); | ||||
#ifdef __cplusplus | #ifdef __cplusplus | |||
} | } | |||
#endif | #endif | |||
#endif | #endif | |||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ | ||||
End of changes. 7 change blocks. | ||||
46 lines changed or deleted | 32 lines changed or added | |||
constants.h | constants.h | |||
---|---|---|---|---|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | ||||
#ifndef _CONSTANTS_H_ | #ifndef _CONSTANTS_H_ | |||
#define _CONSTANTS_H_ | #define _CONSTANTS_H_ | |||
/* | /* | |||
* constants.h -- some constants used throughout the code. Not pretty, | * constants.h -- some constants used throughout the code. Not pretty, | |||
* but certainly convenient. | * but certainly convenient. | |||
* | * | |||
* Copyright (C) 2003 WiseGuys Internet B.V. | * Copyright (C) 2003 WiseGuys Internet B.V. | |||
* | * | |||
* THE BSD LICENSE | * THE BSD LICENSE | |||
skipping to change at line 42 | skipping to change at line 43 | |||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | */ | |||
#include <limits.h> | #include <limits.h> | |||
#define _UTF8_ | /* Reported matches are those fingerprints with a score less than best score * | |||
#define DESCRIPTION "out of place" | THRESHOLDVALUE (i.e. a THRESHOLDVALUE of 1.03 means matches must score | |||
within 3% from the best score.) */ | ||||
/* Reported matches are those fingerprints with a score less than best | ||||
* score * THRESHOLDVALUE (i.e. a THRESHOLDVALUE of 1.03 means matches | ||||
* must score within 3% from the best score.) | ||||
*/ | ||||
#define THRESHOLDVALUE 1.03 | #define THRESHOLDVALUE 1.03 | |||
/* If more than MAXCANDIDATES matches are found, the classifier reports | /* If more than MAXCANDIDATES matches are found, the classifier reports | |||
* unknown, because the input is obviously confusing. | unknown, because the input is obviously confusing. */ | |||
*/ | ||||
#define MAXCANDIDATES 5 | #define MAXCANDIDATES 5 | |||
/* The size of the buffer used to report the classification. | /* The size of the buffer used to report the classification. */ | |||
*/ | ||||
#define MAXOUTPUTSIZE 1024 | #define MAXOUTPUTSIZE 1024 | |||
/* Maximum number of n-grams in a fingerprint */ | /* Maximum number of n-grams in a fingerprint */ | |||
#define MAXNGRAMS 400 | #define MAXNGRAMS 400 | |||
/* Maximum number of character of an n-gram? */ | /* Maximum number of character of an n-gram? */ | |||
#define MAXNGRAMSYMBOL 5 | #define MAXNGRAMSYMBOL 5 | |||
/* Maximum size of the string representing an n-gram (must be greater than | /* Maximum size of the string representing an n-gram (must be greater than | |||
number of symbol) */ | number of symbol) */ | |||
#ifdef _UTF8_ | ||||
#define MAXNGRAMSIZE 20 | #define MAXNGRAMSIZE 20 | |||
#else | ||||
#define MAXNGRAMSIZE MAXNGRAMSYMBOL | ||||
#endif | ||||
/* Which characters are not acceptable in n-grams? */ | /* Which characters are not acceptable in n-grams? */ | |||
#define INVALID(c) (isspace((unsigned char)c) || isdigit((unsigned char)c)) | #define INVALID(c) (isspace((unsigned char)c) || isdigit((unsigned char)c)) | |||
/* Minimum size (in characters) for accepting a document */ | /* Minimum size (in characters) for accepting a document */ | |||
#define MINDOCSIZE 6 | #define MINDOCSIZE 1 | |||
/* Maximum penalty for missing an n-gram in fingerprint */ | /* Maximum penalty for missing an n-gram in fingerprint */ | |||
#define MAXOUTOFPLACE 400 | #define MAXOUTOFPLACE 400 | |||
/* Size of hash table is 2^TABLEPOW. */ | /* Size of hash table is 2^TABLEPOW. */ | |||
#define TABLEPOW 13 | #define TABLEPOW 13 | |||
#define MAXSCORE INT_MAX | #define MAXSCORE INT_MAX | |||
/* where the fingerprints files are stored */ | /* where the fingerprints files are stored */ | |||
#define DEFAULT_FINGERPRINTS_PATH "" | #define DEFAULT_FINGERPRINTS_PATH "" | |||
#endif | #endif | |||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ | ||||
End of changes. 8 change blocks. | ||||
19 lines changed or deleted | 10 lines changed or added | |||
fingerprint.h | fingerprint.h | |||
---|---|---|---|---|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | ||||
#ifndef _FINGERPRINT_H_ | #ifndef _FINGERPRINT_H_ | |||
#define _FINGERPRINT_H_ | #define _FINGERPRINT_H_ | |||
/* | /* | |||
* Copyright (C) 2003 WiseGuys Internet B.V. | * Copyright (C) 2003 WiseGuys Internet B.V. | |||
* | * | |||
* THE BSD LICENSE | * THE BSD LICENSE | |||
* | * | |||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | |||
* are met: | * are met: | |||
skipping to change at line 39 | skipping to change at line 40 | |||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | */ | |||
#include "common.h" | #include "common.h" | |||
#ifdef __cplusplus | #ifdef __cplusplus | |||
extern "C" { | extern "C" | |||
{ | ||||
#endif | #endif | |||
extern void *fp_Init(const char *name); | extern void *fp_Init(const char *name); | |||
extern void fp_Done( void *handle ); | extern void fp_Done(void *handle); | |||
extern int fp_Create( void *handle, const char *buffer, uint4 bufsize, | extern int fp_Create(void *handle, const char *buffer, uint4 bufsize, | |||
uint4 maxngrams ); | uint4 maxngrams); | |||
extern int fp_Read( void *handle, const char *fname, int maxngrams ); | extern int fp_Read(void *handle, const char *fname, int maxngrams); | |||
extern sint4 fp_Compare( void *cat, void *unknown, int cutoff ); | extern sint4 fp_Compare(void *cat, void *unknown, int cutoff); | |||
extern void fp_Show( void *handle ); | extern void fp_Show(void *handle); | |||
#ifdef __cplusplus | #ifdef __cplusplus | |||
extern "C" { | extern "C" | |||
{ | ||||
#endif | #endif | |||
extern const char *fp_Name( void *handle ); | extern const char *fp_Name(void *handle); | |||
#ifdef __cplusplus | #ifdef __cplusplus | |||
} | } | |||
#endif | #endif | |||
extern void fp_Print( void *handle, FILE *fp ); | extern void fp_Print(void *handle, FILE * fp); | |||
#ifdef __cplusplus | #ifdef __cplusplus | |||
} | } | |||
#endif | #endif | |||
#endif | #endif | |||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ | ||||
End of changes. 8 change blocks. | ||||
12 lines changed or deleted | 15 lines changed or added | |||
textcat.h | textcat.h | |||
---|---|---|---|---|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | ||||
#ifndef _TEXTCAT_H_ | #ifndef _TEXTCAT_H_ | |||
#define _TEXTCAT_H_ | #define _TEXTCAT_H_ | |||
/* | /* | |||
* textcat.h -- routines for categorizing text | * textcat.h -- routines for categorizing text | |||
* | * | |||
* Copyright (C) 2003 WiseGuys Internet B.V. | * Copyright (C) 2003 WiseGuys Internet B.V. | |||
* | * | |||
* THE BSD LICENSE | * THE BSD LICENSE | |||
* | * | |||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | |||
skipping to change at line 38 | skipping to change at line 39 | |||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*/ | */ | |||
#include <stdio.h> | #include "exttextcat-version.h" | |||
#define _TEXTCAT_RESULT_UNKOWN "UNKNOWN" | #define _TEXTCAT_RESULT_UNKOWN "UNKNOWN" | |||
#define _TEXTCAT_RESULT_SHORT "SHORT" | #define _TEXTCAT_RESULT_SHORT "SHORT" | |||
#ifdef __cplusplus | #ifdef __cplusplus | |||
extern "C" { | extern "C" | |||
{ | ||||
#endif | #endif | |||
/** | /** | |||
* textcat_Init() - Initialize the text classifier. The textfile | * textcat_Init() - Initialize the text classifier. The textfile | |||
* conffile should contain a list of fingerprint filenames and | * conffile should contain a list of fingerprint filenames and | |||
* identification strings for the categories. The filenames should be | * identification strings for the categories. The filenames should be | |||
* reachable from the current working directory. The identification | * reachable from the current working directory. The identification | |||
* strings will are used in the classification output. | * strings will are used in the classification output. | |||
* | * | |||
* Returns: handle on success, NULL on error. (At the moment, the | * Returns: handle on success, NULL on error. (At the moment, the | |||
* only way errors can occur, is when the library cannot read the | * only way errors can occur, is when the library cannot read the | |||
* conffile, or one of the fingerprint files listed in it.) | * conffile, or one of the fingerprint files listed in it.) | |||
* | * | |||
* Replace older function (and has exacly the same behaviour) | * Replace older function (and has exacly the same behaviour) | |||
* see below | * see below | |||
*/ | */ | |||
extern void *textcat_Init( const char *conffile ); | extern void *textcat_Init(const char *conffile); | |||
/** | /** | |||
* Originaly this function had only one parameter (conffile) it has been modified since OOo must be able to load alternativ DB | * Originaly this function had only one parameter (conffile) it has been modified since OOo must be able to load alternativ DB | |||
* Basicaly prefix is the directory path where fingerprints are stored | * Basicaly prefix is the directory path where fingerprints are stored | |||
*/ | */ | |||
extern void *special_textcat_Init( const char *conffile, const char *prefix | extern void *special_textcat_Init(const char *conffile, | |||
); | const char *prefix); | |||
/** | /** | |||
* textcat_Done() - Free up resources for handle | * textcat_Done() - Free up resources for handle | |||
*/ | */ | |||
extern void textcat_Done( void *handle ); | extern void textcat_Done(void *handle); | |||
/** | /** | |||
* textcat_Classify() - Give the most likely categories for buffer | * textcat_Classify() - Give the most likely categories for buffer | |||
* with length size. | * with length size. | |||
* | * | |||
* Returns: string containing a list of category id's, each one | * Returns: string containing a list of category id's, each one | |||
* between square brackets, "UNKNOWN" when not recognized, "SHORT" if the | * between square brackets, "UNKNOWN" when not recognized, "SHORT" if the | |||
* document was too short to make a reliable assessment. | * document was too short to make a reliable assessment. | |||
* | * | |||
* Performace note: longer buffers take longer to process. However, | * Performace note: longer buffers take longer to process. However, | |||
* for many uses it is not necessary to categorize the whole buffer. | * for many uses it is not necessary to categorize the whole buffer. | |||
* For language classification, a few hundred bytes will suffice. | * For language classification, a few hundred bytes will suffice. | |||
*/ | */ | |||
extern char *textcat_Classify( void *handle, const char *buffer, | extern char *textcat_Classify(void *handle, const char *buffer, | |||
size_t size ); | size_t size); | |||
/** | /** | |||
* textcat_Version() - Returns a string describing the version of this classifier. | * textcat_Version() - Returns a string describing the version of this classifier. | |||
*/ | */ | |||
extern char *textcat_Version(); | extern const char *textcat_Version(void); | |||
#ifdef __cplusplus | #ifdef __cplusplus | |||
} | } | |||
#endif | #endif | |||
#endif | #endif | |||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ | ||||
End of changes. 9 change blocks. | ||||
9 lines changed or deleted | 11 lines changed or added | |||
utf8misc.h | utf8misc.h | |||
---|---|---|---|---|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ | ||||
/*************************************************************************** | /*************************************************************************** | |||
* Copyright (C) 2006 by Jocelyn Merand * | * Copyright (C) 2006 by Jocelyn Merand * | |||
* joc.mer@gmail.com * | * joc.mer@gmail.com * | |||
* * | * * | |||
* THE BSD LICENSE | * THE BSD LICENSE | |||
* | * | |||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | |||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | |||
* are met: | * are met: | |||
* | * | |||
skipping to change at line 43 | skipping to change at line 44 | |||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
***************************************************************************/ | ***************************************************************************/ | |||
#ifndef _UTF8_MISC_H_ | #ifndef _UTF8_MISC_H_ | |||
#define _UTF8_MISC_H_ | #define _UTF8_MISC_H_ | |||
/** | /** | |||
* These variables are used in character processing functions | * These variables are used in character processing functions | |||
* These have been added to manage utf-8 symbols, particularly escape chars | * These have been added to manage utf-8 symbols, particularly escape chars | |||
*/ | */ | |||
#ifdef _UTF8_ | ||||
#define ESCAPE_MASK 0x80 | #define ESCAPE_MASK 0x80 | |||
#define WEIGHT_MASK 0xF0 | #define WEIGHT_MASK 0xF0 | |||
#else | ||||
#define ESCAPE_MASK 0xFF | #ifdef __cplusplus | |||
#define WEIGHT_MASK 0x00 | extern "C" | |||
{ | ||||
#endif | #endif | |||
/* | /* | |||
* Is used to jump to the next start of char | * Is used to jump to the next start of char | |||
* of course it's only usefull when encoding is utf-8 | * of course it's only usefull when encoding is utf-8 | |||
* This function have been added by Jocelyn Merand to use libtextcat in OOo | * This function have been added by Jocelyn Merand to use libtextcat in OOo | |||
*/ | */ | |||
int nextcharstart(const char *str, int position); | const char* utf8_next_char(const char *str); | |||
/*Copy the char in str to dest | /* Copy the char in str to dest of course it's only usefull when encoding is | |||
* of course it's only usefull when encoding is utf8 and the symbol is encoded with more than 1 char | utf8 and the symbol is encoded with more than 1 char return the number of | |||
* return the number of char jumped | char jumped This function have been added by Jocelyn Merand to use | |||
* This function have been added by Jocelyn Merand to use libtextcat in OOo | libtextcat in OOo */ | |||
*/ | ||||
int charcopy(const char *str, char *dest); | int charcopy(const char *str, char *dest); | |||
/* checks if n-gram lex is a prefix of key and of length len | /* checks if n-gram lex is a prefix of key and of length len | |||
* if _UTF8_ is defined, it uses escap characters and len is not realy the length of lex | * len is the number of unicode code points | |||
* in this case, len is the number of utf-8 char strlen("€") == 3 but len == 1 | * strlen("€") == 3 but len == 1 | |||
*/ | */ | |||
int issame( char *lex, char *key, int len ); | int issame(char *lex, char *key, int len); | |||
/* | ||||
* len is the number of unicode code points | ||||
/* Counts the number of characters | * strlen("€") == 3 but len == 1 | |||
* if _UTF8_ is defined, it uses escap characters and the result is not realy the length of str | */ | |||
* in this case, the result is the number of utf-8 char strlen("€") == 3 but utfstrlen("€") == 1 | extern int utfstrlen(const char *str); | |||
*/ | ||||
#ifdef __cplusplus | ||||
extern "C" { | ||||
#endif | ||||
extern int utfstrlen(const char* str); | ||||
#ifdef __cplusplus | #ifdef __cplusplus | |||
} | } | |||
#endif | #endif | |||
#endif | #endif | |||
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ | ||||
End of changes. 7 change blocks. | ||||
28 lines changed or deleted | 23 lines changed or added | |||