extractor.h | extractor.h | |||
---|---|---|---|---|
/* | /* | |||
This file is part of libextractor. | This file is part of libextractor. | |||
(C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff | (C) 2002-2013 Vidyut Samanta and Christian Grothoff | |||
libextractor is free software; you can redistribute it and/or modify | libextractor is free software; you can redistribute it and/or modify | |||
it under the terms of the GNU General Public License as published | it under the terms of the GNU General Public License as published | |||
by the Free Software Foundation; either version 3, or (at your | by the Free Software Foundation; either version 3, or (at your | |||
option) any later version. | option) any later version. | |||
libextractor is distributed in the hope that it will be useful, but | libextractor is distributed in the hope that it will be useful, but | |||
WITHOUT ANY WARRANTY; without even the implied warranty of | WITHOUT ANY WARRANTY; without even the implied warranty of | |||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
General Public License for more details. | General Public License for more details. | |||
skipping to change at line 37 | skipping to change at line 37 | |||
} | } | |||
#endif | #endif | |||
#endif | #endif | |||
#include <stdint.h> | #include <stdint.h> | |||
/** | /** | |||
* 0.2.6-1 => 0x00020601 | * 0.2.6-1 => 0x00020601 | |||
* 4.5.2-0 => 0x04050200 | * 4.5.2-0 => 0x04050200 | |||
*/ | */ | |||
#define EXTRACTOR_VERSION 0x01010000 | #define EXTRACTOR_VERSION 0x01020000 | |||
#include <stdio.h> | #include <stdio.h> | |||
/** | /** | |||
* Options for how plugin execution should be done. | * Options for how plugin execution should be done. | |||
*/ | */ | |||
enum EXTRACTOR_Options | enum EXTRACTOR_Options | |||
{ | { | |||
/** | /** | |||
skipping to change at line 98 | skipping to change at line 98 | |||
*/ | */ | |||
EXTRACTOR_METAFORMAT_UTF8 = 1, | EXTRACTOR_METAFORMAT_UTF8 = 1, | |||
/** | /** | |||
* Some kind of binary format, see given Mime type. | * Some kind of binary format, see given Mime type. | |||
*/ | */ | |||
EXTRACTOR_METAFORMAT_BINARY = 2, | EXTRACTOR_METAFORMAT_BINARY = 2, | |||
/** | /** | |||
* 0-terminated string. The specific encoding is unknown. | * 0-terminated string. The specific encoding is unknown. | |||
* "data_len" is strlen(data)+1. | * "data_len" is strlen (data)+1. | |||
*/ | */ | |||
EXTRACTOR_METAFORMAT_C_STRING = 3 | EXTRACTOR_METAFORMAT_C_STRING = 3 | |||
}; | }; | |||
/** | /** | |||
* Enumeration defining various sources of keywords. See also | * Enumeration defining various sources of keywords. See also | |||
* http://dublincore.org/documents/1998/09/dces/ | * http://dublincore.org/documents/1998/09/dces/ | |||
* | ||||
* @defgroup types meta data types | ||||
* @{ | ||||
*/ | */ | |||
enum EXTRACTOR_MetaType | enum EXTRACTOR_MetaType | |||
{ | { | |||
/* fundamental types */ | /* fundamental types */ | |||
EXTRACTOR_METATYPE_RESERVED = 0, | EXTRACTOR_METATYPE_RESERVED = 0, | |||
EXTRACTOR_METATYPE_MIMETYPE = 1, | EXTRACTOR_METATYPE_MIMETYPE = 1, | |||
EXTRACTOR_METATYPE_FILENAME = 2, | EXTRACTOR_METATYPE_FILENAME = 2, | |||
EXTRACTOR_METATYPE_COMMENT = 3, | EXTRACTOR_METATYPE_COMMENT = 3, | |||
/* Standard types from bibtex */ | /* Standard types from bibtex */ | |||
skipping to change at line 382 | skipping to change at line 385 | |||
EXTRACTOR_METATYPE_TOC = 224, | EXTRACTOR_METATYPE_TOC = 224, | |||
EXTRACTOR_METATYPE_VIDEO_DURATION = 225, | EXTRACTOR_METATYPE_VIDEO_DURATION = 225, | |||
EXTRACTOR_METATYPE_AUDIO_DURATION = 226, | EXTRACTOR_METATYPE_AUDIO_DURATION = 226, | |||
EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227, | EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227, | |||
EXTRACTOR_METATYPE_LAST = 228 | EXTRACTOR_METATYPE_LAST = 228 | |||
}; | }; | |||
/** @} */ /* end of meta data types */ | ||||
/** | /** | |||
* Get the textual name of the keyword. | * Get the textual name of the keyword. | |||
* | * | |||
* @param type meta type to get a UTF-8 string for | * @param type meta type to get a UTF-8 string for | |||
* @return NULL if the type is not known, otherwise | * @return NULL if the type is not known, otherwise | |||
* an English (locale: C) string describing the type; | * an English (locale: C) string describing the type; | |||
* translate using 'dgettext ("libextractor", rval)' | * translate using `dgettext ("libextractor", rval)` | |||
* @ingroup types | ||||
*/ | */ | |||
const char * | const char * | |||
EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type); | EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type); | |||
/** | /** | |||
* Get a long description for the meta type. | * Get a long description for the meta type. | |||
* | * | |||
* @param type meta type to get a UTF-8 description for | * @param type meta type to get a UTF-8 description for | |||
* @return NULL if the type is not known, otherwise | * @return NULL if the type is not known, otherwise | |||
* an English (locale: C) string describing the type; | * an English (locale: C) string describing the type; | |||
* translate using 'dgettext ("libextractor", rval)' | * translate using `dgettext ("libextractor", rval)` | |||
* @ingroup types | ||||
*/ | */ | |||
const char * | const char * | |||
EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type); | EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type); | |||
/** | /** | |||
* Return the highest type number, exclusive as in [0,max). | * Return the highest type number, exclusive as in [0,max). | |||
* | * | |||
* @return highest legal metatype number for this version of libextractor | * @return highest legal metatype number for this version of libextractor | |||
* @ingroup types | ||||
*/ | */ | |||
enum EXTRACTOR_MetaType | enum EXTRACTOR_MetaType | |||
EXTRACTOR_metatype_get_max (void); | EXTRACTOR_metatype_get_max (void); | |||
/** | /** | |||
* Type of a function that libextractor calls for each | * Type of a function that libextractor calls for each | |||
* meta data item found. | * meta data item found. | |||
* | * | |||
* @param cls closure (user-defined) | * @param cls closure (user-defined) | |||
* @param plugin_name name of the plugin that produced this value; | * @param plugin_name name of the plugin that produced this value; | |||
* special values can be used (i.e. '<zlib>' for zlib being | * special values can be used (i.e. '<zlib>' for zlib being | |||
* used in the main libextractor library and yielding | * used in the main libextractor library and yielding | |||
* meta data). | * meta data). | |||
* @param type libextractor-type describing the meta data | * @param type libextractor-type describing the meta data | |||
* @param format basic format information about data | * @param format basic format information about @a data | |||
* @param data_mime_type mime-type of data (not of the original file); | * @param data_mime_type mime-type of @a data (not of the original file); | |||
* can be NULL (if mime-type is not known) | * can be NULL (if mime-type is not known) | |||
* @param data actual meta-data found | * @param data actual meta-data found | |||
* @param data_len number of bytes in data | * @param data_len number of bytes in @a data | |||
* @return 0 to continue extracting, 1 to abort | * @return 0 to continue extracting, 1 to abort | |||
*/ | */ | |||
typedef int (*EXTRACTOR_MetaDataProcessor) (void *cls, | typedef int (*EXTRACTOR_MetaDataProcessor) (void *cls, | |||
const char *plugin_name, | const char *plugin_name, | |||
enum EXTRACTOR_MetaType type, | enum EXTRACTOR_MetaType type, | |||
enum EXTRACTOR_MetaFormat format, | enum EXTRACTOR_MetaFormat format, | |||
const char *data_mime_type, | const char *data_mime_type, | |||
const char *data, | const char *data, | |||
size_t data_len); | size_t data_len); | |||
skipping to change at line 454 | skipping to change at line 462 | |||
* Closure argument to pass to all callbacks. | * Closure argument to pass to all callbacks. | |||
*/ | */ | |||
void *cls; | void *cls; | |||
/** | /** | |||
* Configuration string for the plugin. | * Configuration string for the plugin. | |||
*/ | */ | |||
const char *config; | const char *config; | |||
/** | /** | |||
* Obtain a pointer to up to 'size' bytes of data from the file to process. | * Obtain a pointer to up to @a size bytes of data from the file to process. | |||
* | * | |||
* @param cls the 'cls' member of this struct | * @param cls the @e cls member of this struct | |||
* @param data pointer to set to the file data, set to NULL on error | * @param data pointer to set to the file data, set to NULL on error | |||
* @param size maximum number of bytes requested | * @param size maximum number of bytes requested | |||
* @return number of bytes now available in data (can be smaller than 'size'), | * @return number of bytes now available in @a data (can be smaller than @a size), | |||
* -1 on error | * -1 on error | |||
*/ | */ | |||
ssize_t (*read) (void *cls, | ssize_t (*read) (void *cls, | |||
void **data, | void **data, | |||
size_t size); | size_t size); | |||
/** | /** | |||
* Seek in the file. Use 'SEEK_CUR' for whence and 'pos' of 0 to | * Seek in the file. Use `SEEK_CUR` for @a whence and @a pos of 0 to | |||
* obtain the current position in the file. | * obtain the current position in the file. | |||
* | * | |||
* @param cls the 'cls' member of this struct | * @param cls the @e cls member of this struct | |||
* @param pos position to seek (see 'man lseek') | * @param pos position to seek (see 'man lseek') | |||
* @param whence how to see (absolute to start, relative, absolute to end) | * @param whence how to see (absolute to start, relative, absolute to end) | |||
* @return new absolute position, -1 on error (i.e. desired position | * @return new absolute position, -1 on error (i.e. desired position | |||
* does not exist) | * does not exist) | |||
*/ | */ | |||
int64_t (*seek) (void *cls, | int64_t (*seek) (void *cls, | |||
int64_t pos, | int64_t pos, | |||
int whence); | int whence); | |||
/** | /** | |||
* Determine the overall size of the file. | * Determine the overall size of the file. | |||
* | * | |||
* @param cls the 'cls' member of this struct | * @param cls the @a cls member of this struct | |||
* @return overall file size, UINT64_MAX on error (i.e. IPC failure) | * @return overall file size, `UINT64_MAX` on error (i.e. IPC failure) | |||
*/ | */ | |||
uint64_t (*get_size) (void *cls); | uint64_t (*get_size) (void *cls); | |||
/** | /** | |||
* Function to call on extracted data. | * Function to call on extracted data. | |||
*/ | */ | |||
EXTRACTOR_MetaDataProcessor proc; | EXTRACTOR_MetaDataProcessor proc; | |||
}; | }; | |||
skipping to change at line 507 | skipping to change at line 515 | |||
* must provide. | * must provide. | |||
* | * | |||
* @param ec extraction context provided to the plugin | * @param ec extraction context provided to the plugin | |||
*/ | */ | |||
typedef void (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec); | typedef void (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec); | |||
/** | /** | |||
* Linked list of extractor plugins. An application builds this list | * Linked list of extractor plugins. An application builds this list | |||
* by telling libextractor to load various keyword-extraction | * by telling libextractor to load various keyword-extraction | |||
* plugins. Libraries can also be unloaded (removed from this list, | * plugins. Libraries can also be unloaded (removed from this list, | |||
* see EXTRACTOR_plugin_remove). | * see #EXTRACTOR_plugin_remove). | |||
*/ | */ | |||
struct EXTRACTOR_PluginList; | struct EXTRACTOR_PluginList; | |||
/** | /** | |||
* Load the default set of plugins. The default can be changed | * Load the default set of plugins. The default can be changed | |||
* by setting the LIBEXTRACTOR_LIBRARIES environment variable; | * by setting the LIBEXTRACTOR_LIBRARIES environment variable; | |||
* If it is set to "env", then this function will return | * If it is set to "env", then this function will return | |||
* EXTRACTOR_plugin_add_config (NULL, env, flags). | * #EXTRACTOR_plugin_add_config (NULL, env, flags). | |||
* | * | |||
* If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt | * If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt | |||
* to locate the installed plugins and load all of them. | * to locate the installed plugins and load all of them. | |||
* The directory where the code will search for plugins is typically | * The directory where the code will search for plugins is typically | |||
* automatically determined; it can be specified explicitly using the | * automatically determined; it can be specified explicitly using the | |||
* "LIBEXTRACTOR_PREFIX" environment variable. | * "LIBEXTRACTOR_PREFIX" environment variable. | |||
* | * | |||
* This environment variable must be set to the precise directory with | * This environment variable must be set to the precise directory with | |||
* the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that | * the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that | |||
* setting the environment variable will disable all of the methods | * setting the environment variable will disable all of the methods | |||
skipping to change at line 593 | skipping to change at line 601 | |||
* | * | |||
* @param plugin the list of plugins | * @param plugin the list of plugins | |||
*/ | */ | |||
void | void | |||
EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins); | EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins); | |||
/** | /** | |||
* Extract keywords from a file using the given set of plugins. | * Extract keywords from a file using the given set of plugins. | |||
* | * | |||
* @param plugins the list of plugins to use | * @param plugins the list of plugins to use | |||
* @param filename the name of the file, can be NULL if data is not NULL | * @param filename the name of the file, can be NULL if @a data is not NULL | |||
* @param data data of the file in memory, can be NULL (in which | * @param data data of the file in memory, can be NULL (in which | |||
* case libextractor will open file) if filename is not NULL | * case libextractor will open file) if filename is not NULL | |||
* @param size number of bytes in data, ignored if data is NULL | * @param size number of bytes in @a data, ignored if @a data is NULL | |||
* @param proc function to call for each meta data item found | * @param proc function to call for each meta data item found | |||
* @param proc_cls cls argument to proc | * @param proc_cls cls argument to @a proc | |||
*/ | */ | |||
void | void | |||
EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, | EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, | |||
const char *filename, | const char *filename, | |||
const void *data, | const void *data, | |||
size_t size, | size_t size, | |||
EXTRACTOR_MetaDataProcessor proc, | EXTRACTOR_MetaDataProcessor proc, | |||
void *proc_cls); | void *proc_cls); | |||
/** | /** | |||
* Simple EXTRACTOR_MetaDataProcessor implementation that simply | * Simple #EXTRACTOR_MetaDataProcessor implementation that simply | |||
* prints the extracted meta data to the given file. Only prints | * prints the extracted meta data to the given file. Only prints | |||
* those keywords that are in UTF-8 format. | * those keywords that are in UTF-8 format. | |||
* | * | |||
* @param handle the file to write to (stdout, stderr), must NOT be NULL, | * @param handle the file to write to (`stdout`, `stderr`), must NOT be NULL, | |||
* must be of type "FILE *". | * must be of type `FILE *`. | |||
* @param plugin_name name of the plugin that produced this value | * @param plugin_name name of the plugin that produced this value | |||
* @param type libextractor-type describing the meta data | * @param type libextractor-type describing the meta data | |||
* @param format basic format information about data | * @param format basic format information about data | |||
* @param data_mime_type mime-type of data (not of the original file); | * @param data_mime_type mime-type of @a data (not of the original file); | |||
* can be NULL (if mime-type is not known) | * can be NULL (if mime-type is not known) | |||
* @param data actual meta-data found | * @param data actual meta-data found | |||
* @param data_len number of bytes in data | * @param data_len number of bytes in @a data | |||
* @return non-zero if printing failed, otherwise 0. | * @return non-zero if printing failed, otherwise 0. | |||
*/ | */ | |||
int | int | |||
EXTRACTOR_meta_data_print (void * handle, | EXTRACTOR_meta_data_print (void *handle, | |||
const char *plugin_name, | const char *plugin_name, | |||
enum EXTRACTOR_MetaType type, | enum EXTRACTOR_MetaType type, | |||
enum EXTRACTOR_MetaFormat format, | enum EXTRACTOR_MetaFormat format, | |||
const char *data_mime_type, | const char *data_mime_type, | |||
const char *data, | const char *data, | |||
size_t data_len); | size_t data_len); | |||
#if 0 /* keep Emacsens' auto-indent happy */ | #if 0 /* keep Emacsens' auto-indent happy */ | |||
{ | { | |||
#endif | #endif | |||
End of changes. 26 change blocks. | ||||
26 lines changed or deleted | 35 lines changed or added | |||