extractor.h   extractor.h 
/* /*
This file is part of libextractor. This file is part of libextractor.
(C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff (C) 2002-2013 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published it under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your by the Free Software Foundation; either version 3, or (at your
option) any later version. option) any later version.
libextractor is distributed in the hope that it will be useful, but libextractor is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details. General Public License for more details.
skipping to change at line 37 skipping to change at line 37
} }
#endif #endif
#endif #endif
#include <stdint.h> #include <stdint.h>
/** /**
* 0.2.6-1 => 0x00020601 * 0.2.6-1 => 0x00020601
* 4.5.2-0 => 0x04050200 * 4.5.2-0 => 0x04050200
*/ */
#define EXTRACTOR_VERSION 0x01010000 #define EXTRACTOR_VERSION 0x01020000
#include <stdio.h> #include <stdio.h>
/** /**
* Options for how plugin execution should be done. * Options for how plugin execution should be done.
*/ */
enum EXTRACTOR_Options enum EXTRACTOR_Options
{ {
/** /**
skipping to change at line 98 skipping to change at line 98
*/ */
EXTRACTOR_METAFORMAT_UTF8 = 1, EXTRACTOR_METAFORMAT_UTF8 = 1,
/** /**
* Some kind of binary format, see given Mime type. * Some kind of binary format, see given Mime type.
*/ */
EXTRACTOR_METAFORMAT_BINARY = 2, EXTRACTOR_METAFORMAT_BINARY = 2,
/** /**
* 0-terminated string. The specific encoding is unknown. * 0-terminated string. The specific encoding is unknown.
* "data_len" is strlen(data)+1. * "data_len" is strlen (data)+1.
*/ */
EXTRACTOR_METAFORMAT_C_STRING = 3 EXTRACTOR_METAFORMAT_C_STRING = 3
}; };
/** /**
* Enumeration defining various sources of keywords. See also * Enumeration defining various sources of keywords. See also
* http://dublincore.org/documents/1998/09/dces/ * http://dublincore.org/documents/1998/09/dces/
*
* @defgroup types meta data types
* @{
*/ */
enum EXTRACTOR_MetaType enum EXTRACTOR_MetaType
{ {
/* fundamental types */ /* fundamental types */
EXTRACTOR_METATYPE_RESERVED = 0, EXTRACTOR_METATYPE_RESERVED = 0,
EXTRACTOR_METATYPE_MIMETYPE = 1, EXTRACTOR_METATYPE_MIMETYPE = 1,
EXTRACTOR_METATYPE_FILENAME = 2, EXTRACTOR_METATYPE_FILENAME = 2,
EXTRACTOR_METATYPE_COMMENT = 3, EXTRACTOR_METATYPE_COMMENT = 3,
/* Standard types from bibtex */ /* Standard types from bibtex */
skipping to change at line 382 skipping to change at line 385
EXTRACTOR_METATYPE_TOC = 224, EXTRACTOR_METATYPE_TOC = 224,
EXTRACTOR_METATYPE_VIDEO_DURATION = 225, EXTRACTOR_METATYPE_VIDEO_DURATION = 225,
EXTRACTOR_METATYPE_AUDIO_DURATION = 226, EXTRACTOR_METATYPE_AUDIO_DURATION = 226,
EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227, EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227,
EXTRACTOR_METATYPE_LAST = 228 EXTRACTOR_METATYPE_LAST = 228
}; };
/** @} */ /* end of meta data types */
/** /**
* Get the textual name of the keyword. * Get the textual name of the keyword.
* *
* @param type meta type to get a UTF-8 string for * @param type meta type to get a UTF-8 string for
* @return NULL if the type is not known, otherwise * @return NULL if the type is not known, otherwise
* an English (locale: C) string describing the type; * an English (locale: C) string describing the type;
* translate using 'dgettext ("libextractor", rval)' * translate using `dgettext ("libextractor", rval)`
* @ingroup types
*/ */
const char * const char *
EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type); EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type);
/** /**
* Get a long description for the meta type. * Get a long description for the meta type.
* *
* @param type meta type to get a UTF-8 description for * @param type meta type to get a UTF-8 description for
* @return NULL if the type is not known, otherwise * @return NULL if the type is not known, otherwise
* an English (locale: C) string describing the type; * an English (locale: C) string describing the type;
* translate using 'dgettext ("libextractor", rval)' * translate using `dgettext ("libextractor", rval)`
* @ingroup types
*/ */
const char * const char *
EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type); EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type);
/** /**
* Return the highest type number, exclusive as in [0,max). * Return the highest type number, exclusive as in [0,max).
* *
* @return highest legal metatype number for this version of libextractor * @return highest legal metatype number for this version of libextractor
* @ingroup types
*/ */
enum EXTRACTOR_MetaType enum EXTRACTOR_MetaType
EXTRACTOR_metatype_get_max (void); EXTRACTOR_metatype_get_max (void);
/** /**
* Type of a function that libextractor calls for each * Type of a function that libextractor calls for each
* meta data item found. * meta data item found.
* *
* @param cls closure (user-defined) * @param cls closure (user-defined)
* @param plugin_name name of the plugin that produced this value; * @param plugin_name name of the plugin that produced this value;
* special values can be used (i.e. '&lt;zlib&gt;' for zlib being * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
* used in the main libextractor library and yielding * used in the main libextractor library and yielding
* meta data). * meta data).
* @param type libextractor-type describing the meta data * @param type libextractor-type describing the meta data
* @param format basic format information about data * @param format basic format information about @a data
* @param data_mime_type mime-type of data (not of the original file); * @param data_mime_type mime-type of @a data (not of the original file);
* can be NULL (if mime-type is not known) * can be NULL (if mime-type is not known)
* @param data actual meta-data found * @param data actual meta-data found
* @param data_len number of bytes in data * @param data_len number of bytes in @a data
* @return 0 to continue extracting, 1 to abort * @return 0 to continue extracting, 1 to abort
*/ */
typedef int (*EXTRACTOR_MetaDataProcessor) (void *cls, typedef int (*EXTRACTOR_MetaDataProcessor) (void *cls,
const char *plugin_name, const char *plugin_name,
enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaType type,
enum EXTRACTOR_MetaFormat format , enum EXTRACTOR_MetaFormat format ,
const char *data_mime_type, const char *data_mime_type,
const char *data, const char *data,
size_t data_len); size_t data_len);
skipping to change at line 454 skipping to change at line 462
* Closure argument to pass to all callbacks. * Closure argument to pass to all callbacks.
*/ */
void *cls; void *cls;
/** /**
* Configuration string for the plugin. * Configuration string for the plugin.
*/ */
const char *config; const char *config;
/** /**
* Obtain a pointer to up to 'size' bytes of data from the file to process. * Obtain a pointer to up to @a size bytes of data from the file to process.
* *
* @param cls the 'cls' member of this struct * @param cls the @e cls member of this struct
* @param data pointer to set to the file data, set to NULL on error * @param data pointer to set to the file data, set to NULL on error
* @param size maximum number of bytes requested * @param size maximum number of bytes requested
* @return number of bytes now available in data (can be smaller than 'size'), * @return number of bytes now available in @a data (can be smaller than @a size),
* -1 on error * -1 on error
*/ */
ssize_t (*read) (void *cls, ssize_t (*read) (void *cls,
void **data, void **data,
size_t size); size_t size);
/** /**
* Seek in the file. Use 'SEEK_CUR' for whence and 'pos' of 0 to * Seek in the file. Use `SEEK_CUR` for @a whence and @a pos of 0 to
* obtain the current position in the file. * obtain the current position in the file.
* *
* @param cls the 'cls' member of this struct * @param cls the @e cls member of this struct
* @param pos position to seek (see 'man lseek') * @param pos position to seek (see 'man lseek')
* @param whence how to seek (absolute to start, relative, absolute to end) * @param whence how to seek (absolute to start, relative, absolute to end)
* @return new absolute position, -1 on error (i.e. desired position * @return new absolute position, -1 on error (i.e. desired position
* does not exist) * does not exist)
*/ */
int64_t (*seek) (void *cls, int64_t (*seek) (void *cls,
int64_t pos, int64_t pos,
int whence); int whence);
/** /**
* Determine the overall size of the file. * Determine the overall size of the file.
* *
* @param cls the 'cls' member of this struct * @param cls the @a cls member of this struct
* @return overall file size, UINT64_MAX on error (i.e. IPC failure) * @return overall file size, `UINT64_MAX` on error (i.e. IPC failure)
*/ */
uint64_t (*get_size) (void *cls); uint64_t (*get_size) (void *cls);
/** /**
* Function to call on extracted data. * Function to call on extracted data.
*/ */
EXTRACTOR_MetaDataProcessor proc; EXTRACTOR_MetaDataProcessor proc;
}; };
skipping to change at line 507 skipping to change at line 515
* must provide. * must provide.
* *
* @param ec extraction context provided to the plugin * @param ec extraction context provided to the plugin
*/ */
typedef void (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext * ec); typedef void (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext * ec);
/** /**
* Linked list of extractor plugins. An application builds this list * Linked list of extractor plugins. An application builds this list
* by telling libextractor to load various keyword-extraction * by telling libextractor to load various keyword-extraction
* plugins. Libraries can also be unloaded (removed from this list, * plugins. Libraries can also be unloaded (removed from this list,
* see EXTRACTOR_plugin_remove). * see #EXTRACTOR_plugin_remove).
*/ */
struct EXTRACTOR_PluginList; struct EXTRACTOR_PluginList;
/** /**
* Load the default set of plugins. The default can be changed * Load the default set of plugins. The default can be changed
* by setting the LIBEXTRACTOR_LIBRARIES environment variable; * by setting the LIBEXTRACTOR_LIBRARIES environment variable;
* If it is set to "env", then this function will return * If it is set to "env", then this function will return
* EXTRACTOR_plugin_add_config (NULL, env, flags). * #EXTRACTOR_plugin_add_config (NULL, env, flags).
* *
* If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt * If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt
* to locate the installed plugins and load all of them. * to locate the installed plugins and load all of them.
* The directory where the code will search for plugins is typically * The directory where the code will search for plugins is typically
* automatically determined; it can be specified explicitly using the * automatically determined; it can be specified explicitly using the
* "LIBEXTRACTOR_PREFIX" environment variable. * "LIBEXTRACTOR_PREFIX" environment variable.
* *
* This environment variable must be set to the precise directory with * This environment variable must be set to the precise directory with
* the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that * the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that
* setting the environment variable will disable all of the methods * setting the environment variable will disable all of the methods
skipping to change at line 593 skipping to change at line 601
* *
* @param plugins the list of plugins * @param plugins the list of plugins
*/ */
void void
EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins); EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins);
/** /**
* Extract keywords from a file using the given set of plugins. * Extract keywords from a file using the given set of plugins.
* *
* @param plugins the list of plugins to use * @param plugins the list of plugins to use
* @param filename the name of the file, can be NULL if data is not NULL * @param filename the name of the file, can be NULL if @a data is not NULL
* @param data data of the file in memory, can be NULL (in which * @param data data of the file in memory, can be NULL (in which
* case libextractor will open file) if filename is not NULL * case libextractor will open file) if filename is not NULL
* @param size number of bytes in data, ignored if data is NULL * @param size number of bytes in @a data, ignored if @a data is NULL
* @param proc function to call for each meta data item found * @param proc function to call for each meta data item found
* @param proc_cls cls argument to proc * @param proc_cls cls argument to @a proc
*/ */
void void
EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
const char *filename, const char *filename,
const void *data, const void *data,
size_t size, size_t size,
EXTRACTOR_MetaDataProcessor proc, EXTRACTOR_MetaDataProcessor proc,
void *proc_cls); void *proc_cls);
/** /**
* Simple EXTRACTOR_MetaDataProcessor implementation that simply * Simple #EXTRACTOR_MetaDataProcessor implementation that simply
* prints the extracted meta data to the given file. Only prints * prints the extracted meta data to the given file. Only prints
* those keywords that are in UTF-8 format. * those keywords that are in UTF-8 format.
* *
* @param handle the file to write to (stdout, stderr), must NOT be NULL, * @param handle the file to write to (`stdout`, `stderr`), must NOT be NULL,
* must be of type "FILE *". * must be of type `FILE *`.
* @param plugin_name name of the plugin that produced this value * @param plugin_name name of the plugin that produced this value
* @param type libextractor-type describing the meta data * @param type libextractor-type describing the meta data
* @param format basic format information about data * @param format basic format information about data
* @param data_mime_type mime-type of data (not of the original file); * @param data_mime_type mime-type of @a data (not of the original file);
* can be NULL (if mime-type is not known) * can be NULL (if mime-type is not known)
* @param data actual meta-data found * @param data actual meta-data found
* @param data_len number of bytes in data * @param data_len number of bytes in @a data
* @return non-zero if printing failed, otherwise 0. * @return non-zero if printing failed, otherwise 0.
*/ */
int int
EXTRACTOR_meta_data_print (void * handle, EXTRACTOR_meta_data_print (void *handle,
const char *plugin_name, const char *plugin_name,
enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaType type,
enum EXTRACTOR_MetaFormat format, enum EXTRACTOR_MetaFormat format,
const char *data_mime_type, const char *data_mime_type,
const char *data, const char *data,
size_t data_len); size_t data_len);
#if 0 /* keep Emacsens' auto-indent happy */ #if 0 /* keep Emacsens' auto-indent happy */
{ {
#endif #endif
 End of changes. 26 change blocks. 
26 lines changed or deleted 35 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/