extractor.h   extractor.h 
/* /*
This file is part of libextractor. This file is part of libextractor.
(C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff
libextractor is free software; you can redistribute it and/or modify libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published it under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 2, or (at your by the Free Software Foundation; either version 3, or (at your
option) any later version. option) any later version.
libextractor is distributed in the hope that it will be useful, but libextractor is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details. General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with libextractor; see the file COPYING. If not, write to the along with libextractor; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 59 Temple Place - Suite 330, Free Software Foundation, Inc., 59 Temple Place - Suite 330,
skipping to change at line 31 skipping to change at line 31
#ifndef EXTRACTOR_H #ifndef EXTRACTOR_H
#define EXTRACTOR_H #define EXTRACTOR_H
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#if 0 /* keep Emacsens' auto-indent happy */ #if 0 /* keep Emacsens' auto-indent happy */
} }
#endif #endif
#endif #endif
#include <stdint.h>
/** /**
* 0.2.6-1 => 0x00020601 * 0.2.6-1 => 0x00020601
* 4.5.2-0 => 0x04050200 * 4.5.2-0 => 0x04050200
*/ */
#define EXTRACTOR_VERSION 0x00060300 #define EXTRACTOR_VERSION 0x01000000
#include <stdio.h> #include <stdio.h>
/** /**
* Options for how plugin execution should be done. * Options for how plugin execution should be done.
*/ */
enum EXTRACTOR_Options enum EXTRACTOR_Options
{ {
/** /**
* Run plugin out-of-process, starting the process once the plugin * Run plugin out-of-process, starting the process once the plugin
* is to be run. If a plugin crashes, automatically restart the * is to be run. If a plugin crashes, automatically restart the
* respective process for the same file and try once more * respective process for the same file and try once more
* (since the crash may be caused by the previous file). If * (since the crash may be caused by the previous file). If
* the process crashes immediately again, it is not restarted * the process crashes immediately again, it is not restarted
* until the next file. * until the next file.
*/ */
EXTRACTOR_OPTION_DEFAULT_POLICY = 0, EXTRACTOR_OPTION_DEFAULT_POLICY = 0,
/** /**
* Run plugins out-of-process, starting the process * Deprecated option. Ignored.
* once at the time the plugin is loaded. This will
* prevent the main process crashing if a plugin dies.
* Ignored on platforms where out-of-process starts
* are not supported (in-process execution will be
* attempted, unless the plugin itself forbids it).
*/ */
EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART = 1, EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART = 1,
/** /**
* Run plugins in-process. Unsafe, not recommended, * Run plugins in-process. Unsafe, not recommended,
* can be nice for debugging. * can be nice for debugging.
*/ */
EXTRACTOR_OPTION_IN_PROCESS = 2, EXTRACTOR_OPTION_IN_PROCESS = 2,
/** /**
skipping to change at line 104 skipping to change at line 101
/** /**
* Some kind of binary format, see given Mime type. * Some kind of binary format, see given Mime type.
*/ */
EXTRACTOR_METAFORMAT_BINARY = 2, EXTRACTOR_METAFORMAT_BINARY = 2,
/** /**
* 0-terminated string. The specific encoding is unknown. * 0-terminated string. The specific encoding is unknown.
* "data_len" is strlen(data)+1. * "data_len" is strlen(data)+1.
*/ */
EXTRACTOR_METAFORMAT_C_STRING = 3 EXTRACTOR_METAFORMAT_C_STRING = 3
}; };
/** /**
* Enumeration defining various sources of keywords. See also * Enumeration defining various sources of keywords. See also
* http://dublincore.org/documents/1998/09/dces/ * http://dublincore.org/documents/1998/09/dces/
*/ */
enum EXTRACTOR_MetaType enum EXTRACTOR_MetaType
{ {
/* fundamental types */ /* fundamental types */
EXTRACTOR_METATYPE_RESERVED = 0, EXTRACTOR_METATYPE_RESERVED = 0,
skipping to change at line 318 skipping to change at line 316
/* GNUnet specific values (never extracted) */ /* GNUnet specific values (never extracted) */
EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 173, EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 173,
EXTRACTOR_METATYPE_GNUNET_FULL_DATA = 174, EXTRACTOR_METATYPE_GNUNET_FULL_DATA = 174,
EXTRACTOR_METATYPE_RATING = 175, EXTRACTOR_METATYPE_RATING = 175,
EXTRACTOR_METATYPE_ORGANIZATION = 176, EXTRACTOR_METATYPE_ORGANIZATION = 176,
EXTRACTOR_METATYPE_RIPPER = 177, EXTRACTOR_METATYPE_RIPPER = 177,
EXTRACTOR_METATYPE_PRODUCER = 178, EXTRACTOR_METATYPE_PRODUCER = 178,
EXTRACTOR_METATYPE_GROUP = 179, EXTRACTOR_METATYPE_GROUP = 179,
EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME = 180, EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME = 180,
EXTRACTOR_METATYPE_LAST = 181 EXTRACTOR_METATYPE_DISC_COUNT = 181,
EXTRACTOR_METATYPE_CODEC = 182,
EXTRACTOR_METATYPE_VIDEO_CODEC = 183,
EXTRACTOR_METATYPE_AUDIO_CODEC = 184,
EXTRACTOR_METATYPE_SUBTITLE_CODEC = 185,
EXTRACTOR_METATYPE_CONTAINER_FORMAT = 186,
EXTRACTOR_METATYPE_BITRATE = 187,
EXTRACTOR_METATYPE_NOMINAL_BITRATE = 188,
EXTRACTOR_METATYPE_MINIMUM_BITRATE = 189,
EXTRACTOR_METATYPE_MAXIMUM_BITRATE = 190,
EXTRACTOR_METATYPE_SERIAL = 191,
EXTRACTOR_METATYPE_ENCODER = 192,
EXTRACTOR_METATYPE_ENCODER_VERSION = 193,
EXTRACTOR_METATYPE_TRACK_GAIN = 194,
EXTRACTOR_METATYPE_TRACK_PEAK = 195,
EXTRACTOR_METATYPE_ALBUM_GAIN = 196,
EXTRACTOR_METATYPE_ALBUM_PEAK = 197,
EXTRACTOR_METATYPE_REFERENCE_LEVEL = 198,
EXTRACTOR_METATYPE_LOCATION_NAME = 199,
EXTRACTOR_METATYPE_LOCATION_ELEVATION = 200,
EXTRACTOR_METATYPE_LOCATION_HORIZONTAL_ERROR = 201,
EXTRACTOR_METATYPE_LOCATION_MOVEMENT_SPEED = 202,
EXTRACTOR_METATYPE_LOCATION_MOVEMENT_DIRECTION = 203,
EXTRACTOR_METATYPE_LOCATION_CAPTURE_DIRECTION = 204,
EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER = 205,
EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER = 206,
EXTRACTOR_METATYPE_GROUPING = 207,
EXTRACTOR_METATYPE_DEVICE_MANUFACTURER = 208,
EXTRACTOR_METATYPE_DEVICE_MODEL = 209,
EXTRACTOR_METATYPE_AUDIO_LANGUAGE = 210,
EXTRACTOR_METATYPE_CHANNELS = 211,
EXTRACTOR_METATYPE_SAMPLE_RATE = 212,
EXTRACTOR_METATYPE_AUDIO_DEPTH = 213,
EXTRACTOR_METATYPE_AUDIO_BITRATE = 214,
EXTRACTOR_METATYPE_MAXIMUM_AUDIO_BITRATE = 215,
EXTRACTOR_METATYPE_VIDEO_DIMENSIONS = 216,
EXTRACTOR_METATYPE_VIDEO_DEPTH = 217,
EXTRACTOR_METATYPE_FRAME_RATE = 218,
EXTRACTOR_METATYPE_PIXEL_ASPECT_RATIO = 219,
EXTRACTOR_METATYPE_VIDEO_BITRATE = 220,
EXTRACTOR_METATYPE_MAXIMUM_VIDEO_BITRATE = 221,
EXTRACTOR_METATYPE_SUBTITLE_LANGUAGE = 222,
EXTRACTOR_METATYPE_VIDEO_LANGUAGE = 223,
EXTRACTOR_METATYPE_TOC = 224,
EXTRACTOR_METATYPE_VIDEO_DURATION = 225,
EXTRACTOR_METATYPE_AUDIO_DURATION = 226,
EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227,
EXTRACTOR_METATYPE_LAST = 228
}; };
/** /**
* Get the textual name of the keyword. * Get the textual name of the keyword.
* *
* @param type meta type to get a UTF-8 string for * @param type meta type to get a UTF-8 string for
* @return NULL if the type is not known, otherwise * @return NULL if the type is not known, otherwise
* an English (locale: C) string describing the type; * an English (locale: C) string describing the type;
* translate using 'dgettext ("libextractor", rval)' * translate using 'dgettext ("libextractor", rval)'
*/ */
const char * const char *
EXTRACTOR_metatype_to_string(enum EXTRACTOR_MetaType type); EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type);
/** /**
* Get a long description for the meta type. * Get a long description for the meta type.
* *
* @param type meta type to get a UTF-8 description for * @param type meta type to get a UTF-8 description for
* @return NULL if the type is not known, otherwise * @return NULL if the type is not known, otherwise
* an English (locale: C) string describing the type; * an English (locale: C) string describing the type;
* translate using 'dgettext ("libextractor", rval)' * translate using 'dgettext ("libextractor", rval)'
*/ */
const char * const char *
EXTRACTOR_metatype_to_description(enum EXTRACTOR_MetaType type); EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type);
/** /**
* Return the highest type number, exclusive as in [0,max). * Return the highest type number, exclusive as in [0,max).
* *
* @return highest legal metatype number for this version of libextractor * @return highest legal metatype number for this version of libextractor
*/ */
enum EXTRACTOR_MetaType enum EXTRACTOR_MetaType
EXTRACTOR_metatype_get_max (void); EXTRACTOR_metatype_get_max (void);
/** /**
* Type of a function that libextractor calls for each * Type of a function that libextractor calls for each
* meta data item found. * meta data item found.
* *
* @param cls closure (user-defined) * @param cls closure (user-defined)
* @param plugin_name name of the plugin that produced this value; * @param plugin_name name of the plugin that produced this value;
* special values can be used (i.e. '<zlib>' for zlib being * special values can be used (i.e. '<zlib>' for zlib being
* used in the main libextractor library and yielding * used in the main libextractor library and yielding
* meta data). * meta data).
* @param type libextractor-type describing the meta data * @param type libextractor-type describing the meta data
* @param format basic format information about data * @param format basic format information about data
* @param data_mime_type mime-type of data (not of the original file); * @param data_mime_type mime-type of data (not of the original file);
* can be NULL (if mime-type is not known) * can be NULL (if mime-type is not known)
* @param data actual meta-data found * @param data actual meta-data found
* @param data_len number of bytes in data * @param data_len number of bytes in data
* @return 0 to continue extracting, 1 to abort * @return 0 to continue extracting, 1 to abort
*/ */
typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls, typedef int (*EXTRACTOR_MetaDataProcessor) (void *cls,
const char *plugin_name, const char *plugin_name,
enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaType type,
enum EXTRACTOR_MetaFormat format, enum EXTRACTOR_MetaFormat format,
const char *data_mime_type, const char *data_mime_type,
const char *data, const char *data,
size_t data_len); size_t data_len);
/**
* Context provided for plugins that perform meta data extraction.
*/
struct EXTRACTOR_ExtractContext
{
/**
* Closure argument to pass to all callbacks.
*/
void *cls;
/**
* Configuration string for the plugin.
*/
const char *config;
/**
* Obtain a pointer to up to 'size' bytes of data from the file to process.
*
* @param cls the 'cls' member of this struct
* @param data pointer to set to the file data, set to NULL on error
* @param size maximum number of bytes requested
* @return number of bytes now available in data (can be smaller than 'size'),
* -1 on error
*/
ssize_t (*read) (void *cls,
void **data,
size_t size);
/**
* Seek in the file. Use 'SEEK_CUR' for whence and 'pos' of 0 to
* obtain the current position in the file.
*
* @param cls the 'cls' member of this struct
* @param pos position to seek (see 'man lseek')
* @param whence how to seek (absolute to start, relative, absolute to end)
* @return new absolute position, -1 on error (i.e. desired position
* does not exist)
*/
int64_t (*seek) (void *cls,
int64_t pos,
int whence);
/**
* Determine the overall size of the file.
*
* @param cls the 'cls' member of this struct
* @return overall file size, UINT64_MAX on error (i.e. IPC failure)
*/
uint64_t (*get_size) (void *cls);
/**
* Function to call on extracted data.
*/
EXTRACTOR_MetaDataProcessor proc;
};
/** /**
* Signature of the extract method that each plugin * Signature of the extract method that each plugin
* must provide. * must provide.
* *
* @param data data to process * @param ec extraction context provided to the plugin
* @param datasize number of bytes available in data
* @param proc function to call for meta data found
* @param proc_cls cls argument to proc
* @param options options for this plugin; can be NULL
* @return 0 if all calls to proc returned 0, otherwise 1
*/ */
typedef int (*EXTRACTOR_ExtractMethod)(const char *data, typedef void (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec);
size_t datasize,
EXTRACTOR_MetaDataProcessor proc,
void *proc_cls,
const char *options);
/** /**
* Linked list of extractor plugins. An application builds this list * Linked list of extractor plugins. An application builds this list
* by telling libextractor to load various keyword-extraction * by telling libextractor to load various keyword-extraction
* plugins. Libraries can also be unloaded (removed from this list, * plugins. Libraries can also be unloaded (removed from this list,
* see EXTRACTOR_plugin_remove). * see EXTRACTOR_plugin_remove).
*/ */
struct EXTRACTOR_PluginList; struct EXTRACTOR_PluginList;
/** /**
skipping to change at line 423 skipping to change at line 533
* This environment variable must be set to the precise directory with * This environment variable must be set to the precise directory with
* the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that * the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that
* setting the environment variable will disable all of the methods * setting the environment variable will disable all of the methods
* that are typically used to determine the location of plugins. * that are typically used to determine the location of plugins.
* Multiple paths can be specified using ':' to separate them. * Multiple paths can be specified using ':' to separate them.
* *
* @param flags options for all of the plugins loaded * @param flags options for all of the plugins loaded
* @return the default set of plugins, NULL if no plugins were found * @return the default set of plugins, NULL if no plugins were found
*/ */
struct EXTRACTOR_PluginList * struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags); EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags);
/** /**
* Add a library for keyword extraction. * Add a library for keyword extraction.
* *
* @param prev the previous list of libraries, may be NULL * @param prev the previous list of libraries, may be NULL
* @param library the name of the library (short handle, i.e. "mime") * @param library the name of the library (short handle, i.e. "mime")
* @param options options to give to the library * @param options options to give to the library
* @param flags options to use * @param flags options to use
* @return the new list of libraries, equal to prev iff an error occurred * @return the new list of libraries, equal to prev iff an error occurred
*/ */
struct EXTRACTOR_PluginList * struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev,
const char * library, const char *library,
const char *options, const char *options,
enum EXTRACTOR_Options flags); enum EXTRACTOR_Options flags);
/** /**
* Load multiple libraries as specified by the user. * Load multiple libraries as specified by the user.
* *
* @param config a string given by the user that defines which * @param config a string given by the user that defines which
* libraries should be loaded. Has the format * libraries should be loaded. Has the format
* "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*". * "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
* For example, 'mp3:ogg' loads the * For example, 'mp3:ogg' loads the
* mp3 and the ogg plugins. The '-' before the LIBRARYNAME * mp3 and the ogg plugins. The '-' before the LIBRARYNAME
* indicates that the library should be removed from * indicates that the library should be removed from
* the library list. * the library list.
* @param prev the previous list of libraries, may be NULL * @param prev the previous list of libraries, may be NULL
* @param flags options to use * @param flags options to use
* @return the new list of libraries, equal to prev iff an error occurred * @return the new list of libraries, equal to prev iff an error occurred
* or if config was empty (or NULL). * or if config was empty (or NULL).
*/ */
struct EXTRACTOR_PluginList * struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev,
const char *config, const char *config,
enum EXTRACTOR_Options flags); enum EXTRACTOR_Options flags);
/** /**
* Remove a plugin from a list. * Remove a plugin from a list.
* *
* @param prev the current list of plugins * @param prev the current list of plugins
* @param library the name of the plugin to remove (short handle) * @param library the name of the plugin to remove (short handle)
* @return the reduced list, unchanged if the plugin was not loaded * @return the reduced list, unchanged if the plugin was not loaded
*/ */
struct EXTRACTOR_PluginList * struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList *prev,
const char * library); const char *library);
/** /**
* Remove all plugins from the given list (destroys the list). * Remove all plugins from the given list (destroys the list).
* *
* @param plugins the list of plugins * @param plugins the list of plugins
*/ */
void void
EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins); EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins);
/** /**
* Extract keywords from a file using the given set of plugins. * Extract keywords from a file using the given set of plugins.
* *
* @param plugins the list of plugins to use * @param plugins the list of plugins to use
* @param filename the name of the file, can be NULL if data is not NULL * @param filename the name of the file, can be NULL if data is not NULL
* @param data data of the file in memory, can be NULL (in which * @param data data of the file in memory, can be NULL (in which
* case libextractor will open file) if filename is not NULL * case libextractor will open file) if filename is not NULL
* @param size number of bytes in data, ignored if data is NULL * @param size number of bytes in data, ignored if data is NULL
* @param proc function to call for each meta data item found * @param proc function to call for each meta data item found
* @param proc_cls cls argument to proc * @param proc_cls cls argument to proc
*/ */
void void
EXTRACTOR_extract(struct EXTRACTOR_PluginList *plugins, EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
const char *filename, const char *filename,
const void *data, const void *data,
size_t size, size_t size,
EXTRACTOR_MetaDataProcessor proc, EXTRACTOR_MetaDataProcessor proc,
void *proc_cls); void *proc_cls);
/** /**
* Simple EXTRACTOR_MetaDataProcessor implementation that simply * Simple EXTRACTOR_MetaDataProcessor implementation that simply
* prints the extracted meta data to the given file. Only prints * prints the extracted meta data to the given file. Only prints
* those keywords that are in UTF-8 format. * those keywords that are in UTF-8 format.
* *
* @param handle the file to write to (stdout, stderr), must NOT be NULL, * @param handle the file to write to (stdout, stderr), must NOT be NULL,
* must be of type "FILE *". * must be of type "FILE *".
* @param plugin_name name of the plugin that produced this value * @param plugin_name name of the plugin that produced this value
* @param type libextractor-type describing the meta data * @param type libextractor-type describing the meta data
* @param format basic format information about data * @param format basic format information about data
* @param data_mime_type mime-type of data (not of the original file); * @param data_mime_type mime-type of data (not of the original file);
* can be NULL (if mime-type is not known) * can be NULL (if mime-type is not known)
* @param data actual meta-data found * @param data actual meta-data found
* @param data_len number of bytes in data * @param data_len number of bytes in data
* @return non-zero if printing failed, otherwise 0. * @return non-zero if printing failed, otherwise 0.
*/ */
int int
EXTRACTOR_meta_data_print(void * handle, EXTRACTOR_meta_data_print (void * handle,
const char *plugin_name, const char *plugin_name,
enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaType type,
enum EXTRACTOR_MetaFormat format, enum EXTRACTOR_MetaFormat format,
const char *data_mime_type, const char *data_mime_type,
const char *data, const char *data,
size_t data_len); size_t data_len);
#if 0 /* keep Emacsens' auto-indent happy */ #if 0 /* keep Emacsens' auto-indent happy */
{ {
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif #endif
 End of changes. 20 change blocks. 
50 lines changed or deleted 165 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/