extractor.h   extractor.h 
/* /*
This file is part of libextractor. This file is part of libextractor.
(C) 2002, 2003, 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Gr othoff
libextractor is free software; you can redistribute it and/or modify libextractor is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published it under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 2, or (at your by the Free Software Foundation; either version 2, or (at your
option) any later version. option) any later version.
libextractor is distributed in the hope that it will be useful, but libextractor is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details. General Public License for more details.
skipping to change at line 35 skipping to change at line 35
extern "C" { extern "C" {
#if 0 /* keep Emacsens' auto-indent happy */ #if 0 /* keep Emacsens' auto-indent happy */
} }
#endif #endif
#endif #endif
/** /**
* 0.2.6-1 => 0x00020601 * 0.2.6-1 => 0x00020601
* 4.5.2-0 => 0x04050200 * 4.5.2-0 => 0x04050200
*/ */
#define EXTRACTOR_VERSION 0x00052300 #define EXTRACTOR_VERSION 0x00060000
#include <stdio.h> #include <stdio.h>
/* ignore the 'type' of the keyword when eliminating duplicates */ /**
#define EXTRACTOR_DUPLICATES_TYPELESS 1 * Options for how plugin execution should be done.
/* remove type 'UNKNOWN' if there is a duplicate keyword of */
known type, even if usually different types should be enum EXTRACTOR_Options
preserved */ {
#define EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN 2
#define EXTRACTOR_DEFAULT_LIBRARIES EXTRACTOR_getDefaultLibraries()
const char * EXTRACTOR_getDefaultLibraries(void); /**
* Run plugin out-of-process, starting the process once the plugin
* is to be run. If a plugin crashes, automatically restart the
* respective process for the same file and try once more
* (since the crash may be caused by the previous file). If
* the process crashes immediately again, it is not restarted
* until the next file.
*/
EXTRACTOR_OPTION_DEFAULT_POLICY = 0,
/**
* Run plugins out-of-process, starting the process
* once at the time the plugin is loaded. This will
* prevent the main process crashing if a plugin dies.
* Ignored on platforms where out-of-process starts
* are not supported (in-process execution will be
* attempted, unless the plugin itself forbids it).
*/
EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART = 1,
/**
* Run plugins in-process. Unsafe, not recommended,
* can be nice for debugging.
*/
EXTRACTOR_OPTION_IN_PROCESS = 2,
/**
* Internal value for plugins that have been disabled.
*/
EXTRACTOR_OPTION_DISABLED = 3
};
/**
* Format in which the extracted meta data is presented.
*/
enum EXTRACTOR_MetaFormat
{
/**
* Format is unknown.
*/
EXTRACTOR_METAFORMAT_UNKNOWN = 0,
/**
* 0-terminated, UTF-8 encoded string. "data_len"
* is strlen(data)+1.
*/
EXTRACTOR_METAFORMAT_UTF8 = 1,
/**
* Some kind of binary format, see given Mime type.
*/
EXTRACTOR_METAFORMAT_BINARY = 2,
/**
* 0-terminated string. The specific encoding is unknown.
* "data_len" is strlen(data)+1.
*/
EXTRACTOR_METAFORMAT_C_STRING = 3
};
/** /**
* Enumeration defining various sources of keywords. * Enumeration defining various sources of keywords. See also
* See also
* http://dublincore.org/documents/1998/09/dces/ * http://dublincore.org/documents/1998/09/dces/
*/ */
typedef enum { enum EXTRACTOR_MetaType
EXTRACTOR_UNKNOWN = 0, {
EXTRACTOR_FILENAME = 1, /* fundamental types */
EXTRACTOR_MIMETYPE = 2, EXTRACTOR_METATYPE_RESERVED = 0,
EXTRACTOR_TITLE = 3, EXTRACTOR_METATYPE_MIMETYPE = 1,
EXTRACTOR_AUTHOR = 4, EXTRACTOR_METATYPE_FILENAME = 2,
EXTRACTOR_ARTIST = 5, EXTRACTOR_METATYPE_COMMENT = 3,
EXTRACTOR_DESCRIPTION = 6,
EXTRACTOR_COMMENT = 7, /* Standard types from bibtex */
EXTRACTOR_DATE = 8, EXTRACTOR_METATYPE_TITLE = 4,
EXTRACTOR_PUBLISHER = 9, EXTRACTOR_METATYPE_BOOK_TITLE = 5,
EXTRACTOR_LANGUAGE = 10, EXTRACTOR_METATYPE_BOOK_EDITION = 6,
EXTRACTOR_ALBUM = 11, EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER = 7,
EXTRACTOR_GENRE = 12, EXTRACTOR_METATYPE_JOURNAL_NAME = 8,
EXTRACTOR_LOCATION = 13, EXTRACTOR_METATYPE_JOURNAL_VOLUME = 9,
EXTRACTOR_VERSIONNUMBER = 14, EXTRACTOR_METATYPE_JOURNAL_NUMBER = 10,
EXTRACTOR_ORGANIZATION = 15, EXTRACTOR_METATYPE_PAGE_COUNT = 11,
EXTRACTOR_COPYRIGHT = 16, EXTRACTOR_METATYPE_PAGE_RANGE = 12,
EXTRACTOR_SUBJECT = 17, EXTRACTOR_METATYPE_AUTHOR_NAME = 13,
EXTRACTOR_KEYWORDS = 18, EXTRACTOR_METATYPE_AUTHOR_EMAIL = 14,
EXTRACTOR_CONTRIBUTOR = 19, EXTRACTOR_METATYPE_AUTHOR_INSTITUTION = 15,
EXTRACTOR_RESOURCE_TYPE = 20, EXTRACTOR_METATYPE_PUBLISHER = 16,
EXTRACTOR_FORMAT = 21, EXTRACTOR_METATYPE_PUBLISHER_ADDRESS = 17,
EXTRACTOR_RESOURCE_IDENTIFIER = 22, EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION = 18,
EXTRACTOR_SOURCE = 23, EXTRACTOR_METATYPE_PUBLISHER_SERIES = 19,
EXTRACTOR_RELATION = 24, EXTRACTOR_METATYPE_PUBLICATION_TYPE = 20,
EXTRACTOR_COVERAGE = 25, EXTRACTOR_METATYPE_PUBLICATION_YEAR = 21,
EXTRACTOR_SOFTWARE = 26, EXTRACTOR_METATYPE_PUBLICATION_MONTH = 22,
EXTRACTOR_DISCLAIMER = 27, EXTRACTOR_METATYPE_PUBLICATION_DAY = 23,
EXTRACTOR_WARNING = 28, EXTRACTOR_METATYPE_PUBLICATION_DATE = 24,
EXTRACTOR_TRANSLATED = 29, EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25,
EXTRACTOR_CREATION_DATE = 30, EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26,
EXTRACTOR_MODIFICATION_DATE = 31, EXTRACTOR_METATYPE_LANGUAGE = 27,
EXTRACTOR_CREATOR = 32, EXTRACTOR_METATYPE_CREATION_TIME = 28,
EXTRACTOR_PRODUCER = 33, EXTRACTOR_METATYPE_URL = 29,
EXTRACTOR_PAGE_COUNT = 34,
EXTRACTOR_PAGE_ORIENTATION = 35, /* "unique" document identifiers */
EXTRACTOR_PAPER_SIZE = 36, EXTRACTOR_METATYPE_URI = 30,
EXTRACTOR_USED_FONTS = 37, EXTRACTOR_METATYPE_ISRC = 31,
EXTRACTOR_PAGE_ORDER = 38, EXTRACTOR_METATYPE_HASH_MD4 = 32,
EXTRACTOR_CREATED_FOR = 39, EXTRACTOR_METATYPE_HASH_MD5 = 33,
EXTRACTOR_MAGNIFICATION = 40, EXTRACTOR_METATYPE_HASH_SHA0 = 34,
EXTRACTOR_RELEASE = 41, EXTRACTOR_METATYPE_HASH_SHA1 = 35,
EXTRACTOR_GROUP = 42, EXTRACTOR_METATYPE_HASH_RMD160 = 36,
EXTRACTOR_SIZE = 43,
EXTRACTOR_SUMMARY = 44, /* identifiers of a location */
EXTRACTOR_PACKAGER = 45, EXTRACTOR_METATYPE_GPS_LATITUDE_REF = 37,
EXTRACTOR_VENDOR = 46, EXTRACTOR_METATYPE_GPS_LATITUDE = 38,
EXTRACTOR_LICENSE = 47, EXTRACTOR_METATYPE_GPS_LONGITUDE_REF = 39,
EXTRACTOR_DISTRIBUTION = 48, EXTRACTOR_METATYPE_GPS_LONGITUDE = 40,
EXTRACTOR_BUILDHOST = 49, EXTRACTOR_METATYPE_LOCATION_CITY = 41,
EXTRACTOR_OS = 50, EXTRACTOR_METATYPE_LOCATION_SUBLOCATION = 42,
EXTRACTOR_DEPENDENCY = 51, EXTRACTOR_METATYPE_LOCATION_COUNTRY = 43,
EXTRACTOR_HASH_MD4 = 52, EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE = 44,
EXTRACTOR_HASH_MD5 = 53,
EXTRACTOR_HASH_SHA0 = 54, /* generic attributes */
EXTRACTOR_HASH_SHA1 = 55, EXTRACTOR_METATYPE_UNKNOWN = 45,
EXTRACTOR_HASH_RMD160 = 56, EXTRACTOR_METATYPE_DESCRIPTION = 46,
EXTRACTOR_RESOLUTION = 57, EXTRACTOR_METATYPE_COPYRIGHT = 47,
EXTRACTOR_CATEGORY = 58, EXTRACTOR_METATYPE_RIGHTS = 48,
EXTRACTOR_BOOKTITLE = 59, EXTRACTOR_METATYPE_KEYWORDS = 49,
EXTRACTOR_PRIORITY = 60, EXTRACTOR_METATYPE_ABSTRACT = 50,
EXTRACTOR_CONFLICTS = 61, EXTRACTOR_METATYPE_SUMMARY = 51,
EXTRACTOR_REPLACES = 62, EXTRACTOR_METATYPE_SUBJECT = 52,
EXTRACTOR_PROVIDES = 63, EXTRACTOR_METATYPE_CREATOR = 53,
EXTRACTOR_CONDUCTOR = 64, EXTRACTOR_METATYPE_FORMAT = 54,
EXTRACTOR_INTERPRET = 65, EXTRACTOR_METATYPE_FORMAT_VERSION = 55,
EXTRACTOR_OWNER = 66,
EXTRACTOR_LYRICS = 67, /* processing history */
EXTRACTOR_MEDIA_TYPE = 68, EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE = 56,
EXTRACTOR_CONTACT = 69, EXTRACTOR_METATYPE_UNKNOWN_DATE = 57,
EXTRACTOR_THUMBNAIL_DATA = 70, EXTRACTOR_METATYPE_CREATION_DATE = 58,
EXTRACTOR_PUBLICATION_DATE = 71, EXTRACTOR_METATYPE_MODIFICATION_DATE = 59,
EXTRACTOR_CAMERA_MAKE = 72, EXTRACTOR_METATYPE_LAST_PRINTED = 60,
EXTRACTOR_CAMERA_MODEL = 73, EXTRACTOR_METATYPE_LAST_SAVED_BY = 61,
EXTRACTOR_EXPOSURE = 74, EXTRACTOR_METATYPE_TOTAL_EDITING_TIME = 62,
EXTRACTOR_APERTURE = 75, EXTRACTOR_METATYPE_EDITING_CYCLES = 63,
EXTRACTOR_EXPOSURE_BIAS = 76, EXTRACTOR_METATYPE_MODIFIED_BY_SOFTWARE = 64,
EXTRACTOR_FLASH = 77, EXTRACTOR_METATYPE_REVISION_HISTORY = 65,
EXTRACTOR_FLASH_BIAS = 78,
EXTRACTOR_FOCAL_LENGTH = 79, EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE = 66,
EXTRACTOR_FOCAL_LENGTH_35MM = 80, EXTRACTOR_METATYPE_FINDER_FILE_TYPE = 67,
EXTRACTOR_ISO_SPEED = 81, EXTRACTOR_METATYPE_FINDER_FILE_CREATOR = 68,
EXTRACTOR_EXPOSURE_MODE = 82,
EXTRACTOR_METERING_MODE = 83, /* software package specifics (deb, rpm, tgz, elf) */
EXTRACTOR_MACRO_MODE = 84, EXTRACTOR_METATYPE_PACKAGE_NAME = 69,
EXTRACTOR_IMAGE_QUALITY = 85, EXTRACTOR_METATYPE_PACKAGE_VERSION = 70,
EXTRACTOR_WHITE_BALANCE = 86, EXTRACTOR_METATYPE_SECTION = 71,
EXTRACTOR_ORIENTATION = 87, EXTRACTOR_METATYPE_UPLOAD_PRIORITY = 72,
EXTRACTOR_TEMPLATE = 88, EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY = 73,
EXTRACTOR_SPLIT = 89, EXTRACTOR_METATYPE_PACKAGE_CONFLICTS = 74,
EXTRACTOR_PRODUCTVERSION = 90, EXTRACTOR_METATYPE_PACKAGE_REPLACES = 75,
EXTRACTOR_LAST_SAVED_BY = 91, EXTRACTOR_METATYPE_PACKAGE_PROVIDES = 76,
EXTRACTOR_LAST_PRINTED = 92, EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS = 77,
EXTRACTOR_WORD_COUNT = 93, EXTRACTOR_METATYPE_PACKAGE_SUGGESTS = 78,
EXTRACTOR_CHARACTER_COUNT = 94, EXTRACTOR_METATYPE_PACKAGE_MAINTAINER = 79,
EXTRACTOR_TOTAL_EDITING_TIME = 95, EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE = 80,
EXTRACTOR_THUMBNAILS = 96, EXTRACTOR_METATYPE_PACKAGE_SOURCE = 81,
EXTRACTOR_SECURITY = 97, EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL = 82,
EXTRACTOR_CREATED_BY_SOFTWARE = 98, EXTRACTOR_METATYPE_TARGET_ARCHITECTURE = 83,
EXTRACTOR_MODIFIED_BY_SOFTWARE = 99, EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY = 84,
EXTRACTOR_REVISION_HISTORY = 100, EXTRACTOR_METATYPE_LICENSE = 85,
EXTRACTOR_LOWERCASE = 101, EXTRACTOR_METATYPE_PACKAGE_DISTRIBUTION = 86,
EXTRACTOR_COMPANY = 102, EXTRACTOR_METATYPE_BUILDHOST = 87,
EXTRACTOR_GENERATOR = 103, EXTRACTOR_METATYPE_VENDOR = 88,
EXTRACTOR_CHARACTER_SET = 104, EXTRACTOR_METATYPE_TARGET_OS = 89,
EXTRACTOR_LINE_COUNT = 105, EXTRACTOR_METATYPE_SOFTWARE_VERSION = 90,
EXTRACTOR_PARAGRAPH_COUNT = 106, EXTRACTOR_METATYPE_TARGET_PLATFORM = 91,
EXTRACTOR_EDITING_CYCLES = 107, EXTRACTOR_METATYPE_RESOURCE_TYPE = 92,
EXTRACTOR_SCALE = 108, EXTRACTOR_METATYPE_LIBRARY_SEARCH_PATH = 93,
EXTRACTOR_MANAGER = 109, EXTRACTOR_METATYPE_LIBRARY_DEPENDENCY = 94,
EXTRACTOR_MOVIE_DIRECTOR = 110,
EXTRACTOR_DURATION = 111, /* photography specifics */
EXTRACTOR_INFORMATION = 112, EXTRACTOR_METATYPE_CAMERA_MAKE = 95,
EXTRACTOR_FULL_NAME = 113, EXTRACTOR_METATYPE_CAMERA_MODEL = 96,
EXTRACTOR_CHAPTER = 114, EXTRACTOR_METATYPE_EXPOSURE = 97,
EXTRACTOR_YEAR = 115, EXTRACTOR_METATYPE_APERTURE = 98,
EXTRACTOR_LINK = 116, EXTRACTOR_METATYPE_EXPOSURE_BIAS = 99,
EXTRACTOR_MUSIC_CD_IDENTIFIER = 117, EXTRACTOR_METATYPE_FLASH = 100,
EXTRACTOR_PLAY_COUNTER = 118, EXTRACTOR_METATYPE_FLASH_BIAS = 101,
EXTRACTOR_POPULARITY_METER = 119, EXTRACTOR_METATYPE_FOCAL_LENGTH = 102,
EXTRACTOR_CONTENT_TYPE = 120, EXTRACTOR_METATYPE_FOCAL_LENGTH_35MM = 103,
EXTRACTOR_ENCODED_BY = 121, EXTRACTOR_METATYPE_ISO_SPEED = 104,
EXTRACTOR_TIME = 122, EXTRACTOR_METATYPE_EXPOSURE_MODE = 105,
EXTRACTOR_MUSICIAN_CREDITS_LIST = 123, EXTRACTOR_METATYPE_METERING_MODE = 106,
EXTRACTOR_MOOD = 124, EXTRACTOR_METATYPE_MACRO_MODE = 107,
EXTRACTOR_FORMAT_VERSION = 125, EXTRACTOR_METATYPE_IMAGE_QUALITY = 108,
EXTRACTOR_TELEVISION_SYSTEM = 126, EXTRACTOR_METATYPE_WHITE_BALANCE = 109,
EXTRACTOR_SONG_COUNT = 127, EXTRACTOR_METATYPE_ORIENTATION = 110,
EXTRACTOR_STARTING_SONG = 128, EXTRACTOR_METATYPE_MAGNIFICATION = 111,
EXTRACTOR_HARDWARE_DEPENDENCY = 129,
EXTRACTOR_RIPPER = 130, /* image specifics */
EXTRACTOR_FILE_SIZE = 131, EXTRACTOR_METATYPE_IMAGE_DIMENSIONS = 112,
EXTRACTOR_TRACK_NUMBER = 132, EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE = 113,
EXTRACTOR_ISRC = 133, EXTRACTOR_METATYPE_THUMBNAIL = 114,
EXTRACTOR_DISC_NUMBER = 134, EXTRACTOR_METATYPE_IMAGE_RESOLUTION = 115,
EXTRACTOR_GNUNET_DISPLAY_TYPE = 135, EXTRACTOR_METATYPE_SOURCE = 116,
EXTRACTOR_GNUNET_ECBC_URI = 136,
} EXTRACTOR_KeywordType; /* (text) document processing specifics */
EXTRACTOR_METATYPE_CHARACTER_SET = 117,
/** EXTRACTOR_METATYPE_LINE_COUNT = 118,
* Test if a given LE type contains binary data. EXTRACTOR_METATYPE_PARAGRAPH_COUNT = 119,
*/ EXTRACTOR_METATYPE_WORD_COUNT = 120,
#define EXTRACTOR_isBinaryType(type) (type == EXTRACTOR_THUMBNAIL_DATA) EXTRACTOR_METATYPE_CHARACTER_COUNT = 121,
EXTRACTOR_METATYPE_PAGE_ORIENTATION = 122,
/** EXTRACTOR_METATYPE_PAPER_SIZE = 123,
* A linked list of keywords. This structure is passed around EXTRACTOR_METATYPE_TEMPLATE = 124,
* in libExtractor and is typically the result of any keyword EXTRACTOR_METATYPE_COMPANY = 125,
* extraction operation. EXTRACTOR_METATYPE_MANAGER = 126,
* <p> EXTRACTOR_METATYPE_REVISION_NUMBER = 127,
* Each entry in the keyword list consists of a string (the
* keyword) and the keyword type (of type KeywordType) /* music / video specifics */
* describing how/from where the keyword was obtained. EXTRACTOR_METATYPE_DURATION = 128,
*/ EXTRACTOR_METATYPE_ALBUM = 129,
typedef struct EXTRACTOR_Keywords { EXTRACTOR_METATYPE_ARTIST = 130,
/* the keyword that was found */ EXTRACTOR_METATYPE_GENRE = 131,
char * keyword; EXTRACTOR_METATYPE_TRACK_NUMBER = 132,
/* the type of the keyword (classification) */ EXTRACTOR_METATYPE_DISC_NUMBER = 133,
EXTRACTOR_KeywordType keywordType; EXTRACTOR_METATYPE_PERFORMER = 134,
/* the next entry in the list */ EXTRACTOR_METATYPE_CONTACT_INFORMATION = 135,
struct EXTRACTOR_Keywords * next; EXTRACTOR_METATYPE_SONG_VERSION = 136,
} EXTRACTOR_KeywordList; EXTRACTOR_METATYPE_PICTURE = 137,
EXTRACTOR_METATYPE_COVER_PICTURE = 138,
EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE = 139,
EXTRACTOR_METATYPE_EVENT_PICTURE = 140,
EXTRACTOR_METATYPE_LOGO = 141,
EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM = 142,
EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
EXTRACTOR_METATYPE_DISCLAIMER = 144,
EXTRACTOR_METATYPE_WARNING = 145,
EXTRACTOR_METATYPE_PAGE_ORDER = 146,
EXTRACTOR_METATYPE_WRITER = 147,
EXTRACTOR_METATYPE_PRODUCT_VERSION = 148,
EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149,
EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150,
EXTRACTOR_METATYPE_NETWORK_NAME = 151,
EXTRACTOR_METATYPE_SHOW_NAME = 152,
EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
EXTRACTOR_METATYPE_SONG_COUNT = 154,
EXTRACTOR_METATYPE_STARTING_SONG = 155,
EXTRACTOR_METATYPE_PLAY_COUNTER = 156,
EXTRACTOR_METATYPE_CONDUCTOR = 157,
EXTRACTOR_METATYPE_INTERPRETATION = 158,
EXTRACTOR_METATYPE_COMPOSER = 159,
EXTRACTOR_METATYPE_BEATS_PER_MINUTE = 160,
EXTRACTOR_METATYPE_ENCODED_BY = 161,
EXTRACTOR_METATYPE_ORIGINAL_TITLE = 162,
EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
EXTRACTOR_METATYPE_ORIGINAL_PERFORMER = 166,
EXTRACTOR_METATYPE_LYRICS = 167,
EXTRACTOR_METATYPE_POPULARITY_METER = 168,
EXTRACTOR_METATYPE_LICENSEE = 169,
EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 170,
EXTRACTOR_METATYPE_MOOD = 171,
EXTRACTOR_METATYPE_SUBTITLE = 172,
/* GNUnet specific values (never extracted) */
EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 173,
EXTRACTOR_METATYPE_GNUNET_FULL_DATA = 174,
EXTRACTOR_METATYPE_RATING = 175,
EXTRACTOR_METATYPE_ORGANIZATION = 176,
EXTRACTOR_METATYPE_RIPPER = 177,
EXTRACTOR_METATYPE_PRODUCER = 178,
EXTRACTOR_METATYPE_GROUP = 179,
EXTRACTOR_METATYPE_LAST = 180
};
/** /**
* Signature of the extract method that each plugin * Get the textual name of the keyword.
* must provide.
* *
* @param filename MAYBE NULL (!) * @param type meta type to get a UTF-8 string for
* @param data must not be modified (!) * @return NULL if the type is not known, otherwise
* an English (locale: C) string describing the type;
* translate using 'dgettext ("libextractor", rval)'
*/ */
typedef EXTRACTOR_KeywordList * const char *
(*ExtractMethod)(const char * filename, EXTRACTOR_metatype_to_string(enum EXTRACTOR_MetaType type);
char * data,
size_t filesize,
EXTRACTOR_KeywordList * next,
const char * options);
/**
* Linked list of extractor helper-libraries. An application
* builds this list by telling libextractor to load various
* keyword-extraction libraries. Libraries can also be unloaded
* (removed from this list, see removeLibrary).
* <p>
* Client code should never be concerned with the internals of
* this struct.
*/
typedef struct EXTRACTOR_Extractor {
void * libraryHandle;
char * libname;
ExtractMethod extractMethod;
struct EXTRACTOR_Extractor * next;
char * options;
} EXTRACTOR_ExtractorList;
/** /**
* Load the default set of libraries. * Get a long description for the meta type.
* @return the default set of libraries. *
* @param type meta type to get a UTF-8 description for
* @return NULL if the type is not known, otherwise
* an English (locale: C) string describing the type;
* translate using 'dgettext ("libextractor", rval)'
*/ */
EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries(void); const char *
EXTRACTOR_metatype_to_description(enum EXTRACTOR_MetaType type);
/** /**
* Get the textual name of the keyword. * Return the highest type number, exclusive as in [0,max).
* @return NULL if the type is not known *
* @return highest legal metatype number for this version of libextractor
*/ */
const char * enum EXTRACTOR_MetaType
EXTRACTOR_getKeywordTypeAsString(EXTRACTOR_KeywordType type); EXTRACTOR_metatype_get_max (void);
/** /**
* Return the highest type number, exclusive as in [0,highest). * Type of a function that libextractor calls for each
*/ * meta data item found.
EXTRACTOR_KeywordType *
EXTRACTOR_getHighestKeywordTypeNumber(void); * @param cls closure (user-defined)
* @param plugin_name name of the plugin that produced this value;
* special values can be used (i.e. '<zlib>' for zlib being
* used in the main libextractor library and yielding
* meta data).
* @param type libextractor-type describing the meta data
* @param format basic format information about data
* @param data_mime_type mime-type of data (not of the original file);
* can be NULL (if mime-type is not known)
* @param data actual meta-data found
* @param data_len number of bytes in data
* @return 0 to continue extracting, 1 to abort
*/
typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls,
const char *plugin_name,
enum EXTRACTOR_MetaType type,
enum EXTRACTOR_MetaFormat format,
const char *data_mime_type,
const char *data,
size_t data_len);
/** /**
* Load multiple libraries as specified by the user. * Signature of the extract method that each plugin
* @param config a string given by the user that defines which * must provide.
* libraries should be loaded. Has the format *
* "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*". * @param data data to process
* For example, * @param datasize number of bytes available in data
* libextractor_mp3.so:libextractor_ogg.so loads the * @param proc function to call for meta data found
* mp3 and the ogg library. The '-' before the LIBRARYNAME * @param proc_cls cls argument to proc
* indicates that the library should be added to the end * @param options options for this plugin; can be NULL
* of the library list (addLibraryLast). * @return 0 if all calls to proc returned 0, otherwise 1
* @param prev the previous list of libraries, may be NULL */
* @return the new list of libraries, equal to prev iff an error occured typedef int (*EXTRACTOR_ExtractMethod)(const char *data,
* or if config was empty (or NULL). size_t datasize,
EXTRACTOR_MetaDataProcessor proc,
void *proc_cls,
const char *options);
/**
* Linked list of extractor plugins. An application builds this list
* by telling libextractor to load various keyword-extraction
* plugins. Libraries can also be unloaded (removed from this list,
* see EXTRACTOR_plugin_remove).
*/
struct EXTRACTOR_PluginList;
/**
* Load the default set of plugins. The default can be changed
* by setting the LIBEXTRACTOR_LIBRARIES environment variable;
* If it is set to "env", then this function will return
* EXTRACTOR_plugin_add_config (NULL, env, flags).
*
* If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt
* to locate the installed plugins and load all of them.
* The directory where the code will search for plugins is typically
* automatically determined; it can be specified explicitly using the
* "LIBEXTRACTOR_PREFIX" environment variable.
*
* This environment variable must be set to the precise directory with
* the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that
* setting the environment variable will disable all of the methods
* that are typically used to determine the location of plugins.
* Multiple paths can be specified using ':' to separate them.
*
* @param flags options for all of the plugins loaded
* @return the default set of plugins, NULL if no plugins were found
*/ */
EXTRACTOR_ExtractorList * struct EXTRACTOR_PluginList *
EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev, EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags);
const char * config);
/** /**
* Add a library for keyword extraction. * Add a library for keyword extraction.
*
* @param prev the previous list of libraries, may be NULL * @param prev the previous list of libraries, may be NULL
* @param library the name of the library * @param library the name of the library (short handle, i.e. "mime")
* @param options options to give to the library
* @param flags options to use
* @return the new list of libraries, equal to prev iff an error occured * @return the new list of libraries, equal to prev iff an error occured
*/ */
EXTRACTOR_ExtractorList * struct EXTRACTOR_PluginList *
EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev, EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev,
const char * library); const char * library,
const char *options,
/** enum EXTRACTOR_Options flags);
* Add a library for keyword extraction at the END of the list.
* @param prev the previous list of libraries, may be NULL
* @param library the name of the library
* @return the new list of libraries, always equal to prev
* except if prev was NULL and no error occurs
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev,
const char * library);
/**
* Remove a library for keyword extraction.
* @param prev the current list of libraries
* @param library the name of the library to remove
* @return the reduced list, unchanged if the library was not loaded
*/
EXTRACTOR_ExtractorList *
EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev,
const char * library);
/**
* Remove all extractors.
* @param libraries the list of extractors
*/
void EXTRACTOR_removeAll(EXTRACTOR_ExtractorList * libraries);
/** /**
* Extract keywords from a file using the available extractors. * Load multiple libraries as specified by the user.
* @param extractor the list of extractor libraries
* @param filename the name of the file
* @return the list of keywords found in the file, NULL if none
* were found (or other errors)
*/
EXTRACTOR_KeywordList *
EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor,
const char * filename);
/**
* Extract keywords from a buffer in memory
* using the available extractors.
* *
* @param extractor the list of extractor libraries * @param config a string given by the user that defines which
* @param data the data of the file * libraries should be loaded. Has the format
* @param size the number of bytes in data * "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
* @return the list of keywords found in the file, NULL if none * For example, 'mp3:ogg' loads the
* were found (or other errors) * mp3 and the ogg plugins. The '-' before the LIBRARYNAME
*/ * indicates that the library should be removed from
EXTRACTOR_KeywordList * * the library list.
EXTRACTOR_getKeywords2(EXTRACTOR_ExtractorList * extractor, * @param prev the previous list of libraries, may be NULL
const void * data, * @param flags options to use
size_t size); * @return the new list of libraries, equal to prev iff an error occured
* or if config was empty (or NULL).
/**
* Remove duplicate keywords from the list.
* @param list the original keyword list (destroyed in the process!)
* @param options a set of options (DUPLICATES_XXXX)
* @return a list of keywords without duplicates
*/
EXTRACTOR_KeywordList *
EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list,
unsigned int options);
/**
* Remove empty (all-whitespace) keywords from the list.
* @param list the original keyword list (destroyed in the process!)
* @return a list of keywords without duplicates
*/
EXTRACTOR_KeywordList *
EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list);
/**
* Remove keywords of a particular type from the list.
* @param list the original keyword list (altered in the process!)
* @param type the type to remove
* @return a list of keywords without entries of given type
*/
EXTRACTOR_KeywordList *
EXTRACTOR_removeKeywordsOfType(EXTRACTOR_KeywordList * list,
EXTRACTOR_KeywordType type);
/**
* Print a keyword list to a file.
* For debugging.
* @param handle the file to write to (stdout, stderr), must NOT be NULL
* @param keywords the list of keywords to print, may be NULL
*/
void EXTRACTOR_printKeywords(FILE * handle,
EXTRACTOR_KeywordList * keywords);
/**
* Free the memory occupied by the keyword list (and the
* keyword strings in it!)
* @param keywords the list to free
*/
void EXTRACTOR_freeKeywords(EXTRACTOR_KeywordList * keywords);
/**
* Extract the last keyword that of the given type from the keyword list.
* @param type the type of the keyword
* @param keywords the keyword list
* @return the last matching keyword, or NULL if none matches;
* the string returned is aliased in the keywords list and must
* not be freed or manipulated by the client. It will become
* invalid once the keyword list is freed.
*/
const char * EXTRACTOR_extractLast(EXTRACTOR_KeywordType type,
EXTRACTOR_KeywordList * keywords);
/**
* Extract the last keyword of the given string from the keyword list.
* @param type the string describing the type of the keyword
* @param keywords the keyword list
* @return the last matching keyword, or NULL if none matches;
* the string returned is aliased in the keywords list and must
* not be freed or manipulated by the client. It will become
* invalid once the keyword list is freed.
*/ */
const char * EXTRACTOR_extractLastByString(const char * type, struct EXTRACTOR_PluginList *
EXTRACTOR_KeywordList * keywords) EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev,
; const char *config,
enum EXTRACTOR_Options flags);
/** /**
* Count the number of keywords in the keyword list. * Remove a plugin from a list.
* @param keywords the keyword list *
* @return the number of keywords in the list * @param prev the current list of plugins
* @param library the name of the plugin to remove (short handle)
* @return the reduced list, unchanged if the plugin was not loaded
*/ */
unsigned int EXTRACTOR_countKeywords(EXTRACTOR_KeywordList * keywords); struct EXTRACTOR_PluginList *
EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
const char * library);
/** /**
* This function can be used to decode the binary data * Remove all plugins from the given list (destroys the list).
* encoded in the libextractor metadata (i.e. for
* the thumbnails).
* *
* @param in 0-terminated string from the meta-data * @param plugin the list of plugins
* @return 1 on error, 0 on success
*/ */
int EXTRACTOR_binaryDecode(const char * in, void
unsigned char ** out, EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins);
size_t * outSize);
/** /**
* Encode the given binary data object * Extract keywords from a file using the given set of plugins.
* as a 0-terminated C-string according
* to the LE binary data encoding standard.
* *
* @return NULL on error, the 0-terminated * @param plugins the list of plugins to use
* encoding otherwise * @param filename the name of the file, can be NULL if data is not NULL
*/ * @param data data of the file in memory, can be NULL (in which
char * EXTRACTOR_binaryEncode(const unsigned char * data, * case libextractor will open file) if filename is not NULL
size_t size); * @param size number of bytes in data, ignored if data is NULL
* @param proc function to call for each meta data item found
* @param proc_cls cls argument to proc
*/
void
EXTRACTOR_extract(struct EXTRACTOR_PluginList *plugins,
const char *filename,
const void *data,
size_t size,
EXTRACTOR_MetaDataProcessor proc,
void *proc_cls);
/**
* Simple EXTRACTOR_MetaDataProcessor implementation that simply
* prints the extracted meta data to the given file. Only prints
* those keywords that are in UTF-8 format.
*
* @param handle the file to write to (stdout, stderr), must NOT be NULL,
* must be of type "FILE *".
* @param plugin_name name of the plugin that produced this value
* @param type libextractor-type describing the meta data
* @param format basic format information about data
* @param data_mime_type mime-type of data (not of the original file);
* can be NULL (if mime-type is not known)
* @param data actual meta-data found
* @param data_len number of bytes in data
* @return non-zero if printing failed, otherwise 0.
*/
int
EXTRACTOR_meta_data_print(void * handle,
const char *plugin_name,
enum EXTRACTOR_MetaType type,
enum EXTRACTOR_MetaFormat format,
const char *data_mime_type,
const char *data,
size_t data_len);
#if 0 /* keep Emacsens' auto-indent happy */ #if 0 /* keep Emacsens' auto-indent happy */
{ {
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif #endif
 End of changes. 29 change blocks. 
370 lines changed or deleted 444 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/