| database.h | | database.h | |
| /** @file database.h | | /** @file database.h | |
| * @brief API for working with Xapian databases | | * @brief API for working with Xapian databases | |
| */ | | */ | |
| /* Copyright 1999,2000,2001 BrightStation PLC | | /* Copyright 1999,2000,2001 BrightStation PLC | |
| * Copyright 2002 Ananova Ltd | | * Copyright 2002 Ananova Ltd | |
|
| * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013 Olly Be
tts | | * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014 Ol
ly Betts | |
| * Copyright 2006,2008 Lemur Consulting Ltd | | * Copyright 2006,2008 Lemur Consulting Ltd | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 28 | | skipping to change at line 28 | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_DATABASE_H | | #ifndef XAPIAN_INCLUDED_DATABASE_H | |
| #define XAPIAN_INCLUDED_DATABASE_H | | #define XAPIAN_INCLUDED_DATABASE_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/database.h> directly; include <xapian.h> instead
." | | # error "Never use <xapian/database.h> directly; include <xapian.h> instead
." | |
| #endif | | #endif | |
| | | | |
| #include <iosfwd> | | #include <iosfwd> | |
| #include <string> | | #include <string> | |
| #include <vector> | | #include <vector> | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/intrusive_ptr.h> | | #include <xapian/intrusive_ptr.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| | | | |
| skipping to change at line 69 | | skipping to change at line 69 | |
| * @exception DatabaseVersionError may be thrown if the database is in an | | * @exception DatabaseVersionError may be thrown if the database is in an | |
| * unsupported format (for example, created by a newer version of Xapian | | * unsupported format (for example, created by a newer version of Xapian | |
| * which uses an incompatible format). | | * which uses an incompatible format). | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT Database { | | class XAPIAN_VISIBILITY_DEFAULT Database { | |
| public: | | public: | |
| class Internal; | | class Internal; | |
| /// @private @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| std::vector<Xapian::Internal::intrusive_ptr<Internal> > internal; | | std::vector<Xapian::Internal::intrusive_ptr<Internal> > internal; | |
| | | | |
|
| /** @private @internal Get a document from the database, without | | | |
| * needing to check if it exists. | | | |
| * | | | |
| * This method returns a Xapian::Document object which provides the | | | |
| * information about a document. If the document doesn't exist, | | | |
| * either a NULL pointer may be returned, or the returned object wi | | | |
| ll | | | |
| * throw DocNotFoundError when you try to access it. | | | |
| * | | | |
| * The caller should delete the returned object when it has finishe | | | |
| d | | | |
| * with it. | | | |
| * | | | |
| * The returned value is cast to void* to avoid needing to include | | | |
| * xapian/document.h from here. | | | |
| * | | | |
| * @param did The document id of the document to retrieve. | | | |
| * | | | |
| * @return Pointer to Document::Internal object cast to void*. | | | |
| */ | | | |
| void * get_document_lazily_(Xapian::docid did) const; | | | |
| | | | |
| /** Add an existing database (or group of databases) to those | | /** Add an existing database (or group of databases) to those | |
| * accessed by this object. | | * accessed by this object. | |
| * | | * | |
| * @param database the database(s) to add. | | * @param database the database(s) to add. | |
| */ | | */ | |
| void add_database(const Database & database); | | void add_database(const Database & database); | |
| | | | |
| /** Create a Database with no databases in. | | /** Create a Database with no databases in. | |
| */ | | */ | |
| Database(); | | Database(); | |
| | | | |
| /** Open a Database, automatically determining the database | | /** Open a Database, automatically determining the database | |
| * backend to use. | | * backend to use. | |
| * | | * | |
| * @param path directory that the database is stored in. | | * @param path directory that the database is stored in. | |
| */ | | */ | |
|
| explicit Database(const std::string &path); | | explicit Database(const std::string &path, int flags = 0); | |
| | | | |
| /** @private @internal Create a Database from its internals. | | /** @private @internal Create a Database from its internals. | |
| */ | | */ | |
| explicit Database(Internal *internal); | | explicit Database(Internal *internal); | |
| | | | |
| /** Destroy this handle on the database. | | /** Destroy this handle on the database. | |
| * | | * | |
| * If there are no copies of this object remaining, the database(s) | | * If there are no copies of this object remaining, the database(s) | |
| * will be closed. | | * will be closed. | |
| */ | | */ | |
| | | | |
| skipping to change at line 354 | | skipping to change at line 334 | |
| ValueIterator valuestream_begin(Xapian::valueno slot) const; | | ValueIterator valuestream_begin(Xapian::valueno slot) const; | |
| | | | |
| /// Return end iterator corresponding to valuestream_begin(). | | /// Return end iterator corresponding to valuestream_begin(). | |
| ValueIterator XAPIAN_NOTHROW(valuestream_end(Xapian::valueno) const)
{ | | ValueIterator XAPIAN_NOTHROW(valuestream_end(Xapian::valueno) const)
{ | |
| return ValueIterator(); | | return ValueIterator(); | |
| } | | } | |
| | | | |
| /// Get the length of a document. | | /// Get the length of a document. | |
| Xapian::termcount get_doclength(Xapian::docid did) const; | | Xapian::termcount get_doclength(Xapian::docid did) const; | |
| | | | |
|
| | | /// Get the number of unique terms in document. | |
| | | Xapian::termcount get_unique_terms(Xapian::docid did) const; | |
| | | | |
| /** Send a "keep-alive" to remote databases to stop them timing out. | | /** Send a "keep-alive" to remote databases to stop them timing out. | |
| * | | * | |
| * Has no effect on non-remote databases. | | * Has no effect on non-remote databases. | |
| */ | | */ | |
| void keep_alive(); | | void keep_alive(); | |
| | | | |
| /** Get a document from the database, given its document id. | | /** Get a document from the database, given its document id. | |
| * | | * | |
| * This method returns a Xapian::Document object which provides the | | * This method returns a Xapian::Document object which provides the | |
| * information about a document. | | * information about a document. | |
| | | | |
| skipping to change at line 500 | | skipping to change at line 483 | |
| std::string get_uuid() const; | | std::string get_uuid() const; | |
| | | | |
| /** Check the integrity of a database or database table. | | /** Check the integrity of a database or database table. | |
| * | | * | |
| * This method is currently experimental, and may change incompatib
ly | | * This method is currently experimental, and may change incompatib
ly | |
| * or possibly even be removed. Feedback on how well it works and | | * or possibly even be removed. Feedback on how well it works and | |
| * how it might be improved is welcome. | | * how it might be improved is welcome. | |
| * | | * | |
| * @param path Path to database or table | | * @param path Path to database or table | |
| * @param opts Options to use for check | | * @param opts Options to use for check | |
|
| * @param out std::ostream to write output to | | * @param out std::ostream to write output to (NULL for no output) | |
| */ | | | |
| #ifndef check | | | |
| static size_t check(const std::string & path, int opts, | | | |
| std::ostream &out); | | | |
| #else | | | |
| // The AssertMacros.h header in the OS X SDK currently defines a che | | | |
| ck | | | |
| // macro. Apple have deprecated check() in favour of __Check() and | | | |
| // plan to remove check() in a "future release", but for now prevent | | | |
| // expansion of check by adding parentheses in the method prototype: | | | |
| // http://www.opensource.apple.com/source/CarbonHeaders/CarbonHeader | | | |
| s-18.1/AssertMacros.h | | | |
| // | | | |
| // We do this conditionally, as these parentheses trip up SWIG's | | | |
| // parser: | | | |
| // https://github.com/swig/swig/issues/45 | | | |
| static size_t (check)(const std::string & path, int opts, | | | |
| std::ostream &out); | | | |
| #endif | | | |
| | | | |
| /** Check the integrity of a database or database table. | | | |
| * | | | |
| * This method is currently experimental, and may change incompatib | | | |
| ly | | | |
| * or possibly even be removed. Feedback on how well it works and | | | |
| * how it might be improved is welcome. | | | |
| * | | | |
| * @param path Path to database or table | | | |
| * @param opts Options to use for check | | | |
| */ | | */ | |
|
| static size_t check(const std::string & path, int opts); | | static size_t check(const std::string & path, int opts = 0, | |
| | | std::ostream *out = NULL); | |
| }; | | }; | |
| | | | |
| /** This class provides read/write access to a database. | | /** This class provides read/write access to a database. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT WritableDatabase : public Database { | | class XAPIAN_VISIBILITY_DEFAULT WritableDatabase : public Database { | |
| public: | | public: | |
| /** Destroy this handle on the database. | | /** Destroy this handle on the database. | |
| * | | * | |
| * If no other handles to this database remain, the database will b
e | | * If no other handles to this database remain, the database will b
e | |
| * closed. | | * closed. | |
| * | | * | |
| * If a transaction is active cancel_transaction() will be implicit
ly | | * If a transaction is active cancel_transaction() will be implicit
ly | |
| * called; if no transaction is active commit() will be implicitly | | * called; if no transaction is active commit() will be implicitly | |
| * called, but any exception will be swallowed (because throwing | | * called, but any exception will be swallowed (because throwing | |
| * exceptions in C++ destructors is problematic). If you aren't us
ing | | * exceptions in C++ destructors is problematic). If you aren't us
ing | |
| * transactions and want to know about any failure to commit change
s, | | * transactions and want to know about any failure to commit change
s, | |
| * call commit() explicitly before the destructor gets called. | | * call commit() explicitly before the destructor gets called. | |
| */ | | */ | |
| virtual ~WritableDatabase(); | | virtual ~WritableDatabase(); | |
| | | | |
|
| /** Create an empty WritableDatabase. | | /** Create a WritableDatabase with no subdatabases. | |
| | | * | |
| | | * The created object isn't very useful in this state - it's intend | |
| | | ed | |
| | | * as a placeholder value. | |
| */ | | */ | |
| WritableDatabase(); | | WritableDatabase(); | |
| | | | |
| /** Open a database for update, automatically determining the databa
se | | /** Open a database for update, automatically determining the databa
se | |
| * backend to use. | | * backend to use. | |
| * | | * | |
| * If the database is to be created, Xapian will try | | * If the database is to be created, Xapian will try | |
| * to create the directory indicated by path if it doesn't already | | * to create the directory indicated by path if it doesn't already | |
| * exist (but only the leaf directory, not recursively). | | * exist (but only the leaf directory, not recursively). | |
| * | | * | |
| * @param path directory that the database is stored in. | | * @param path directory that the database is stored in. | |
|
| * @param action one of: | | * @param flags one of: | |
| * - Xapian::DB_CREATE_OR_OPEN open for read/write; create if no db | | * - Xapian::DB_CREATE_OR_OPEN open for read/write; create if no db | |
|
| * exists | | * exists (the default if flags isn't specified) | |
| * - Xapian::DB_CREATE create new database; fail if db exists | | * - Xapian::DB_CREATE create new database; fail if db exists | |
| * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing db; create i
f | | * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing db; create i
f | |
| * none exists | | * none exists | |
| * - Xapian::DB_OPEN open for read/write; fail if no db exists | | * - Xapian::DB_OPEN open for read/write; fail if no db exists | |
| * | | * | |
|
| | | * Additionally, the following flags can be combined with one of the above | |
| | | * using bitwise-or (| in C++): | |
| | | * | |
| | | * - Xapian::DB_NO_SYNC don't call fsync() or similar | |
| | | * - Xapian::DB_DANGEROUS don't be crash-safe, no concurrent reade | |
| | | rs | |
| | | * | |
| | | * @param block_size If a new database is created, this specifies | |
| | | * the block size (in bytes) for backends which | |
| | | * have such a concept. For chert and glass, the | |
| | | * block size must be a power of 2 between 2048 a | |
| | | nd | |
| | | * 65536 (inclusive), and the default (also used | |
| | | if | |
| | | * an invalid value is passed) is 8192 bytes. | |
| | | * | |
| * @exception Xapian::DatabaseCorruptError will be thrown if the | | * @exception Xapian::DatabaseCorruptError will be thrown if the | |
| * database is in a corrupt state. | | * database is in a corrupt state. | |
| * | | * | |
| * @exception Xapian::DatabaseLockError will be thrown if a lock | | * @exception Xapian::DatabaseLockError will be thrown if a lock | |
| * couldn't be acquired on the database. | | * couldn't be acquired on the database. | |
| */ | | */ | |
|
| WritableDatabase(const std::string &path, int action); | | explicit WritableDatabase(const std::string &path, | |
| | | int flags = 0, | |
| | | int block_size = 0); | |
| | | | |
| /** @private @internal Create a WritableDatabase given its internal
s. | | /** @private @internal Create a WritableDatabase given its internal
s. | |
| */ | | */ | |
| explicit WritableDatabase(Database::Internal *internal); | | explicit WritableDatabase(Database::Internal *internal); | |
| | | | |
| /** Copying is allowed. The internals are reference counted, so | | /** Copying is allowed. The internals are reference counted, so | |
| * copying is cheap. | | * copying is cheap. | |
| * | | * | |
| * @param other The object to copy. | | * @param other The object to copy. | |
| */ | | */ | |
| | | | |
| skipping to change at line 981 | | skipping to change at line 957 | |
| * @exception Xapian::UnimplementedError will be thrown if the | | * @exception Xapian::UnimplementedError will be thrown if the | |
| * database backend in use doesn't support user-specifie
d | | * database backend in use doesn't support user-specifie
d | |
| * metadata. | | * metadata. | |
| */ | | */ | |
| void set_metadata(const std::string & key, const std::string & value
); | | void set_metadata(const std::string & key, const std::string & value
); | |
| | | | |
| /// Return a string describing this object. | | /// Return a string describing this object. | |
| std::string get_description() const; | | std::string get_description() const; | |
| }; | | }; | |
| | | | |
|
| /** Open for read/write; create if no db exists. */ | | | |
| const int DB_CREATE_OR_OPEN = 1; | | | |
| /** Create a new database; fail if db exists. */ | | | |
| const int DB_CREATE = 2; | | | |
| /** Overwrite existing db; create if none exists. */ | | | |
| const int DB_CREATE_OR_OVERWRITE = 3; | | | |
| /** Open for read/write; fail if no db exists. */ | | | |
| const int DB_OPEN = 4; | | | |
| | | | |
| /** Show a short-format display of the B-tree contents. | | | |
| * | | | |
| * For use with Xapian::Database::check(). | | | |
| */ | | | |
| const int DBCHECK_SHORT_TREE = 1; | | | |
| | | | |
| /** Show a full display of the B-tree contents. | | | |
| * | | | |
| * For use with Xapian::Database::check(). | | | |
| */ | | | |
| const int DBCHECK_FULL_TREE = 2; | | | |
| | | | |
| /** Show the bitmap for the B-tree. | | | |
| * | | | |
| * For use with Xapian::Database::check(). | | | |
| */ | | | |
| const int DBCHECK_SHOW_BITMAP = 4; | | | |
| | | | |
| /** Show statistics for the B-tree. | | | |
| * | | | |
| * For use with Xapian::Database::check(). | | | |
| */ | | | |
| const int DBCHECK_SHOW_STATS = 8; | | | |
| | | | |
| /** Fix problems. | | | |
| * | | | |
| * Currently this is supported for chert, and will: | | | |
| * | | | |
| * * regenerate the "iamchert" file if it isn't valid (so if it is lost, | | | |
| you | | | |
| * can just create it empty and then "fix problems"). | | | |
| * | | | |
| * * regenerate base files (currently the algorithm for finding the root | | | |
| * block may not work if there was a change partly written but not | | | |
| * committed). | | | |
| * | | | |
| * For use with Xapian::Database::check(). | | | |
| */ | | | |
| const int DBCHECK_FIX = 16; | | | |
| | | | |
| } | | } | |
| | | | |
| #endif /* XAPIAN_INCLUDED_DATABASE_H */ | | #endif /* XAPIAN_INCLUDED_DATABASE_H */ | |
| | | | |
End of changes. 13 change blocks. |
| 109 lines changed or deleted | | 35 lines changed or added | |
|
| dbfactory.h | | dbfactory.h | |
| /** @file dbfactory.h | | /** @file dbfactory.h | |
| * @brief Factory functions for constructing Database and WritableDatabase
objects | | * @brief Factory functions for constructing Database and WritableDatabase
objects | |
| */ | | */ | |
|
| /* Copyright (C) 2005,2006,2007,2008,2009,2011,2013 Olly Betts | | /* Copyright (C) 2005,2006,2007,2008,2009,2011,2013,2014 Olly Betts | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_DBFACTORY_H | | #ifndef XAPIAN_INCLUDED_DBFACTORY_H | |
| #define XAPIAN_INCLUDED_DBFACTORY_H | | #define XAPIAN_INCLUDED_DBFACTORY_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/dbfactory.h> directly; include <xapian.h> instea
d." | | # error "Never use <xapian/dbfactory.h> directly; include <xapian.h> instea
d." | |
| #endif | | #endif | |
| | | | |
| #ifndef _MSC_VER | | #ifndef _MSC_VER | |
| # include <sys/types.h> | | # include <sys/types.h> | |
| #endif | | #endif | |
| | | | |
| #include <string> | | #include <string> | |
| | | | |
|
| | | #include <xapian/constants.h> | |
| | | #include <xapian/database.h> | |
| | | #include <xapian/deprecated.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/version.h> | | #include <xapian/version.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
| #ifdef _MSC_VER | | #ifdef _MSC_VER | |
| typedef unsigned useconds_t; | | typedef unsigned useconds_t; | |
| #endif | | #endif | |
| | | | |
|
| class Database; | | | |
| class WritableDatabase; | | | |
| | | | |
| /// Database factory functions which determine the database type automatica
lly. | | /// Database factory functions which determine the database type automatica
lly. | |
| namespace Auto { | | namespace Auto { | |
| | | | |
| /** Construct a Database object for a stub database file. | | /** Construct a Database object for a stub database file. | |
| * | | * | |
| * The stub database file contains serialised parameters for one | | * The stub database file contains serialised parameters for one | |
| * or more databases. | | * or more databases. | |
| * | | * | |
| * @param file pathname of the stub database file. | | * @param file pathname of the stub database file. | |
| */ | | */ | |
|
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_DEPRECATED(Database open_stub(const std::string &file)); | |
| Database open_stub(const std::string &file); | | | |
| | | inline Database | |
| | | open_stub(const std::string &file) | |
| | | { | |
| | | return Database(file, DB_BACKEND_STUB); | |
| | | } | |
| | | | |
| /** Construct a WritableDatabase object for a stub database file. | | /** Construct a WritableDatabase object for a stub database file. | |
| * | | * | |
| * The stub database file must contain serialised parameters for exactly o
ne | | * The stub database file must contain serialised parameters for exactly o
ne | |
| * database. | | * database. | |
| * | | * | |
| * @param file pathname of the stub database file. | | * @param file pathname of the stub database file. | |
| * @param action determines handling of existing/non-existing databas
e: | | * @param action determines handling of existing/non-existing databas
e: | |
| * - Xapian::DB_CREATE fail if database already exi
sts, | | * - Xapian::DB_CREATE fail if database already exi
sts, | |
| * otherwise create new database. | | * otherwise create new database. | |
| * - Xapian::DB_CREATE_OR_OPEN open existing database, or c
reate new | | * - Xapian::DB_CREATE_OR_OPEN open existing database, or c
reate new | |
| * database if none exists. | | * database if none exists. | |
| * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing database, or crea
te | | * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing database, or crea
te | |
| * new database if none exists. | | * new database if none exists. | |
| * - Xapian::DB_OPEN open existing database, failing if n
one | | * - Xapian::DB_OPEN open existing database, failing if n
one | |
| * exists. | | * exists. | |
| */ | | */ | |
|
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_DEPRECATED(WritableDatabase open_stub(const std::string &file, int a | |
| WritableDatabase open_stub(const std::string &file, int action); | | ction)); | |
| | | | |
| | | inline WritableDatabase | |
| | | open_stub(const std::string &file, int action) | |
| | | { | |
| | | return WritableDatabase(file, action|DB_BACKEND_STUB); | |
| | | } | |
| | | | |
| } | | } | |
| | | | |
| #ifdef XAPIAN_HAS_INMEMORY_BACKEND | | #ifdef XAPIAN_HAS_INMEMORY_BACKEND | |
| /// Database factory functions for the inmemory backend. | | /// Database factory functions for the inmemory backend. | |
| namespace InMemory { | | namespace InMemory { | |
| | | | |
| /** Construct a WritableDatabase object for a new, empty InMemory database. | | /** Construct a WritableDatabase object for a new, empty InMemory database. | |
| * | | * | |
| * Only a writable InMemory database can be created, since a read-only one | | * Only a writable InMemory database can be created, since a read-only one | |
| * would always remain empty. | | * would always remain empty. | |
| */ | | */ | |
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_VISIBILITY_DEFAULT | |
| WritableDatabase open(); | | WritableDatabase open(); | |
| | | | |
| } | | } | |
| #endif | | #endif | |
| | | | |
|
| #ifdef XAPIAN_HAS_BRASS_BACKEND | | | |
| /// Database factory functions for the brass backend. | | | |
| namespace Brass { | | | |
| | | | |
| /** Construct a Database object for read-only access to a Brass database. | | | |
| * | | | |
| * @param dir pathname of the directory containing the database. | | | |
| */ | | | |
| XAPIAN_VISIBILITY_DEFAULT | | | |
| Database open(const std::string &dir); | | | |
| | | | |
| /** Construct a Database object for update access to a Brass database. | | | |
| * | | | |
| * @param dir pathname of the directory containing the database. | | | |
| * @param action determines handling of existing/non-existing databas | | | |
| e: | | | |
| * - Xapian::DB_CREATE fail if database already exi | | | |
| sts, | | | |
| * otherwise create new database. | | | |
| * - Xapian::DB_CREATE_OR_OPEN open existing database, or c | | | |
| reate new | | | |
| * database if none exists. | | | |
| * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing database, or crea | | | |
| te | | | |
| * new database if none exists. | | | |
| * - Xapian::DB_OPEN open existing database, failing if n | | | |
| one | | | |
| * exists. | | | |
| * @param block_size the Btree blocksize to use (in bytes), which must be | | | |
| a | | | |
| * power of two between 2048 and 65536 (inclusive). Th | | | |
| e | | | |
| * default (also used if an invalid value is passed) is | | | |
| * 8192 bytes. This parameter is ignored when opening | | | |
| an | | | |
| * existing database. | | | |
| */ | | | |
| XAPIAN_VISIBILITY_DEFAULT | | | |
| WritableDatabase | | | |
| open(const std::string &dir, int action, int block_size = 8192); | | | |
| | | | |
| } | | | |
| #endif | | | |
| | | | |
| #ifdef XAPIAN_HAS_CHERT_BACKEND | | #ifdef XAPIAN_HAS_CHERT_BACKEND | |
| /// Database factory functions for the chert backend. | | /// Database factory functions for the chert backend. | |
| namespace Chert { | | namespace Chert { | |
| | | | |
| /** Construct a Database object for read-only access to a Chert database. | | /** Construct a Database object for read-only access to a Chert database. | |
| * | | * | |
| * @param dir pathname of the directory containing the database. | | * @param dir pathname of the directory containing the database. | |
| */ | | */ | |
|
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_DEPRECATED(Database open(const std::string &dir)); | |
| Database open(const std::string &dir); | | | |
| | | inline Database | |
| | | open(const std::string &dir) | |
| | | { | |
| | | return Database(dir, DB_BACKEND_CHERT); | |
| | | } | |
| | | | |
| /** Construct a Database object for update access to a Chert database. | | /** Construct a Database object for update access to a Chert database. | |
| * | | * | |
| * @param dir pathname of the directory containing the database. | | * @param dir pathname of the directory containing the database. | |
| * @param action determines handling of existing/non-existing databas
e: | | * @param action determines handling of existing/non-existing databas
e: | |
| * - Xapian::DB_CREATE fail if database already exi
sts, | | * - Xapian::DB_CREATE fail if database already exi
sts, | |
| * otherwise create new database. | | * otherwise create new database. | |
| * - Xapian::DB_CREATE_OR_OPEN open existing database, or c
reate new | | * - Xapian::DB_CREATE_OR_OPEN open existing database, or c
reate new | |
| * database if none exists. | | * database if none exists. | |
| * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing database, or crea
te | | * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing database, or crea
te | |
| * new database if none exists. | | * new database if none exists. | |
| * - Xapian::DB_OPEN open existing database, failing if n
one | | * - Xapian::DB_OPEN open existing database, failing if n
one | |
| * exists. | | * exists. | |
| * @param block_size the Btree blocksize to use (in bytes), which must be
a | | * @param block_size the Btree blocksize to use (in bytes), which must be
a | |
| * power of two between 2048 and 65536 (inclusive). Th
e | | * power of two between 2048 and 65536 (inclusive). Th
e | |
| * default (also used if an invalid value is passed) is | | * default (also used if an invalid value is passed) is | |
| * 8192 bytes. This parameter is ignored when opening
an | | * 8192 bytes. This parameter is ignored when opening
an | |
| * existing database. | | * existing database. | |
| */ | | */ | |
|
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_DEPRECATED(WritableDatabase open(const std::string &dir, int action, | |
| WritableDatabase | | int block_size = 0)); | |
| open(const std::string &dir, int action, int block_size = 8192); | | | |
| | | inline WritableDatabase | |
| | | open(const std::string &dir, int action, int block_size) | |
| | | { | |
| | | return WritableDatabase(dir, action|DB_BACKEND_CHERT, block_size); | |
| | | } | |
| | | | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| #ifdef XAPIAN_HAS_REMOTE_BACKEND | | #ifdef XAPIAN_HAS_REMOTE_BACKEND | |
| /// Database factory functions for the remote backend. | | /// Database factory functions for the remote backend. | |
| namespace Remote { | | namespace Remote { | |
| | | | |
| /** Construct a Database object for read-only access to a remote database | | /** Construct a Database object for read-only access to a remote database | |
| * accessed via a TCP connection. | | * accessed via a TCP connection. | |
| | | | |
End of changes. 9 change blocks. |
| 58 lines changed or deleted | | 35 lines changed or added | |
|
| enquire.h | | enquire.h | |
| /** @file enquire.h | | /** @file enquire.h | |
| * @brief API for running queries | | * @brief API for running queries | |
| */ | | */ | |
| /* Copyright 1999,2000,2001 BrightStation PLC | | /* Copyright 1999,2000,2001 BrightStation PLC | |
| * Copyright 2001,2002 Ananova Ltd | | * Copyright 2001,2002 Ananova Ltd | |
|
| * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013 Olly Be
tts | | * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014 Ol
ly Betts | |
| * Copyright 2009 Lemur Consulting Ltd | | * Copyright 2009 Lemur Consulting Ltd | |
| * Copyright 2011 Action Without Borders | | * Copyright 2011 Action Without Borders | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_ENQUIRE_H | | #ifndef XAPIAN_INCLUDED_ENQUIRE_H | |
| #define XAPIAN_INCLUDED_ENQUIRE_H | | #define XAPIAN_INCLUDED_ENQUIRE_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/enquire.h> directly; include <xapian.h> instead." | | # error "Never use <xapian/enquire.h> directly; include <xapian.h> instead." | |
| #endif | | #endif | |
| | | | |
|
| | | #include "xapian/deprecated.h" | |
| #include <string> | | #include <string> | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/intrusive_ptr.h> | | #include <xapian/intrusive_ptr.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/termiterator.h> | | #include <xapian/termiterator.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
| | | | |
| skipping to change at line 59 | | skipping to change at line 60 | |
| class MSetIterator; | | class MSetIterator; | |
| class Query; | | class Query; | |
| class Weight; | | class Weight; | |
| | | | |
| /** A match set (MSet). | | /** A match set (MSet). | |
| * This class represents (a portion of) the results of a query. | | * This class represents (a portion of) the results of a query. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT MSet { | | class XAPIAN_VISIBILITY_DEFAULT MSet { | |
| public: | | public: | |
| class Internal; | | class Internal; | |
|
| /// @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::intrusive_ptr<Internal> internal; | | Xapian::Internal::intrusive_ptr<Internal> internal; | |
| | | | |
|
| /// @internal Constructor for internal use. | | /// @private @internal Constructor for internal use. | |
| explicit MSet(Internal * internal_); | | explicit MSet(Internal * internal_); | |
| | | | |
| /// Create an empty Xapian::MSet. | | /// Create an empty Xapian::MSet. | |
| MSet(); | | MSet(); | |
| | | | |
| /// Destroy a Xapian::MSet. | | /// Destroy a Xapian::MSet. | |
| ~MSet(); | | ~MSet(); | |
| | | | |
| /// Copying is allowed (and is cheap). | | /// Copying is allowed (and is cheap). | |
| MSet(const MSet & other); | | MSet(const MSet & other); | |
| | | | |
| skipping to change at line 273 | | skipping to change at line 274 | |
| }; | | }; | |
| | | | |
| /** An iterator pointing to items in an MSet. | | /** An iterator pointing to items in an MSet. | |
| * This is used for access to individual results of a match. | | * This is used for access to individual results of a match. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT MSetIterator { | | class XAPIAN_VISIBILITY_DEFAULT MSetIterator { | |
| private: | | private: | |
| friend class MSet; | | friend class MSet; | |
| friend bool operator==(const MSetIterator &a, const MSetIterator &b); | | friend bool operator==(const MSetIterator &a, const MSetIterator &b); | |
| friend bool operator!=(const MSetIterator &a, const MSetIterator &b); | | friend bool operator!=(const MSetIterator &a, const MSetIterator &b); | |
|
| | | friend void iterator_rewind(MSetIterator & it); | |
| | | friend bool iterator_valid(const MSetIterator & it); | |
| | | | |
| MSetIterator(Xapian::doccount index_, const MSet & mset_) | | MSetIterator(Xapian::doccount index_, const MSet & mset_) | |
| : index(index_), mset(mset_) { } | | : index(index_), mset(mset_) { } | |
| | | | |
| Xapian::doccount index; | | Xapian::doccount index; | |
| MSet mset; | | MSet mset; | |
| | | | |
| public: | | public: | |
| /** Create an uninitialised iterator; this cannot be used, but is | | /** Create an uninitialised iterator; this cannot be used, but is | |
| * convenient syntactically. | | * convenient syntactically. | |
| | | | |
| skipping to change at line 403 | | skipping to change at line 406 | |
| * However, currently it may get a lower percentage score if you | | * However, currently it may get a lower percentage score if you | |
| * use a MatchDecider and the sorting is primarily by value. | | * use a MatchDecider and the sorting is primarily by value. | |
| * In this case, the percentage for a particular document may vary | | * In this case, the percentage for a particular document may vary | |
| * depending on the first, max_size, and checkatleast parameters | | * depending on the first, max_size, and checkatleast parameters | |
| * passed to Enquire::get_mset() (this bug is hard to fix without | | * passed to Enquire::get_mset() (this bug is hard to fix without | |
| * having to apply the MatchDecider to potentially many more | | * having to apply the MatchDecider to potentially many more | |
| * documents, which is potentially costly). | | * documents, which is potentially costly). | |
| */ | | */ | |
| int get_percent() const; | | int get_percent() const; | |
| | | | |
|
| /// @private @internal Determine if the iterator has been exhausted. | | | |
| bool at_end() const { return index == mset.size(); } | | | |
| | | | |
| /// Return a string describing this object. | | /// Return a string describing this object. | |
| std::string get_description() const; | | std::string get_description() const; | |
| | | | |
| /// Allow use as an STL iterator | | /// Allow use as an STL iterator | |
| //@{ | | //@{ | |
| typedef std::bidirectional_iterator_tag iterator_category; // FIXME:
could enhance to be a randomaccess_iterator | | typedef std::bidirectional_iterator_tag iterator_category; // FIXME:
could enhance to be a randomaccess_iterator | |
| typedef Xapian::docid value_type; | | typedef Xapian::docid value_type; | |
| typedef Xapian::doccount_diff difference_type; | | typedef Xapian::doccount_diff difference_type; | |
| typedef Xapian::docid * pointer; | | typedef Xapian::docid * pointer; | |
| typedef Xapian::docid & reference; | | typedef Xapian::docid & reference; | |
| | | | |
| skipping to change at line 440 | | skipping to change at line 440 | |
| | | | |
| class ESetIterator; | | class ESetIterator; | |
| | | | |
| /** Class representing an ordered set of expand terms (an ESet). | | /** Class representing an ordered set of expand terms (an ESet). | |
| * This set represents the results of an expand operation, which is | | * This set represents the results of an expand operation, which is | |
| * performed by Xapian::Enquire::get_eset(). | | * performed by Xapian::Enquire::get_eset(). | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT ESet { | | class XAPIAN_VISIBILITY_DEFAULT ESet { | |
| public: | | public: | |
| class Internal; | | class Internal; | |
|
| /// @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::intrusive_ptr<Internal> internal; | | Xapian::Internal::intrusive_ptr<Internal> internal; | |
| | | | |
| /// Construct an empty ESet | | /// Construct an empty ESet | |
| ESet(); | | ESet(); | |
| | | | |
| /// Destructor. | | /// Destructor. | |
| ~ESet(); | | ~ESet(); | |
| | | | |
| /// Copying is allowed (and is cheap). | | /// Copying is allowed (and is cheap). | |
| ESet(const ESet & other); | | ESet(const ESet & other); | |
| | | | |
| skipping to change at line 498 | | skipping to change at line 498 | |
| /// Return a string describing this object. | | /// Return a string describing this object. | |
| std::string get_description() const; | | std::string get_description() const; | |
| }; | | }; | |
| | | | |
| /** Iterate through terms in the ESet */ | | /** Iterate through terms in the ESet */ | |
| class XAPIAN_VISIBILITY_DEFAULT ESetIterator { | | class XAPIAN_VISIBILITY_DEFAULT ESetIterator { | |
| private: | | private: | |
| friend class ESet; | | friend class ESet; | |
| friend bool operator==(const ESetIterator &a, const ESetIterator &b); | | friend bool operator==(const ESetIterator &a, const ESetIterator &b); | |
| friend bool operator!=(const ESetIterator &a, const ESetIterator &b); | | friend bool operator!=(const ESetIterator &a, const ESetIterator &b); | |
|
| | | friend void iterator_rewind(ESetIterator & it); | |
| | | friend bool iterator_valid(const ESetIterator & it); | |
| | | | |
| ESetIterator(Xapian::termcount index_, const ESet & eset_) | | ESetIterator(Xapian::termcount index_, const ESet & eset_) | |
| : index(index_), eset(eset_) { } | | : index(index_), eset(eset_) { } | |
| | | | |
| Xapian::termcount index; | | Xapian::termcount index; | |
| ESet eset; | | ESet eset; | |
| | | | |
| public: | | public: | |
| /** Create an uninitialised iterator; this cannot be used, but is | | /** Create an uninitialised iterator; this cannot be used, but is | |
| * convenient syntactically. | | * convenient syntactically. | |
| | | | |
| skipping to change at line 559 | | skipping to change at line 561 | |
| | | | |
| /// Get the term for the current position | | /// Get the term for the current position | |
| const std::string & operator *() const; | | const std::string & operator *() const; | |
| | | | |
| /// Get the weight of the term at the current position | | /// Get the weight of the term at the current position | |
| double get_weight() const; | | double get_weight() const; | |
| | | | |
| /// Return a string describing this object. | | /// Return a string describing this object. | |
| std::string get_description() const; | | std::string get_description() const; | |
| | | | |
|
| /// @private @internal Determine if the iterator has been exhausted. | | | |
| bool at_end() const { return index == eset.size(); } | | | |
| | | | |
| /// Allow use as an STL iterator | | /// Allow use as an STL iterator | |
| //@{ | | //@{ | |
| typedef std::bidirectional_iterator_tag iterator_category; // FIXME:
go for randomaccess_iterator! | | typedef std::bidirectional_iterator_tag iterator_category; // FIXME:
go for randomaccess_iterator! | |
| typedef std::string value_type; | | typedef std::string value_type; | |
| typedef Xapian::termcount_diff difference_type; | | typedef Xapian::termcount_diff difference_type; | |
| typedef std::string * pointer; | | typedef std::string * pointer; | |
| typedef std::string & reference; | | typedef std::string & reference; | |
| //@} | | //@} | |
| }; | | }; | |
| | | | |
| | | | |
| skipping to change at line 601 | | skipping to change at line 600 | |
| | | | |
| /** A relevance set (R-Set). | | /** A relevance set (R-Set). | |
| * This is the set of documents which are marked as relevant, for use | | * This is the set of documents which are marked as relevant, for use | |
| * in modifying the term weights, and in performing query expansion. | | * in modifying the term weights, and in performing query expansion. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT RSet { | | class XAPIAN_VISIBILITY_DEFAULT RSet { | |
| public: | | public: | |
| /// Class holding details of RSet | | /// Class holding details of RSet | |
| class Internal; | | class Internal; | |
| | | | |
|
| /// @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::intrusive_ptr<Internal> internal; | | Xapian::Internal::intrusive_ptr<Internal> internal; | |
| | | | |
| /// Copy constructor | | /// Copy constructor | |
| RSet(const RSet &rset); | | RSet(const RSet &rset); | |
| | | | |
| /// Assignment operator | | /// Assignment operator | |
| void operator=(const RSet &rset); | | void operator=(const RSet &rset); | |
| | | | |
| /// Default constructor | | /// Default constructor | |
| RSet(); | | RSet(); | |
| | | | |
| skipping to change at line 680 | | skipping to change at line 679 | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT Enquire { | | class XAPIAN_VISIBILITY_DEFAULT Enquire { | |
| public: | | public: | |
| /// Copying is allowed (and is cheap). | | /// Copying is allowed (and is cheap). | |
| Enquire(const Enquire & other); | | Enquire(const Enquire & other); | |
| | | | |
| /// Assignment is allowed (and is cheap). | | /// Assignment is allowed (and is cheap). | |
| void operator=(const Enquire & other); | | void operator=(const Enquire & other); | |
| | | | |
| class Internal; | | class Internal; | |
|
| /// @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::intrusive_ptr<Internal> internal; | | Xapian::Internal::intrusive_ptr<Internal> internal; | |
| | | | |
| /** Create a Xapian::Enquire object. | | /** Create a Xapian::Enquire object. | |
| * | | * | |
| * This specification cannot be changed once the Xapian::Enquire is | | * This specification cannot be changed once the Xapian::Enquire is | |
| * opened: you must create a new Xapian::Enquire object to access a | | * opened: you must create a new Xapian::Enquire object to access a | |
| * different database, or set of databases. | | * different database, or set of databases. | |
| * | | * | |
| * The database supplied must have been initialised (ie, must not b
e | | * The database supplied must have been initialised (ie, must not b
e | |
| * the result of calling the Database::Database() constructor). If | | * the result of calling the Database::Database() constructor). If | |
| | | | |
| skipping to change at line 764 | | skipping to change at line 763 | |
| void clear_matchspies(); | | void clear_matchspies(); | |
| | | | |
| /** Set the weighting scheme to use for queries. | | /** Set the weighting scheme to use for queries. | |
| * | | * | |
| * @param weight_ the new weighting scheme. If no weighting schem
e | | * @param weight_ the new weighting scheme. If no weighting schem
e | |
| * is specified, the default is BM25 with the | | * is specified, the default is BM25 with the | |
| * default parameters. | | * default parameters. | |
| */ | | */ | |
| void set_weighting_scheme(const Weight &weight_); | | void set_weighting_scheme(const Weight &weight_); | |
| | | | |
|
| | | /** Set the weighting scheme to use for expansion. | |
| | | * | |
| | | * If you don't call this method, the default is as if you'd used: | |
| | | * | |
| | | * set_expansion_scheme("trad"); | |
| | | * | |
| | | * @param eweightname_ A string in lowercase specifying the name of | |
| | | * the scheme to be used. The following schemes | |
| | | * are currently available: | |
| | | * "bo1" : The Bo1 scheme for query expansion. | |
| | | * "trad" : The TradWeight scheme for query expansion. | |
| | | * @param expand_k_ The parameter required for TradWeight query expansion. | |
| | | * A default value of 1.0 is used if none is specified. | |
| | | */ | |
| | | void set_expansion_scheme(const std::string &eweightname_, | |
| | | double expand_k_ = 1.0) const; | |
| | | | |
| /** Set the collapse key to use for queries. | | /** Set the collapse key to use for queries. | |
| * | | * | |
| * @param collapse_key value number to collapse on - at most one M
Set | | * @param collapse_key value number to collapse on - at most one M
Set | |
| * entry with each particular value will be returned | | * entry with each particular value will be returned | |
| * (default is Xapian::BAD_VALUENO which means no collapsing). | | * (default is Xapian::BAD_VALUENO which means no collapsing). | |
| * | | * | |
| * @param collapse_max Max number of items with the same key to le
ave | | * @param collapse_max Max number of items with the same key to le
ave | |
| * after collapsing (default 1). | | * after collapsing (default 1). | |
| * | | * | |
| * The MSet returned by get_mset() will have only the "best" | | * The MSet returned by get_mset() will have only the "best" | |
| | | | |
| skipping to change at line 926 | | skipping to change at line 942 | |
| * Note that with the default BM25 weighting scheme parameters, | | * Note that with the default BM25 weighting scheme parameters, | |
| * non-identical documents will rarely have the same weight, so | | * non-identical documents will rarely have the same weight, so | |
| * this setting will give very similar results to | | * this setting will give very similar results to | |
| * set_sort_by_relevance(). It becomes more useful with particular | | * set_sort_by_relevance(). It becomes more useful with particular | |
| * BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom | | * BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom | |
| * weighting schemes. | | * weighting schemes. | |
| * | | * | |
| * @param sort_key value number to sort on. | | * @param sort_key value number to sort on. | |
| * | | * | |
| * @param reverse If true, reverses the sort order of sort_key. | | * @param reverse If true, reverses the sort order of sort_key. | |
|
| | | * Beware that in 1.2.16 and earlier, the sense | |
| | | * of this parameter was incorrectly inverted | |
| | | * and inconsistent with the other set_sort_by_... | |
| | | * methods. This was fixed in 1.2.17, so make that | |
| | | * version a minimum requirement if this detail | |
| | | * matters to your application. | |
| */ | | */ | |
| void set_sort_by_relevance_then_value(Xapian::valueno sort_key, | | void set_sort_by_relevance_then_value(Xapian::valueno sort_key, | |
| bool reverse); | | bool reverse); | |
| | | | |
| /** Set the sorting to be by relevance, then by keys generated from | | /** Set the sorting to be by relevance, then by keys generated from | |
| * values. | | * values. | |
| * | | * | |
| * Note that with the default BM25 weighting scheme parameters, | | * Note that with the default BM25 weighting scheme parameters, | |
| * non-identical documents will rarely have the same weight, so | | * non-identical documents will rarely have the same weight, so | |
| * this setting will give very similar results to | | * this setting will give very similar results to | |
| * set_sort_by_relevance(). It becomes more useful with particular | | * set_sort_by_relevance(). It becomes more useful with particular | |
| * BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom | | * BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom | |
| * weighting schemes. | | * weighting schemes. | |
| * | | * | |
| * @param sorter The functor to use for generating keys. | | * @param sorter The functor to use for generating keys. | |
| * | | * | |
| * @param reverse If true, reverses the sort order of the generated | | * @param reverse If true, reverses the sort order of the generated | |
|
| * keys. | | * keys. Beware that in 1.2.16 and earlier, the sense | |
| | | * of this parameter was incorrectly inverted | |
| | | * and inconsistent with the other set_sort_by_... | |
| | | * methods. This was fixed in 1.2.17, so make that | |
| | | * version a minimum requirement if this detail | |
| | | * matters to your application. | |
| */ | | */ | |
| void set_sort_by_relevance_then_key(Xapian::KeyMaker * sorter, | | void set_sort_by_relevance_then_key(Xapian::KeyMaker * sorter, | |
| bool reverse); | | bool reverse); | |
| | | | |
|
| | | /** Set a time limit for the match. | |
| | | * | |
| | | * Matches with check_at_least set high can take a long time in some | |
| | | * cases. You can set a time limit on this, after which check_at_least | |
| | | * will be turned off. | |
| | | * | |
| | | * @param time_limit time in seconds after which to disable | |
| | | * check_at_least (default: 0.0 which means no | |
| | | * time limit) | |
| | | * | |
| | | * Limitations: | |
| | | * | |
| | | * This feature is currently supported on platforms which support POSIX | |
| | | * interval timers. Interaction with the remote backend when using | |
| | | * multiple databases may have bugs. There's not currently a way to | |
| | | * force the match to end after a certain time. | |
| | | */ | |
| | | void set_time_limit(double time_limit); | |
| | | | |
| /** Get (a portion of) the match set for the current query. | | /** Get (a portion of) the match set for the current query. | |
| * | | * | |
| * @param first the first item in the result set to return. | | * @param first the first item in the result set to return. | |
| * A value of zero corresponds to the first item | | * A value of zero corresponds to the first item | |
| * returned being that with the highest score. | | * returned being that with the highest score. | |
| * A value of 10 corresponds to the first 10 items | | * A value of 10 corresponds to the first 10 items | |
| * being ignored, and the returned items starting | | * being ignored, and the returned items starting | |
| * at the eleventh. | | * at the eleventh. | |
| * @param maxitems the maximum number of items to return. If you | | * @param maxitems the maximum number of items to return. If you | |
| * want all matches, then you can pass the result | | * want all matches, then you can pass the result | |
| | | | |
| skipping to change at line 1012 | | skipping to change at line 1058 | |
| * @param maxitems the maximum number of items to return. | | * @param maxitems the maximum number of items to return. | |
| * @param omrset the relevance set to use when performing | | * @param omrset the relevance set to use when performing | |
| * the expand operation. | | * the expand operation. | |
| * @param flags zero or more of these values |-ed together: | | * @param flags zero or more of these values |-ed together: | |
| * - Xapian::Enquire::INCLUDE_QUERY_TERMS query | | * - Xapian::Enquire::INCLUDE_QUERY_TERMS query | |
| * terms may be returned from expand | | * terms may be returned from expand | |
| * - Xapian::Enquire::USE_EXACT_TERMFREQ for multi | | * - Xapian::Enquire::USE_EXACT_TERMFREQ for multi | |
| * dbs, calculate the exact termfreq; otherwise an | | * dbs, calculate the exact termfreq; otherwise an | |
| * approximation is used which can greatly improve | | * approximation is used which can greatly improve | |
| * efficiency, but still returns good results. | | * efficiency, but still returns good results. | |
|
| * @param k the parameter k in the query expansion algorithm | | | |
| * (default is 1.0) | | | |
| * @param edecider a decision functor to use to decide whether a | | * @param edecider a decision functor to use to decide whether a | |
| * given term should be put in the ESet | | * given term should be put in the ESet | |
|
| | | * @param min_wt the minimum weight for included terms | |
| * | | * | |
| * @return An ESet object containing the results of the | | * @return An ESet object containing the results of the | |
| * expand. | | * expand. | |
| * | | * | |
| * @exception Xapian::InvalidArgumentError See class documentation
. | | * @exception Xapian::InvalidArgumentError See class documentation
. | |
| */ | | */ | |
| ESet get_eset(Xapian::termcount maxitems, | | ESet get_eset(Xapian::termcount maxitems, | |
|
| const RSet & omrset, | | const RSet & omrset, | |
| int flags = 0, | | int flags = 0, | |
| double k = 1.0, | | const Xapian::ExpandDecider * edecider = 0, | |
| const Xapian::ExpandDecider * edecider = 0) const; | | double min_wt = 0.0) const; | |
| | | | |
| /** Get the expand set for the given rset. | | /** Get the expand set for the given rset. | |
| * | | * | |
| * @param maxitems the maximum number of items to return. | | * @param maxitems the maximum number of items to return. | |
| * @param omrset the relevance set to use when performing | | * @param omrset the relevance set to use when performing | |
| * the expand operation. | | * the expand operation. | |
| * @param edecider a decision functor to use to decide whether a | | * @param edecider a decision functor to use to decide whether a | |
| * given term should be put in the ESet | | * given term should be put in the ESet | |
| * | | * | |
| * @return An ESet object containing the results of the | | * @return An ESet object containing the results of the | |
| * expand. | | * expand. | |
| * | | * | |
| * @exception Xapian::InvalidArgumentError See class documentation
. | | * @exception Xapian::InvalidArgumentError See class documentation
. | |
| */ | | */ | |
| inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset
, | | inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset
, | |
|
| const Xapian::ExpandDecider * edecider) const | | const Xapian::ExpandDecider * edecider) const { | |
| { | | return get_eset(maxitems, omrset, 0, edecider); | |
| return get_eset(maxitems, omrset, 0, 1.0, edecider); | | | |
| } | | } | |
| | | | |
| /** Get the expand set for the given rset. | | /** Get the expand set for the given rset. | |
| * | | * | |
| * @param maxitems the maximum number of items to return. | | * @param maxitems the maximum number of items to return. | |
| * @param omrset the relevance set to use when performing | | * @param omrset the relevance set to use when performing | |
| * the expand operation. | | * the expand operation. | |
| * @param flags zero or more of these values |-ed together: | | * @param flags zero or more of these values |-ed together: | |
| * - Xapian::Enquire::INCLUDE_QUERY_TERMS query | | * - Xapian::Enquire::INCLUDE_QUERY_TERMS query | |
| * terms may be returned from expand | | * terms may be returned from expand | |
| | | | |
| skipping to change at line 1070 | | skipping to change at line 1115 | |
| * @param edecider a decision functor to use to decide whether a | | * @param edecider a decision functor to use to decide whether a | |
| * given term should be put in the ESet | | * given term should be put in the ESet | |
| * | | * | |
| * @param min_wt the minimum weight for included terms | | * @param min_wt the minimum weight for included terms | |
| * | | * | |
| * @return An ESet object containing the results of the | | * @return An ESet object containing the results of the | |
| * expand. | | * expand. | |
| * | | * | |
| * @exception Xapian::InvalidArgumentError See class documentation
. | | * @exception Xapian::InvalidArgumentError See class documentation
. | |
| */ | | */ | |
|
| ESet get_eset(Xapian::termcount maxitems, | | XAPIAN_DEPRECATED(ESet get_eset(Xapian::termcount maxitems, | |
| const RSet & omrset, | | const RSet & rset, | |
| int flags, | | int flags, | |
| double k, | | double k, | |
| const Xapian::ExpandDecider * edecider, | | const Xapian::ExpandDecider * edecider = NULL, | |
| double min_wt) const; | | double min_wt = 0.0) const) { | |
| | | set_expansion_scheme("trad", k); | |
| | | return get_eset(maxitems, rset, flags, edecider, min_wt); | |
| | | } | |
| | | | |
| /** Get terms which match a given document, by document id. | | /** Get terms which match a given document, by document id. | |
| * | | * | |
| * This method returns the terms in the current query which match | | * This method returns the terms in the current query which match | |
| * the given document. | | * the given document. | |
| * | | * | |
| * It is possible for the document to have been removed from the | | * It is possible for the document to have been removed from the | |
| * database between the time it is returned in an MSet, and the | | * database between the time it is returned in an MSet, and the | |
| * time that this call is made. If possible, you should specify | | * time that this call is made. If possible, you should specify | |
| * an MSetIterator instead of a Xapian::docid, since this will enab
le | | * an MSetIterator instead of a Xapian::docid, since this will enab
le | |
| | | | |
End of changes. 21 change blocks. |
| 30 lines changed or deleted | | 86 lines changed or added | |
|
| matchspy.h | | matchspy.h | |
| /** @file matchspy.h | | /** @file matchspy.h | |
| * @brief MatchSpy implementation. | | * @brief MatchSpy implementation. | |
| */ | | */ | |
|
| /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013 Olly Betts | | /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Olly Betts | |
| * Copyright (C) 2007,2009 Lemur Consulting Ltd | | * Copyright (C) 2007,2009 Lemur Consulting Ltd | |
| * Copyright (C) 2010 Richard Boulton | | * Copyright (C) 2010 Richard Boulton | |
| * | | * | |
| * This program is free software; you can redistribute it and/or modify | | * This program is free software; you can redistribute it and/or modify | |
| * it under the terms of the GNU General Public License as published by | | * it under the terms of the GNU General Public License as published by | |
| * the Free Software Foundation; either version 2 of the License, or | | * the Free Software Foundation; either version 2 of the License, or | |
| * (at your option) any later version. | | * (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| | | | |
| skipping to change at line 26 | | skipping to change at line 26 | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_MATCHSPY_H | | #ifndef XAPIAN_INCLUDED_MATCHSPY_H | |
| #define XAPIAN_INCLUDED_MATCHSPY_H | | #define XAPIAN_INCLUDED_MATCHSPY_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/matchspy.h> directly; include <xapian.h> instead
." | | # error "Never use <xapian/matchspy.h> directly; include <xapian.h> instead
." | |
| #endif | | #endif | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/intrusive_ptr.h> | | #include <xapian/intrusive_ptr.h> | |
| #include <xapian/termiterator.h> | | #include <xapian/termiterator.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| #include <string> | | #include <string> | |
| #include <map> | | #include <map> | |
| | | | |
| skipping to change at line 138 | | skipping to change at line 138 | |
| * can use the default implementation which simply throws | | * can use the default implementation which simply throws | |
| * Xapian::UnimplementedError. | | * Xapian::UnimplementedError. | |
| * | | * | |
| * Note that the returned object will be deallocated by Xapian after u
se | | * Note that the returned object will be deallocated by Xapian after u
se | |
| * with "delete". If you want to handle the deletion in a special way | | * with "delete". If you want to handle the deletion in a special way | |
| * (for example when wrapping the Xapian API for use from another | | * (for example when wrapping the Xapian API for use from another | |
| * language) then you can define a static <code>operator delete</code> | | * language) then you can define a static <code>operator delete</code> | |
| * method in your subclass as shown here: | | * method in your subclass as shown here: | |
| * http://trac.xapian.org/ticket/554#comment:1 | | * http://trac.xapian.org/ticket/554#comment:1 | |
| * | | * | |
|
| * @param s A string containing the serialised results. | | * @param serialised A string containing the serialised results. | |
| * @param context Registry object to use for unserialisation to permit | | * @param context Registry object to use for unserialisation to permit | |
| * MatchSpy subclasses with sub-MatchSpy objects to be | | * MatchSpy subclasses with sub-MatchSpy objects to be | |
| * implemented. | | * implemented. | |
| */ | | */ | |
|
| virtual MatchSpy * unserialise(const std::string & s, | | virtual MatchSpy * unserialise(const std::string & serialised, | |
| const Registry & context) const; | | const Registry & context) const; | |
| | | | |
| /** Serialise the results of this match spy. | | /** Serialise the results of this match spy. | |
| * | | * | |
| * If you don't want to support the remote backend in your match spy,
you | | * If you don't want to support the remote backend in your match spy,
you | |
| * can use the default implementation which simply throws | | * can use the default implementation which simply throws | |
| * Xapian::UnimplementedError. | | * Xapian::UnimplementedError. | |
| */ | | */ | |
| virtual std::string serialise_results() const; | | virtual std::string serialise_results() const; | |
| | | | |
| /** Unserialise some results, and merge them into this matchspy. | | /** Unserialise some results, and merge them into this matchspy. | |
| * | | * | |
| * The order in which results are merged should not be significant, si
nce | | * The order in which results are merged should not be significant, si
nce | |
| * this order is not specified (and will vary depending on the speed o
f | | * this order is not specified (and will vary depending on the speed o
f | |
| * the search in each sub-database). | | * the search in each sub-database). | |
| * | | * | |
| * If you don't want to support the remote backend in your match spy,
you | | * If you don't want to support the remote backend in your match spy,
you | |
| * can use the default implementation which simply throws | | * can use the default implementation which simply throws | |
| * Xapian::UnimplementedError. | | * Xapian::UnimplementedError. | |
| * | | * | |
|
| * @param s A string containing the serialised results. | | * @param serialised A string containing the serialised results. | |
| */ | | */ | |
|
| virtual void merge_results(const std::string & s); | | virtual void merge_results(const std::string & serialised); | |
| | | | |
| /** Return a string describing this object. | | /** Return a string describing this object. | |
| * | | * | |
| * This default implementation returns a generic answer, to avoid forc
ing | | * This default implementation returns a generic answer, to avoid forc
ing | |
| * those deriving their own MatchSpy subclasses from having to impleme
nt | | * those deriving their own MatchSpy subclasses from having to impleme
nt | |
| * this (they may not care what get_description() gives for their | | * this (they may not care what get_description() gives for their | |
| * subclass). | | * subclass). | |
| */ | | */ | |
| virtual std::string get_description() const; | | virtual std::string get_description() const; | |
| }; | | }; | |
| | | | |
| skipping to change at line 215 | | skipping to change at line 215 | |
| public: | | public: | |
| /// Construct an empty ValueCountMatchSpy. | | /// Construct an empty ValueCountMatchSpy. | |
| ValueCountMatchSpy() : internal() {} | | ValueCountMatchSpy() : internal() {} | |
| | | | |
| /// Construct a MatchSpy which counts the values in a particular slot. | | /// Construct a MatchSpy which counts the values in a particular slot. | |
| ValueCountMatchSpy(Xapian::valueno slot_) | | ValueCountMatchSpy(Xapian::valueno slot_) | |
| : internal(new Internal(slot_)) {} | | : internal(new Internal(slot_)) {} | |
| | | | |
| /** Return the total number of documents tallied. */ | | /** Return the total number of documents tallied. */ | |
| size_t XAPIAN_NOTHROW(get_total() const) { | | size_t XAPIAN_NOTHROW(get_total() const) { | |
|
| return internal->total; | | return internal.get() ? internal->total : 0; | |
| } | | } | |
| | | | |
| /** Get an iterator over the values seen in the slot. | | /** Get an iterator over the values seen in the slot. | |
| * | | * | |
| * Items will be returned in ascending alphabetical order. | | * Items will be returned in ascending alphabetical order. | |
| * | | * | |
| * During the iteration, the frequency of the current value can be | | * During the iteration, the frequency of the current value can be | |
| * obtained with the get_termfreq() method on the iterator. | | * obtained with the get_termfreq() method on the iterator. | |
| */ | | */ | |
| TermIterator values_begin() const; | | TermIterator values_begin() const; | |
| | | | |
| skipping to change at line 261 | | skipping to change at line 261 | |
| * This implementation tallies values for a matching document. | | * This implementation tallies values for a matching document. | |
| * | | * | |
| * @param doc The document to tally values for. | | * @param doc The document to tally values for. | |
| * @param wt The weight of the document (ignored by this class). | | * @param wt The weight of the document (ignored by this class). | |
| */ | | */ | |
| void operator()(const Xapian::Document &doc, double wt); | | void operator()(const Xapian::Document &doc, double wt); | |
| | | | |
| virtual MatchSpy * clone() const; | | virtual MatchSpy * clone() const; | |
| virtual std::string name() const; | | virtual std::string name() const; | |
| virtual std::string serialise() const; | | virtual std::string serialise() const; | |
|
| virtual MatchSpy * unserialise(const std::string & s, | | virtual MatchSpy * unserialise(const std::string & serialised, | |
| const Registry & context) const; | | const Registry & context) const; | |
| virtual std::string serialise_results() const; | | virtual std::string serialise_results() const; | |
|
| virtual void merge_results(const std::string & s); | | virtual void merge_results(const std::string & serialised); | |
| virtual std::string get_description() const; | | virtual std::string get_description() const; | |
| }; | | }; | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_MATCHSPY_H | | #endif // XAPIAN_INCLUDED_MATCHSPY_H | |
| | | | |
End of changes. 9 change blocks. |
| 9 lines changed or deleted | | 9 lines changed or added | |
|
| postingsource.h | | postingsource.h | |
| /** @file postingsource.h | | /** @file postingsource.h | |
| * @brief External sources of posting information | | * @brief External sources of posting information | |
| */ | | */ | |
|
| /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013 Olly Betts | | /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Olly Betts | |
| * Copyright (C) 2008,2009 Lemur Consulting Ltd | | * Copyright (C) 2008,2009 Lemur Consulting Ltd | |
| * | | * | |
| * This program is free software; you can redistribute it and/or modify | | * This program is free software; you can redistribute it and/or modify | |
| * it under the terms of the GNU General Public License as published by | | * it under the terms of the GNU General Public License as published by | |
| * the Free Software Foundation; either version 2 of the License, or | | * the Free Software Foundation; either version 2 of the License, or | |
| * (at your option) any later version. | | * (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H | | #ifndef XAPIAN_INCLUDED_POSTINGSOURCE_H | |
| #define XAPIAN_INCLUDED_POSTINGSOURCE_H | | #define XAPIAN_INCLUDED_POSTINGSOURCE_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/postingsource.h> directly; include <xapian.h> in
stead." | | # error "Never use <xapian/postingsource.h> directly; include <xapian.h> in
stead." | |
| #endif | | #endif | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/database.h> | | #include <xapian/database.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| #include <string> | | #include <string> | |
| #include <map> | | #include <map> | |
| | | | |
| skipping to change at line 304 | | skipping to change at line 304 | |
| * Note that the returned object will be deallocated by Xapian after u
se | | * Note that the returned object will be deallocated by Xapian after u
se | |
| * with "delete". If you want to handle the deletion in a special way | | * with "delete". If you want to handle the deletion in a special way | |
| * (for example when wrapping the Xapian API for use from another | | * (for example when wrapping the Xapian API for use from another | |
| * language) then you can define a static <code>operator delete</code> | | * language) then you can define a static <code>operator delete</code> | |
| * method in your subclass as shown here: | | * method in your subclass as shown here: | |
| * http://trac.xapian.org/ticket/554#comment:1 | | * http://trac.xapian.org/ticket/554#comment:1 | |
| * | | * | |
| * If you don't want to support the remote backend, you can use the | | * If you don't want to support the remote backend, you can use the | |
| * default implementation which simply throws Xapian::UnimplementedErr
or. | | * default implementation which simply throws Xapian::UnimplementedErr
or. | |
| * | | * | |
|
| * @param s A serialised instance of this PostingSource subclass. | | * @param serialised A serialised instance of this PostingSource subcl
ass. | |
| */ | | */ | |
|
| virtual PostingSource * unserialise(const std::string &s) const; | | virtual PostingSource * unserialise(const std::string &serialised) cons
t; | |
| | | | |
| /** Create object given string serialisation returned by serialise(). | | /** Create object given string serialisation returned by serialise(). | |
| * | | * | |
| * Note that the returned object will be deallocated by Xapian after u
se | | * Note that the returned object will be deallocated by Xapian after u
se | |
| * with "delete". If you want to handle the deletion in a special way | | * with "delete". If you want to handle the deletion in a special way | |
| * (for example when wrapping the Xapian API for use from another | | * (for example when wrapping the Xapian API for use from another | |
| * language) then you can define a static <code>operator delete</code> | | * language) then you can define a static <code>operator delete</code> | |
| * method in your subclass as shown here: | | * method in your subclass as shown here: | |
| * http://trac.xapian.org/ticket/554#comment:1 | | * http://trac.xapian.org/ticket/554#comment:1 | |
| * | | * | |
| * This method is supplied with a Registry object, which can be used w
hen | | * This method is supplied with a Registry object, which can be used w
hen | |
| * unserialising objects contained within the posting source. The def
ault | | * unserialising objects contained within the posting source. The def
ault | |
| * implementation simply calls unserialise() which doesn't take the | | * implementation simply calls unserialise() which doesn't take the | |
| * Registry object, so you do not need to implement this method unless
you | | * Registry object, so you do not need to implement this method unless
you | |
| * want to take advantage of the Registry object when unserialising. | | * want to take advantage of the Registry object when unserialising. | |
| * | | * | |
|
| * @param s A serialised instance of this PostingSource subclass. | | * @param serialised A serialised instance of this PostingSource subcl
ass. | |
| */ | | */ | |
|
| virtual PostingSource * unserialise_with_registry(const std::string &s, | | virtual PostingSource * unserialise_with_registry(const std::string &se
rialised, | |
| const Registry & registry) const; | | const Registry & registry) const; | |
| | | | |
| /** Set this PostingSource to the start of the list of postings. | | /** Set this PostingSource to the start of the list of postings. | |
| * | | * | |
| * This is called automatically by the matcher prior to each query bei
ng | | * This is called automatically by the matcher prior to each query bei
ng | |
| * processed. | | * processed. | |
| * | | * | |
| * If a PostingSource is used for multiple searches, @a init() will | | * If a PostingSource is used for multiple searches, @a init() will | |
| * therefore be called multiple times, and must handle this by using t
he | | * therefore be called multiple times, and must handle this by using t
he | |
| * database passed in the most recent call. | | * database passed in the most recent call. | |
| | | | |
| skipping to change at line 460 | | skipping to change at line 460 | |
| /** Construct a ValueWeightPostingSource. | | /** Construct a ValueWeightPostingSource. | |
| * | | * | |
| * @param slot_ The value slot to read values from. | | * @param slot_ The value slot to read values from. | |
| */ | | */ | |
| ValueWeightPostingSource(Xapian::valueno slot_); | | ValueWeightPostingSource(Xapian::valueno slot_); | |
| | | | |
| double get_weight() const; | | double get_weight() const; | |
| ValueWeightPostingSource * clone() const; | | ValueWeightPostingSource * clone() const; | |
| std::string name() const; | | std::string name() const; | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| ValueWeightPostingSource * unserialise(const std::string &s) const; | | ValueWeightPostingSource * unserialise(const std::string &serialised) c
onst; | |
| void init(const Database & db_); | | void init(const Database & db_); | |
| | | | |
| std::string get_description() const; | | std::string get_description() const; | |
| }; | | }; | |
| | | | |
| /** Read weights from a value which is known to decrease as docid increases
. | | /** Read weights from a value which is known to decrease as docid increases
. | |
| * | | * | |
| * This posting source can be used, like ValueWeightPostingSource, to add
a | | * This posting source can be used, like ValueWeightPostingSource, to add
a | |
| * weight contribution to a query based on the values stored in a slot. T
he | | * weight contribution to a query based on the values stored in a slot. T
he | |
| * values in the slot must be serialised as by @a sortable_serialise(). | | * values in the slot must be serialised as by @a sortable_serialise(). | |
| | | | |
| skipping to change at line 507 | | skipping to change at line 507 | |
| | | | |
| public: | | public: | |
| DecreasingValueWeightPostingSource(Xapian::valueno slot_, | | DecreasingValueWeightPostingSource(Xapian::valueno slot_, | |
| Xapian::docid range_start_ = 0, | | Xapian::docid range_start_ = 0, | |
| Xapian::docid range_end_ = 0); | | Xapian::docid range_end_ = 0); | |
| | | | |
| double get_weight() const; | | double get_weight() const; | |
| DecreasingValueWeightPostingSource * clone() const; | | DecreasingValueWeightPostingSource * clone() const; | |
| std::string name() const; | | std::string name() const; | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| DecreasingValueWeightPostingSource * unserialise(const std::string &s)
const; | | DecreasingValueWeightPostingSource * unserialise(const std::string &ser
ialised) const; | |
| void init(const Xapian::Database & db_); | | void init(const Xapian::Database & db_); | |
| | | | |
| void next(double min_wt); | | void next(double min_wt); | |
| void skip_to(Xapian::docid min_docid, double min_wt); | | void skip_to(Xapian::docid min_docid, double min_wt); | |
| bool check(Xapian::docid min_docid, double min_wt); | | bool check(Xapian::docid min_docid, double min_wt); | |
| | | | |
| std::string get_description() const; | | std::string get_description() const; | |
| }; | | }; | |
| | | | |
| /** A posting source which looks up weights in a map using values as the ke
y. | | /** A posting source which looks up weights in a map using values as the ke
y. | |
| | | | |
| skipping to change at line 563 | | skipping to change at line 563 | |
| /** Set a default weight for document values not in the map. | | /** Set a default weight for document values not in the map. | |
| * | | * | |
| * @param wt The weight to set as the default. | | * @param wt The weight to set as the default. | |
| */ | | */ | |
| void set_default_weight(double wt); | | void set_default_weight(double wt); | |
| | | | |
| double get_weight() const; | | double get_weight() const; | |
| ValueMapPostingSource * clone() const; | | ValueMapPostingSource * clone() const; | |
| std::string name() const; | | std::string name() const; | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| ValueMapPostingSource * unserialise(const std::string &s) const; | | ValueMapPostingSource * unserialise(const std::string &serialised) cons
t; | |
| void init(const Database & db_); | | void init(const Database & db_); | |
| | | | |
| std::string get_description() const; | | std::string get_description() const; | |
| }; | | }; | |
| | | | |
| /** A posting source which returns a fixed weight for all documents. | | /** A posting source which returns a fixed weight for all documents. | |
| * | | * | |
| * This returns entries for all documents in the given database, with a fi
xed | | * This returns entries for all documents in the given database, with a fi
xed | |
| * weight (specified by a parameter to the constructor). | | * weight (specified by a parameter to the constructor). | |
| */ | | */ | |
| | | | |
| skipping to change at line 614 | | skipping to change at line 614 | |
| void skip_to(Xapian::docid min_docid, double min_wt); | | void skip_to(Xapian::docid min_docid, double min_wt); | |
| bool check(Xapian::docid min_docid, double min_wt); | | bool check(Xapian::docid min_docid, double min_wt); | |
| | | | |
| bool at_end() const; | | bool at_end() const; | |
| | | | |
| Xapian::docid get_docid() const; | | Xapian::docid get_docid() const; | |
| | | | |
| FixedWeightPostingSource * clone() const; | | FixedWeightPostingSource * clone() const; | |
| std::string name() const; | | std::string name() const; | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| FixedWeightPostingSource * unserialise(const std::string &s) const; | | FixedWeightPostingSource * unserialise(const std::string &serialised) c
onst; | |
| void init(const Database & db_); | | void init(const Database & db_); | |
| | | | |
| std::string get_description() const; | | std::string get_description() const; | |
| }; | | }; | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H | | #endif // XAPIAN_INCLUDED_POSTINGSOURCE_H | |
| | | | |
End of changes. 10 change blocks. |
| 10 lines changed or deleted | | 10 lines changed or added | |
|
| query.h | | query.h | |
| /** @file query.h | | /** @file query.h | |
| * @brief Xapian::Query API class | | * @brief Xapian::Query API class | |
| */ | | */ | |
|
| /* Copyright (C) 2011,2012,2013 Olly Betts | | /* Copyright (C) 2011,2012,2013,2014 Olly Betts | |
| * Copyright (C) 2008 Richard Boulton | | * Copyright (C) 2008 Richard Boulton | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_QUERY_H | | #ifndef XAPIAN_INCLUDED_QUERY_H | |
| #define XAPIAN_INCLUDED_QUERY_H | | #define XAPIAN_INCLUDED_QUERY_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/query.h> directly; include <xapian.h> instead." | | # error "Never use <xapian/query.h> directly; include <xapian.h> instead." | |
| #endif | | #endif | |
| | | | |
| #include <string> | | #include <string> | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/intrusive_ptr.h> | | #include <xapian/intrusive_ptr.h> | |
| #include <xapian/postingiterator.h> | | #include <xapian/postingiterator.h> | |
| #include <xapian/registry.h> | | #include <xapian/registry.h> | |
| #include <xapian/termiterator.h> | | #include <xapian/termiterator.h> | |
| | | | |
| skipping to change at line 110 | | skipping to change at line 110 | |
| * <pre> | | * <pre> | |
| * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(),
subqs.end(), 7); | | * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(),
subqs.end(), 7); | |
| * </pre> | | * </pre> | |
| * | | * | |
| * If the number of subqueries is less than this threshold, | | * If the number of subqueries is less than this threshold, | |
| * OP_ELITE_SET behaves identically to OP_OR. | | * OP_ELITE_SET behaves identically to OP_OR. | |
| */ | | */ | |
| OP_ELITE_SET = 10, | | OP_ELITE_SET = 10, | |
| OP_VALUE_GE = 11, | | OP_VALUE_GE = 11, | |
| OP_VALUE_LE = 12, | | OP_VALUE_LE = 12, | |
|
| OP_SYNONYM = 13 | | OP_SYNONYM = 13, | |
| | | OP_MAX = 14, | |
| | | | |
| | | LEAF_TERM = 100, | |
| | | LEAF_POSTING_SOURCE, | |
| | | LEAF_MATCH_ALL, | |
| | | LEAF_MATCH_NOTHING | |
| }; | | }; | |
| | | | |
| /// Default constructor. | | /// Default constructor. | |
| XAPIAN_NOTHROW(Query()) | | XAPIAN_NOTHROW(Query()) | |
| : internal(0) { } | | : internal(0) { } | |
| | | | |
| /// Destructor. | | /// Destructor. | |
| ~Query() { } | | ~Query() { } | |
| | | | |
| /** Copying is allowed. | | /** Copying is allowed. | |
| | | | |
| skipping to change at line 210 | | skipping to change at line 216 | |
| } | | } | |
| | | | |
| Xapian::termcount get_length() const XAPIAN_PURE_FUNCTION; | | Xapian::termcount get_length() const XAPIAN_PURE_FUNCTION; | |
| | | | |
| bool XAPIAN_NOTHROW(empty() const) XAPIAN_PURE_FUNCTION { | | bool XAPIAN_NOTHROW(empty() const) XAPIAN_PURE_FUNCTION { | |
| return internal.get() == 0; | | return internal.get() == 0; | |
| } | | } | |
| | | | |
| std::string serialise() const; | | std::string serialise() const; | |
| | | | |
|
| static const Query unserialise(const std::string & s, | | static const Query unserialise(const std::string & serialised, | |
| const Registry & reg = Registry()); | | const Registry & reg = Registry()); | |
| | | | |
|
| | | /** Get the type of the top level of the query. */ | |
| | | op get_type() const XAPIAN_PURE_FUNCTION; | |
| | | | |
| | | /** Get the number of subqueries of the top level query. */ | |
| | | size_t get_num_subqueries() const XAPIAN_PURE_FUNCTION; | |
| | | | |
| | | /** Read a top level subquery. | |
| | | * | |
| | | * @param n Return the n-th subquery (starting from 0) - only valid w | |
| | | hen | |
| | | * 0 <= n < get_num_subqueries(). | |
| | | */ | |
| | | const Query get_subquery(size_t n) const XAPIAN_PURE_FUNCTION; | |
| | | | |
| std::string get_description() const XAPIAN_PURE_FUNCTION; | | std::string get_description() const XAPIAN_PURE_FUNCTION; | |
| | | | |
| const Query operator&=(const Query & o) { | | const Query operator&=(const Query & o) { | |
| return (*this = Query(OP_AND, *this, o)); | | return (*this = Query(OP_AND, *this, o)); | |
| } | | } | |
| | | | |
| const Query operator|=(const Query & o) { | | const Query operator|=(const Query & o) { | |
| return (*this = Query(OP_OR, *this, o)); | | return (*this = Query(OP_OR, *this, o)); | |
| } | | } | |
| | | | |
| | | | |
| skipping to change at line 235 | | skipping to change at line 254 | |
| } | | } | |
| | | | |
| const Query operator*=(double factor) { | | const Query operator*=(double factor) { | |
| return (*this = Query(factor, *this)); | | return (*this = Query(factor, *this)); | |
| } | | } | |
| | | | |
| const Query operator/=(double factor) { | | const Query operator/=(double factor) { | |
| return (*this = Query(1.0 / factor, *this)); | | return (*this = Query(1.0 / factor, *this)); | |
| } | | } | |
| | | | |
|
| /** @private @internal | | /** @private @internal */ | |
| * | | Query(Internal * internal_) : internal(internal_) { } | |
| * Pass a reference to avoid ambiguity for Query(NULL) (not useful, bu | | | |
| t the | | | |
| * testsuite does it...) FIXME | | | |
| */ | | | |
| Query(Internal & internal_) : internal(&internal_) { } | | | |
| | | | |
| private: | | private: | |
| void init(Query::op op_, size_t n_subqueries, Xapian::termcount window
= 0); | | void init(Query::op op_, size_t n_subqueries, Xapian::termcount window
= 0); | |
| | | | |
| template<typename I> | | template<typename I> | |
| void init(Query::op op_, Xapian::termcount window, | | void init(Query::op op_, Xapian::termcount window, | |
| const I & begin, const I & end, std::random_access_iterator_ta
g) | | const I & begin, const I & end, std::random_access_iterator_ta
g) | |
| { | | { | |
| init(op_, end - begin, window); | | init(op_, end - begin, window); | |
| } | | } | |
| | | | |
| skipping to change at line 373 | | skipping to change at line 388 | |
| virtual void postlist_sub_xor(Xapian::Internal::XorContext& ctx, | | virtual void postlist_sub_xor(Xapian::Internal::XorContext& ctx, | |
| QueryOptimiser * qopt, | | QueryOptimiser * qopt, | |
| double factor) const; | | double factor) const; | |
| | | | |
| virtual termcount get_length() const XAPIAN_PURE_FUNCTION; | | virtual termcount get_length() const XAPIAN_PURE_FUNCTION; | |
| | | | |
| virtual void serialise(std::string & result) const = 0; | | virtual void serialise(std::string & result) const = 0; | |
| | | | |
| static Query::Internal * unserialise(const char ** p, const char * end,
const Registry & reg); | | static Query::Internal * unserialise(const char ** p, const char * end,
const Registry & reg); | |
| | | | |
|
| | | virtual Query::op get_type() const XAPIAN_PURE_FUNCTION = 0; | |
| | | virtual size_t get_num_subqueries() const XAPIAN_PURE_FUNCTION; | |
| | | virtual const Query get_subquery(size_t n) const XAPIAN_PURE_FUNCTION; | |
| | | | |
| virtual std::string get_description() const XAPIAN_PURE_FUNCTION = 0; | | virtual std::string get_description() const XAPIAN_PURE_FUNCTION = 0; | |
| | | | |
| // Pass argument as void* to avoid need to include <vector>. | | // Pass argument as void* to avoid need to include <vector>. | |
| virtual void gather_terms(void * void_terms) const; | | virtual void gather_terms(void * void_terms) const; | |
| }; | | }; | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_QUERY_H | | #endif // XAPIAN_INCLUDED_QUERY_H | |
| | | | |
End of changes. 7 change blocks. |
| 11 lines changed or deleted | | 30 lines changed or added | |
|
| queryparser.h | | queryparser.h | |
| /** @file queryparser.h | | /** @file queryparser.h | |
| * @brief parsing a user query string to build a Xapian::Query object | | * @brief parsing a user query string to build a Xapian::Query object | |
| */ | | */ | |
|
| /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013 Olly Betts | | /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014 Olly Bet
ts | |
| * Copyright (C) 2010 Adam Sjøgren | | * Copyright (C) 2010 Adam Sjøgren | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 26 | | skipping to change at line 26 | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_QUERYPARSER_H | | #ifndef XAPIAN_INCLUDED_QUERYPARSER_H | |
| #define XAPIAN_INCLUDED_QUERYPARSER_H | | #define XAPIAN_INCLUDED_QUERYPARSER_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/queryparser.h> directly; include <xapian.h> inst
ead." | | # error "Never use <xapian/queryparser.h> directly; include <xapian.h> inst
ead." | |
| #endif | | #endif | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/intrusive_ptr.h> | | #include <xapian/intrusive_ptr.h> | |
| #include <xapian/query.h> | | #include <xapian/query.h> | |
| #include <xapian/termiterator.h> | | #include <xapian/termiterator.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| #include <set> | | #include <set> | |
| | | | |
| skipping to change at line 68 | | skipping to change at line 68 | |
| }; | | }; | |
| | | | |
| /// Simple implementation of Stopper class - this will suit most users. | | /// Simple implementation of Stopper class - this will suit most users. | |
| class XAPIAN_VISIBILITY_DEFAULT SimpleStopper : public Stopper { | | class XAPIAN_VISIBILITY_DEFAULT SimpleStopper : public Stopper { | |
| std::set<std::string> stop_words; | | std::set<std::string> stop_words; | |
| | | | |
| public: | | public: | |
| /// Default constructor. | | /// Default constructor. | |
| SimpleStopper() { } | | SimpleStopper() { } | |
| | | | |
|
| /// Initialise from a pair of iterators. | | /** Initialise from a pair of iterators. | |
| | | * | |
| | | * Xapian includes stop list files for many languages. You can initiali | |
| | | se from a file like this: | |
| | | * @code | |
| | | * ifstream inFile ("stopwords/english/stop.txt"); | |
| | | * Xapian::SimpleStopper stopper(istream_iterator<string>(inFile), ist | |
| | | ream_iterator<string>()); | |
| | | * @endcode | |
| | | * | |
| | | */ | |
| #if ! defined __SUNPRO_CC || __SUNPRO_CC - 0 >= 0x580 | | #if ! defined __SUNPRO_CC || __SUNPRO_CC - 0 >= 0x580 | |
| template <class Iterator> | | template <class Iterator> | |
| SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) {
} | | SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) {
} | |
| #else | | #else | |
| // Older versions of Sun's C++ compiler don't cope with the Iterator | | // Older versions of Sun's C++ compiler don't cope with the Iterator | |
| // pointing to const char *. | | // pointing to const char *. | |
| template <class Iterator> | | template <class Iterator> | |
| SimpleStopper(Iterator begin, Iterator end) { | | SimpleStopper(Iterator begin, Iterator end) { | |
| while (begin != end) stop_words.insert(*begin++); | | while (begin != end) stop_words.insert(*begin++); | |
| } | | } | |
| | | | |
| skipping to change at line 524 | | skipping to change at line 532 | |
| | | | |
| /** Set the stemming strategy. | | /** Set the stemming strategy. | |
| * | | * | |
| * This controls how the query parser will apply the stemming algorith
m. | | * This controls how the query parser will apply the stemming algorith
m. | |
| * Note that the stemming algorithm is only applied to words in | | * Note that the stemming algorithm is only applied to words in | |
| * probabilistic fields - boolean filter terms are never stemmed. | | * probabilistic fields - boolean filter terms are never stemmed. | |
| * | | * | |
| * @param strategy The strategy to use - possible values are: | | * @param strategy The strategy to use - possible values are: | |
| * - STEM_NONE: Don't perform any stemming. (default in Xapian <= | | * - STEM_NONE: Don't perform any stemming. (default in Xapian <= | |
| * 1.3.0) | | * 1.3.0) | |
|
| * - STEM_SOME: Search for stemmed forms of terms except for those | | * - STEM_SOME: Stem all terms except for those which start with a | |
| * which start with a capital letter, or are followed b | | * capital letter, or are followed by certain character | |
| y | | s | |
| * certain characters (currently: (/\@<>=*[{" ), or are | | * (currently: <code>(/\@<>=*[{"</code> ), or are used | |
| * used with operators which need positional informatio | | * with operators which need positional information. | |
| n. | | | |
| * Stemmed terms are prefixed with 'Z'. (default in | | * Stemmed terms are prefixed with 'Z'. (default in | |
| * Xapian >= 1.3.1) | | * Xapian >= 1.3.1) | |
|
| * - STEM_ALL: Search for stemmed forms of all words (note: no 'Z' | | * - STEM_ALL: Stem all terms (note: no 'Z' prefix is added). | |
| * prefix is added). | | * - STEM_ALL_Z: Stem all terms (note: 'Z' prefix is added). (new in | |
| * - STEM_ALL_Z: Search for stemmed forms of all words (note: 'Z' | | * Xapian 1.2.11 and 1.3.1) | |
| * prefix is added). (new in Xapian 1.2.11 and 1.3.1) | | | |
| */ | | */ | |
| void set_stemming_strategy(stem_strategy strategy); | | void set_stemming_strategy(stem_strategy strategy); | |
| | | | |
| /** Set the stopper. | | /** Set the stopper. | |
| * | | * | |
| * @param stop The Stopper object to set (default NULL, which means
no | | * @param stop The Stopper object to set (default NULL, which means
no | |
| * stopwords). | | * stopwords). | |
| */ | | */ | |
| void set_stopper(const Stopper *stop = NULL); | | void set_stopper(const Stopper *stop = NULL); | |
| | | | |
| | | | |
| skipping to change at line 786 | | skipping to change at line 793 | |
| * | | * | |
| * This expects the input to be a string produced by @a sortable_serialise
(). | | * This expects the input to be a string produced by @a sortable_serialise
(). | |
| * If the input is not such a string, the value returned is undefined (but | | * If the input is not such a string, the value returned is undefined (but | |
| * no error will be thrown). | | * no error will be thrown). | |
| * | | * | |
| * The result of the conversion will be exactly the value which was | | * The result of the conversion will be exactly the value which was | |
| * supplied to @a sortable_serialise() when making the string on platforms | | * supplied to @a sortable_serialise() when making the string on platforms | |
| * which represent doubles with the precisions specified by IEEE_754, but | | * which represent doubles with the precisions specified by IEEE_754, but | |
| * may be a different (nearby) value on other platforms. | | * may be a different (nearby) value on other platforms. | |
| * | | * | |
|
| * @param value The serialised string to decode. | | * @param serialised The serialised string to decode. | |
| */ | | */ | |
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_VISIBILITY_DEFAULT | |
|
| double sortable_unserialise(const std::string & value) XAPIAN_CONST_FUNCTIO
N; | | double sortable_unserialise(const std::string & serialised) XAPIAN_CONST_FU
NCTION; | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_QUERYPARSER_H | | #endif // XAPIAN_INCLUDED_QUERYPARSER_H | |
| | | | |
End of changes. 7 change blocks. |
| 15 lines changed or deleted | | 23 lines changed or added | |
|
| unicode.h | | unicode.h | |
| /** @file unicode.h | | /** @file unicode.h | |
| * @brief Unicode and UTF-8 related classes and functions. | | * @brief Unicode and UTF-8 related classes and functions. | |
| */ | | */ | |
|
| /* Copyright (C) 2006,2007,2008,2009,2010,2011,2012,2013 Olly Betts | | /* Copyright (C) 2006,2007,2008,2009,2010,2011,2012,2013,2014 Olly Betts | |
| * | | * | |
| * This program is free software; you can redistribute it and/or modify | | * This program is free software; you can redistribute it and/or modify | |
| * it under the terms of the GNU General Public License as published by | | * it under the terms of the GNU General Public License as published by | |
| * the Free Software Foundation; either version 2 of the License, or | | * the Free Software Foundation; either version 2 of the License, or | |
| * (at your option) any later version. | | * (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 US
A | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_UNICODE_H | | #ifndef XAPIAN_INCLUDED_UNICODE_H | |
| #define XAPIAN_INCLUDED_UNICODE_H | | #define XAPIAN_INCLUDED_UNICODE_H | |
| | | | |
|
| #if !defined XAPIAN_INCLUDED_XAPIAN_H && !defined XAPIAN_LIB_BUILD | | #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD | |
| # error "Never use <xapian/unicode.h> directly; include <xapian.h> instead.
" | | # error "Never use <xapian/unicode.h> directly; include <xapian.h> instead.
" | |
| #endif | | #endif | |
| | | | |
| #include <xapian/attributes.h> | | #include <xapian/attributes.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| #include <string> | | #include <string> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
| /** An iterator which returns Unicode character values from a UTF-8 encoded | | /** An iterator which returns Unicode character values from a UTF-8 encoded | |
| * string. | | * string. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT Utf8Iterator { | | class XAPIAN_VISIBILITY_DEFAULT Utf8Iterator { | |
| const unsigned char *p; | | const unsigned char *p; | |
| const unsigned char *end; | | const unsigned char *end; | |
| mutable unsigned seqlen; | | mutable unsigned seqlen; | |
| | | | |
|
| void calculate_sequence_length() const; | | bool calculate_sequence_length() const; | |
| | | | |
| unsigned get_char() const; | | unsigned get_char() const; | |
| | | | |
| Utf8Iterator(const unsigned char *p_, const unsigned char *end_, unsign
ed seqlen_) | | Utf8Iterator(const unsigned char *p_, const unsigned char *end_, unsign
ed seqlen_) | |
| : p(p_), end(end_), seqlen(seqlen_) { } | | : p(p_), end(end_), seqlen(seqlen_) { } | |
| | | | |
| public: | | public: | |
| /** Return the raw const char * pointer for the current position. */ | | /** Return the raw const char * pointer for the current position. */ | |
| const char * raw() const XAPIAN_PURE_FUNCTION { | | const char * raw() const XAPIAN_PURE_FUNCTION { | |
| return reinterpret_cast<const char *>(p ? p : end); | | return reinterpret_cast<const char *>(p ? p : end); | |
| | | | |
| skipping to change at line 135 | | skipping to change at line 135 | |
| /** Create an iterator which is at the end of its iteration. | | /** Create an iterator which is at the end of its iteration. | |
| * | | * | |
| * This can be compared to another iterator to check if the other iter
ator | | * This can be compared to another iterator to check if the other iter
ator | |
| * has reached its end. | | * has reached its end. | |
| */ | | */ | |
| XAPIAN_NOTHROW(Utf8Iterator()) | | XAPIAN_NOTHROW(Utf8Iterator()) | |
| : p(NULL), end(0), seqlen(0) { } | | : p(NULL), end(0), seqlen(0) { } | |
| | | | |
| /** Get the current Unicode character value pointed to by the iterator. | | /** Get the current Unicode character value pointed to by the iterator. | |
| * | | * | |
|
| | | * If an invalid UTF-8 sequence is encountered, then the byte values | |
| | | * comprising it are returned until valid UTF-8 or the end of the inpu | |
| | | t is | |
| | | * reached. | |
| | | * | |
| * Returns unsigned(-1) if the iterator has reached the end of its buf
fer. | | * Returns unsigned(-1) if the iterator has reached the end of its buf
fer. | |
| */ | | */ | |
| unsigned operator*() const XAPIAN_PURE_FUNCTION; | | unsigned operator*() const XAPIAN_PURE_FUNCTION; | |
| | | | |
|
| | | /** @private @internal Get the current Unicode character | |
| | | * value pointed to by the iterator. | |
| | | * | |
| | | * If an invalid UTF-8 sequence is encountered, then the byte values | |
| | | * comprising it are returned with the top bit set (so the caller can | |
| | | * differentiate these from the same values arising from valid UTF-8) | |
| | | * until valid UTF-8 or the end of the input is reached. | |
| | | * | |
| | | * Returns unsigned(-1) if the iterator has reached the end of its buf | |
| | | fer. | |
| | | */ | |
| | | unsigned strict_deref() const XAPIAN_PURE_FUNCTION; | |
| | | | |
| /** Move forward to the next Unicode character. | | /** Move forward to the next Unicode character. | |
| * | | * | |
| * @return An iterator pointing to the position before the move. | | * @return An iterator pointing to the position before the move. | |
| */ | | */ | |
| Utf8Iterator operator++(int) { | | Utf8Iterator operator++(int) { | |
| // If we've not calculated seqlen yet, do so. | | // If we've not calculated seqlen yet, do so. | |
| if (seqlen == 0) calculate_sequence_length(); | | if (seqlen == 0) calculate_sequence_length(); | |
| const unsigned char *old_p = p; | | const unsigned char *old_p = p; | |
| unsigned old_seqlen = seqlen; | | unsigned old_seqlen = seqlen; | |
| p += seqlen; | | p += seqlen; | |
| | | | |
| skipping to change at line 232 | | skipping to change at line 248 | |
| INITIAL_QUOTE_PUNCTUATION, | | INITIAL_QUOTE_PUNCTUATION, | |
| FINAL_QUOTE_PUNCTUATION, | | FINAL_QUOTE_PUNCTUATION, | |
| OTHER_PUNCTUATION, | | OTHER_PUNCTUATION, | |
| MATH_SYMBOL, | | MATH_SYMBOL, | |
| CURRENCY_SYMBOL, | | CURRENCY_SYMBOL, | |
| MODIFIER_SYMBOL, | | MODIFIER_SYMBOL, | |
| OTHER_SYMBOL | | OTHER_SYMBOL | |
| } category; | | } category; | |
| | | | |
| namespace Internal { | | namespace Internal { | |
|
| /** @internal Extract the information about a character from the Unicod | | /** @private @internal Extract the information about a character from t | |
| e | | he | |
| * character tables. | | * Unicode character tables. | |
| * | | * | |
|
| * ch must be a valid Unicode character value (i.e. < 0x110000) | | * Characters outside of the Unicode range (i.e. ch >= 0x110000) are | |
| | | * treated as UNASSIGNED with no case variants. | |
| */ | | */ | |
| XAPIAN_VISIBILITY_DEFAULT | | XAPIAN_VISIBILITY_DEFAULT | |
| int get_character_info(unsigned ch) XAPIAN_CONST_FUNCTION; | | int get_character_info(unsigned ch) XAPIAN_CONST_FUNCTION; | |
| | | | |
|
| /** @internal Extract how to convert the case of a Unicode character fr | | /** @private @internal Extract how to convert the case of a Unicode | |
| om | | * character from its info. | |
| * its info. | | | |
| */ | | */ | |
| inline int get_case_type(int info) { return ((info & 0xe0) >> 5); } | | inline int get_case_type(int info) { return ((info & 0xe0) >> 5); } | |
| | | | |
|
| /// @internal Extract the category of a Unicode character from its info | | /** @private @internal Extract the category of a Unicode character from | |
| . | | its | |
| | | * info. | |
| | | */ | |
| inline category get_category(int info) { return static_cast<category>(i
nfo & 0x1f); } | | inline category get_category(int info) { return static_cast<category>(i
nfo & 0x1f); } | |
| | | | |
|
| /** @internal Extract the delta to use for case conversion of a charact | | /** @private @internal Extract the delta to use for case conversion of | |
| er | | a | |
| * from its info. | | * character from its info. | |
| */ | | */ | |
| inline int get_delta(int info) { | | inline int get_delta(int info) { | |
| /* It's implementation defined if sign extension happens on right sh
ift | | /* It's implementation defined if sign extension happens on right sh
ift | |
| * of a signed int, hence the conditional (hopefully the compiler wi
ll | | * of a signed int, hence the conditional (hopefully the compiler wi
ll | |
| * spot this and optimise it to a sign-extending shift on architectu
res | | * spot this and optimise it to a sign-extending shift on architectu
res | |
| * with a suitable instruction). | | * with a suitable instruction). | |
| */ | | */ | |
| #ifdef __GNUC__ | | #ifdef __GNUC__ | |
| // GCC 4.7.1 doesn't optimise the more complex expression down | | // GCC 4.7.1 doesn't optimise the more complex expression down | |
| // (reported as http://gcc.gnu.org/PR55299), but the documented | | // (reported as http://gcc.gnu.org/PR55299), but the documented | |
| | | | |
| skipping to change at line 309 | | skipping to change at line 328 | |
| /** Append the UTF-8 representation of a single Unicode character to a | | /** Append the UTF-8 representation of a single Unicode character to a | |
| * std::string. | | * std::string. | |
| */ | | */ | |
| inline void append_utf8(std::string &s, unsigned ch) { | | inline void append_utf8(std::string &s, unsigned ch) { | |
| char buf[4]; | | char buf[4]; | |
| s.append(buf, to_utf8(ch, buf)); | | s.append(buf, to_utf8(ch, buf)); | |
| } | | } | |
| | | | |
| /// Return the category which a given Unicode character falls into. | | /// Return the category which a given Unicode character falls into. | |
| inline category get_category(unsigned ch) { | | inline category get_category(unsigned ch) { | |
|
| // Categorise non-Unicode values as UNASSIGNED. | | | |
| if (ch >= 0x110000) return Xapian::Unicode::UNASSIGNED; | | | |
| return Internal::get_category(Internal::get_character_info(ch)); | | return Internal::get_category(Internal::get_character_info(ch)); | |
| } | | } | |
| | | | |
| /// Test if a given Unicode character is a "word character". | | /// Test if a given Unicode character is a "word character". | |
| inline bool is_wordchar(unsigned ch) { | | inline bool is_wordchar(unsigned ch) { | |
| const unsigned int WORDCHAR_MASK = | | const unsigned int WORDCHAR_MASK = | |
| (1 << Xapian::Unicode::UPPERCASE_LETTER) | | | (1 << Xapian::Unicode::UPPERCASE_LETTER) | | |
| (1 << Xapian::Unicode::LOWERCASE_LETTER) | | | (1 << Xapian::Unicode::LOWERCASE_LETTER) | | |
| (1 << Xapian::Unicode::TITLECASE_LETTER) | | | (1 << Xapian::Unicode::TITLECASE_LETTER) | | |
| (1 << Xapian::Unicode::MODIFIER_LETTER) | | | (1 << Xapian::Unicode::MODIFIER_LETTER) | | |
| | | | |
| skipping to change at line 349 | | skipping to change at line 366 | |
| return ((WHITESPACE_MASK >> get_category(ch)) & 1); | | return ((WHITESPACE_MASK >> get_category(ch)) & 1); | |
| } | | } | |
| | | | |
| /// Test if a given Unicode character is a currency symbol. | | /// Test if a given Unicode character is a currency symbol. | |
| inline bool is_currency(unsigned ch) { | | inline bool is_currency(unsigned ch) { | |
| return (get_category(ch) == Xapian::Unicode::CURRENCY_SYMBOL); | | return (get_category(ch) == Xapian::Unicode::CURRENCY_SYMBOL); | |
| } | | } | |
| | | | |
| /// Convert a Unicode character to lowercase. | | /// Convert a Unicode character to lowercase. | |
| inline unsigned tolower(unsigned ch) { | | inline unsigned tolower(unsigned ch) { | |
|
| int info; | | int info = Xapian::Unicode::Internal::get_character_info(ch); | |
| // Leave non-Unicode values unchanged. | | if (!(Internal::get_case_type(info) & 2)) | |
| if (ch >= 0x110000 || !(Internal::get_case_type((info = Xapian::Unicode | | | |
| ::Internal::get_character_info(ch))) & 2)) | | | |
| return ch; | | return ch; | |
| return ch + Internal::get_delta(info); | | return ch + Internal::get_delta(info); | |
| } | | } | |
| | | | |
| /// Convert a Unicode character to uppercase. | | /// Convert a Unicode character to uppercase. | |
| inline unsigned toupper(unsigned ch) { | | inline unsigned toupper(unsigned ch) { | |
|
| int info; | | int info = Xapian::Unicode::Internal::get_character_info(ch); | |
| // Leave non-Unicode values unchanged. | | if (!(Internal::get_case_type(info) & 4)) | |
| if (ch >= 0x110000 || !(Internal::get_case_type((info = Xapian::Unicode | | | |
| ::Internal::get_character_info(ch))) & 4)) | | | |
| return ch; | | return ch; | |
| return ch - Internal::get_delta(info); | | return ch - Internal::get_delta(info); | |
| } | | } | |
| | | | |
| /// Convert a UTF-8 std::string to lowercase. | | /// Convert a UTF-8 std::string to lowercase. | |
| inline std::string | | inline std::string | |
| tolower(const std::string &term) | | tolower(const std::string &term) | |
| { | | { | |
| std::string result; | | std::string result; | |
| result.reserve(term.size()); | | result.reserve(term.size()); | |
| | | | |
End of changes. 13 change blocks. |
| 25 lines changed or deleted | | 39 lines changed or added | |
|
| weight.h | | weight.h | |
| /** @file weight.h | | /** @file weight.h | |
| * @brief Weighting scheme API. | | * @brief Weighting scheme API. | |
| */ | | */ | |
| /* Copyright (C) 2007,2008,2009,2010,2011,2012 Olly Betts | | /* Copyright (C) 2007,2008,2009,2010,2011,2012 Olly Betts | |
| * Copyright (C) 2009 Lemur Consulting Ltd | | * Copyright (C) 2009 Lemur Consulting Ltd | |
|
| | | * Copyright (C) 2013,2014 Aarsh Shah | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| | | | |
| skipping to change at line 48 | | skipping to change at line 49 | |
| RSET_SIZE = 2, | | RSET_SIZE = 2, | |
| AVERAGE_LENGTH = 4, | | AVERAGE_LENGTH = 4, | |
| TERMFREQ = 8, | | TERMFREQ = 8, | |
| RELTERMFREQ = 16, | | RELTERMFREQ = 16, | |
| QUERY_LENGTH = 32, | | QUERY_LENGTH = 32, | |
| WQF = 64, | | WQF = 64, | |
| WDF = 128, | | WDF = 128, | |
| DOC_LENGTH = 256, | | DOC_LENGTH = 256, | |
| DOC_LENGTH_MIN = 512, | | DOC_LENGTH_MIN = 512, | |
| DOC_LENGTH_MAX = 1024, | | DOC_LENGTH_MAX = 1024, | |
|
| WDF_MAX = 2048 | | WDF_MAX = 2048, | |
| | | COLLECTION_FREQ = 4096, | |
| | | UNIQUE_TERMS = 8192 | |
| } stat_flags; | | } stat_flags; | |
| | | | |
| /** Tell Xapian that your subclass will want a particular statistic. | | /** Tell Xapian that your subclass will want a particular statistic. | |
| * | | * | |
| * Some of the statistics can be costly to fetch or calculate, so | | * Some of the statistics can be costly to fetch or calculate, so | |
| * Xapian needs to know which are actually going to be used. You | | * Xapian needs to know which are actually going to be used. You | |
| * should call need_stat() from your constructor for each such | | * should call need_stat() from your constructor for each such | |
| * statistic. | | * statistic. | |
| * | | * | |
| * @param flag The stat_flags value for a required statistic. | | * @param flag The stat_flags value for a required statistic. | |
| | | | |
| skipping to change at line 94 | | skipping to change at line 97 | |
| | | | |
| /// The number of documents marked as relevant. | | /// The number of documents marked as relevant. | |
| Xapian::doccount rset_size_; | | Xapian::doccount rset_size_; | |
| | | | |
| /// The average length of a document in the collection. | | /// The average length of a document in the collection. | |
| Xapian::doclength average_length_; | | Xapian::doclength average_length_; | |
| | | | |
| /// The number of documents which this term indexes. | | /// The number of documents which this term indexes. | |
| Xapian::doccount termfreq_; | | Xapian::doccount termfreq_; | |
| | | | |
|
| | | /// The collection frequency of the term. | |
| | | Xapian::termcount collectionfreq_; | |
| | | | |
| /// The number of relevant documents which this term indexes. | | /// The number of relevant documents which this term indexes. | |
| Xapian::doccount reltermfreq_; | | Xapian::doccount reltermfreq_; | |
| | | | |
| /// The length of the query. | | /// The length of the query. | |
| Xapian::termcount query_length_; | | Xapian::termcount query_length_; | |
| | | | |
| /// The within-query-frequency of this term. | | /// The within-query-frequency of this term. | |
| Xapian::termcount wqf_; | | Xapian::termcount wqf_; | |
| | | | |
| /// A lower bound on the minimum length of any document in the database
. | | /// A lower bound on the minimum length of any document in the database
. | |
| Xapian::termcount doclength_lower_bound_; | | Xapian::termcount doclength_lower_bound_; | |
| | | | |
| /// An upper bound on the maximum length of any document in the databas
e. | | /// An upper bound on the maximum length of any document in the databas
e. | |
| Xapian::termcount doclength_upper_bound_; | | Xapian::termcount doclength_upper_bound_; | |
| | | | |
| /// An upper bound on the wdf of this term. | | /// An upper bound on the wdf of this term. | |
| Xapian::termcount wdf_upper_bound_; | | Xapian::termcount wdf_upper_bound_; | |
| | | | |
| public: | | public: | |
|
| | | | |
| | | /** Type of smoothing to use with the Language Model Weighting scheme. | |
| | | * | |
| | | * Default is TWO_STAGE_SMOOTHING. | |
| | | */ | |
| | | typedef enum { | |
| | | TWO_STAGE_SMOOTHING = 1, | |
| | | DIRICHLET_SMOOTHING = 2, | |
| | | ABSOLUTE_DISCOUNT_SMOOTHING = 3, | |
| | | JELINEK_MERCER_SMOOTHING = 4 | |
| | | } type_smoothing; | |
| | | | |
| class Internal; | | class Internal; | |
| | | | |
| /** Virtual destructor, because we have virtual methods. */ | | /** Virtual destructor, because we have virtual methods. */ | |
| virtual ~Weight(); | | virtual ~Weight(); | |
| | | | |
| /** Clone this object. | | /** Clone this object. | |
| * | | * | |
| * This method allocates and returns a copy of the object it is called
on. | | * This method allocates and returns a copy of the object it is called
on. | |
| * | | * | |
| * If your subclass is called FooWeight and has parameters a and b, th
en | | * If your subclass is called FooWeight and has parameters a and b, th
en | |
| | | | |
| skipping to change at line 173 | | skipping to change at line 191 | |
| * If you don't want to support the remote backend, you can use the | | * If you don't want to support the remote backend, you can use the | |
| * default implementation which simply throws Xapian::UnimplementedErr
or. | | * default implementation which simply throws Xapian::UnimplementedErr
or. | |
| * | | * | |
| * Note that the returned object will be deallocated by Xapian after u
se | | * Note that the returned object will be deallocated by Xapian after u
se | |
| * with "delete". If you want to handle the deletion in a special way | | * with "delete". If you want to handle the deletion in a special way | |
| * (for example when wrapping the Xapian API for use from another | | * (for example when wrapping the Xapian API for use from another | |
| * language) then you can define a static <code>operator delete</code> | | * language) then you can define a static <code>operator delete</code> | |
| * method in your subclass as shown here: | | * method in your subclass as shown here: | |
| * http://trac.xapian.org/ticket/554#comment:1 | | * http://trac.xapian.org/ticket/554#comment:1 | |
| * | | * | |
|
| * @param s A string containing the serialised parameters. | | * @param serialised A string containing the serialised parameter
s. | |
| */ | | */ | |
|
| virtual Weight * unserialise(const std::string & s) const; | | virtual Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| /** Calculate the weight contribution for this object's term to a docum
ent. | | /** Calculate the weight contribution for this object's term to a docum
ent. | |
| * | | * | |
| * The parameters give information about the document which may be use
d | | * The parameters give information about the document which may be use
d | |
| * in the calculations: | | * in the calculations: | |
| * | | * | |
| * @param wdf The within document frequency of the term in the docu
ment. | | * @param wdf The within document frequency of the term in the docu
ment. | |
| * @param doclen The document's length (unnormalised). | | * @param doclen The document's length (unnormalised). | |
|
| | | * @param uniqterms Number of unique terms in the document (used | |
| | | * for absolute smoothing). | |
| */ | | */ | |
| virtual double get_sumpart(Xapian::termcount wdf, | | virtual double get_sumpart(Xapian::termcount wdf, | |
|
| Xapian::termcount doclen) const = 0; | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const = 0; | |
| | | | |
| /** Return an upper bound on what get_sumpart() can return for any docu
ment. | | /** Return an upper bound on what get_sumpart() can return for any docu
ment. | |
| * | | * | |
| * This information is used by the matcher to perform various | | * This information is used by the matcher to perform various | |
| * optimisations, so strive to make the bound as tight as possible. | | * optimisations, so strive to make the bound as tight as possible. | |
| */ | | */ | |
| virtual double get_maxpart() const = 0; | | virtual double get_maxpart() const = 0; | |
| | | | |
| /** Calculate the term-independent weight component for a document. | | /** Calculate the term-independent weight component for a document. | |
| * | | * | |
| * The parameter gives information about the document which may be use
d | | * The parameter gives information about the document which may be use
d | |
| * in the calculations: | | * in the calculations: | |
| * | | * | |
| * @param doclen The document's length (unnormalised). | | * @param doclen The document's length (unnormalised). | |
|
| | | * @param uniqterms The number of unique terms in the document. | |
| */ | | */ | |
|
| virtual double get_sumextra(Xapian::termcount doclen) const = 0; | | virtual double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const = 0; | |
| | | | |
| /** Return an upper bound on what get_sumextra() can return for any | | /** Return an upper bound on what get_sumextra() can return for any | |
| * document. | | * document. | |
| * | | * | |
| * This information is used by the matcher to perform various | | * This information is used by the matcher to perform various | |
| * optimisations, so strive to make the bound as tight as possible. | | * optimisations, so strive to make the bound as tight as possible. | |
| */ | | */ | |
| virtual double get_maxextra() const = 0; | | virtual double get_maxextra() const = 0; | |
| | | | |
| /** @private @internal Initialise this object to calculate weights for
term | | /** @private @internal Initialise this object to calculate weights for
term | |
| | | | |
| skipping to change at line 233 | | skipping to change at line 256 | |
| double factor); | | double factor); | |
| | | | |
| /** @private @internal Initialise this object to calculate weights for
a | | /** @private @internal Initialise this object to calculate weights for
a | |
| * synonym. | | * synonym. | |
| * | | * | |
| * @param stats Source of statistics. | | * @param stats Source of statistics. | |
| * @param query_len_ Query length. | | * @param query_len_ Query length. | |
| * @param factor Any scaling factor (e.g. from OP_SCALE_WEIGHT). | | * @param factor Any scaling factor (e.g. from OP_SCALE_WEIGHT). | |
| * @param termfreq The termfreq to use. | | * @param termfreq The termfreq to use. | |
| * @param reltermfreq The reltermfreq to use. | | * @param reltermfreq The reltermfreq to use. | |
|
| | | * @param collection_freq The collection frequency to use. | |
| */ | | */ | |
| void init_(const Internal & stats, Xapian::termcount query_len_, | | void init_(const Internal & stats, Xapian::termcount query_len_, | |
| double factor, Xapian::doccount termfreq, | | double factor, Xapian::doccount termfreq, | |
|
| Xapian::doccount reltermfreq); | | Xapian::doccount reltermfreq, Xapian::termcount collection_fr
eq); | |
| | | | |
| /** @private @internal Initialise this object to calculate the extra we
ight | | /** @private @internal Initialise this object to calculate the extra we
ight | |
| * component. | | * component. | |
| * | | * | |
| * @param stats Source of statistics. | | * @param stats Source of statistics. | |
| * @param query_len_ Query length. | | * @param query_len_ Query length. | |
| */ | | */ | |
| void init_(const Internal & stats, Xapian::termcount query_len_); | | void init_(const Internal & stats, Xapian::termcount query_len_); | |
| | | | |
| /** @private @internal Return true if the document length is needed. | | /** @private @internal Return true if the document length is needed. | |
| | | | |
| skipping to change at line 265 | | skipping to change at line 289 | |
| | | | |
| /** @private @internal Return true if the WDF is needed. | | /** @private @internal Return true if the WDF is needed. | |
| * | | * | |
| * If this method returns true, then the WDF will be fetched and passe
d to | | * If this method returns true, then the WDF will be fetched and passe
d to | |
| * @a get_sumpart(). Otherwise 0 may be passed for the wdf. | | * @a get_sumpart(). Otherwise 0 may be passed for the wdf. | |
| */ | | */ | |
| bool get_sumpart_needs_wdf_() const { | | bool get_sumpart_needs_wdf_() const { | |
| return stats_needed & WDF; | | return stats_needed & WDF; | |
| } | | } | |
| | | | |
|
| | | /** @private @internal Return true if the number of unique terms is nee | |
| | | ded. | |
| | | * | |
| | | * If this method returns true, then the number of unique terms will b | |
| | | e | |
| | | * fetched and passed to @a get_sumpart(). Otherwise 0 may be passed | |
| | | for | |
| | | * the number of unique terms. | |
| | | */ | |
| | | bool get_sumpart_needs_uniqueterms_() const { | |
| | | return stats_needed & UNIQUE_TERMS; | |
| | | } | |
| | | | |
| protected: | | protected: | |
| /** Don't allow copying. | | /** Don't allow copying. | |
| * | | * | |
| * This would ideally be private, but that causes a compilation error | | * This would ideally be private, but that causes a compilation error | |
| * with GCC 4.1 (which appears to be a bug). | | * with GCC 4.1 (which appears to be a bug). | |
| */ | | */ | |
| Weight(const Weight &); | | Weight(const Weight &); | |
| | | | |
| /// Default constructor, needed by subclass constructors. | | /// Default constructor, needed by subclass constructors. | |
| Weight() : stats_needed() { } | | Weight() : stats_needed() { } | |
| | | | |
| skipping to change at line 291 | | skipping to change at line 325 | |
| | | | |
| /// The average length of a document in the collection. | | /// The average length of a document in the collection. | |
| Xapian::doclength get_average_length() const { return average_length_;
} | | Xapian::doclength get_average_length() const { return average_length_;
} | |
| | | | |
| /// The number of documents which this term indexes. | | /// The number of documents which this term indexes. | |
| Xapian::doccount get_termfreq() const { return termfreq_; } | | Xapian::doccount get_termfreq() const { return termfreq_; } | |
| | | | |
| /// The number of relevant documents which this term indexes. | | /// The number of relevant documents which this term indexes. | |
| Xapian::doccount get_reltermfreq() const { return reltermfreq_; } | | Xapian::doccount get_reltermfreq() const { return reltermfreq_; } | |
| | | | |
|
| | | /// The collection frequency of the term. | |
| | | Xapian::termcount get_collection_freq() const { return collectionfreq_; | |
| | | } | |
| | | | |
| /// The length of the query. | | /// The length of the query. | |
| Xapian::termcount get_query_length() const { return query_length_; } | | Xapian::termcount get_query_length() const { return query_length_; } | |
| | | | |
| /// The within-query-frequency of this term. | | /// The within-query-frequency of this term. | |
| Xapian::termcount get_wqf() const { return wqf_; } | | Xapian::termcount get_wqf() const { return wqf_; } | |
| | | | |
| /** An upper bound on the maximum length of any document in the databas
e. | | /** An upper bound on the maximum length of any document in the databas
e. | |
| * | | * | |
| * This should only be used by get_maxpart() and get_maxextra(). | | * This should only be used by get_maxpart() and get_maxextra(). | |
| */ | | */ | |
| | | | |
| skipping to change at line 340 | | skipping to change at line 377 | |
| | | | |
| void init(double factor); | | void init(double factor); | |
| | | | |
| public: | | public: | |
| /** Construct a BoolWeight. */ | | /** Construct a BoolWeight. */ | |
| BoolWeight() { } | | BoolWeight() { } | |
| | | | |
| std::string name() const; | | std::string name() const; | |
| | | | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| BoolWeight * unserialise(const std::string & s) const; | | BoolWeight * unserialise(const std::string & serialised) const; | |
| | | | |
| double get_sumpart(Xapian::termcount wdf, | | double get_sumpart(Xapian::termcount wdf, | |
|
| Xapian::termcount doclen) const; | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| double get_maxpart() const; | | double get_maxpart() const; | |
| | | | |
|
| double get_sumextra(Xapian::termcount doclen) const; | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| double get_maxextra() const; | | double get_maxextra() const; | |
| }; | | }; | |
| | | | |
| /// Xapian::Weight subclass implementing the tf-idf weighting scheme. | | /// Xapian::Weight subclass implementing the tf-idf weighting scheme. | |
| class XAPIAN_VISIBILITY_DEFAULT TfIdfWeight : public Weight { | | class XAPIAN_VISIBILITY_DEFAULT TfIdfWeight : public Weight { | |
| /* Three character string indicating the normalizations for tf(wdf), id
f and | | /* Three character string indicating the normalizations for tf(wdf), id
f and | |
| tfidf weight. */ | | tfidf weight. */ | |
| std::string normalizations; | | std::string normalizations; | |
| | | | |
|
| | | /// The factor to multiply with the weight. | |
| | | double factor; | |
| | | | |
| TfIdfWeight * clone() const; | | TfIdfWeight * clone() const; | |
| | | | |
| void init(double factor); | | void init(double factor); | |
| | | | |
| /* When additional normalizations are implemented in the future, the ad
ditional statistics for them | | /* When additional normalizations are implemented in the future, the ad
ditional statistics for them | |
| should be accessed by these functions. */ | | should be accessed by these functions. */ | |
| double get_wdfn(Xapian::termcount wdf, char c) const; | | double get_wdfn(Xapian::termcount wdf, char c) const; | |
| double get_idfn(Xapian::doccount termfreq, char c) const; | | double get_idfn(Xapian::doccount termfreq, char c) const; | |
| double get_wtn(double wt, char c) const; | | double get_wtn(double wt, char c) const; | |
| | | | |
| | | | |
| skipping to change at line 429 | | skipping to change at line 471 | |
| { | | { | |
| need_stat(TERMFREQ); | | need_stat(TERMFREQ); | |
| need_stat(WDF); | | need_stat(WDF); | |
| need_stat(WDF_MAX); | | need_stat(WDF_MAX); | |
| need_stat(COLLECTION_SIZE); | | need_stat(COLLECTION_SIZE); | |
| } | | } | |
| | | | |
| std::string name() const; | | std::string name() const; | |
| | | | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| TfIdfWeight * unserialise(const std::string & s) const; | | TfIdfWeight * unserialise(const std::string & serialised) const; | |
| | | | |
| double get_sumpart(Xapian::termcount wdf, | | double get_sumpart(Xapian::termcount wdf, | |
|
| Xapian::termcount doclen) const; | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterm) const; | |
| double get_maxpart() const; | | double get_maxpart() const; | |
| | | | |
|
| double get_sumextra(Xapian::termcount doclen) const; | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| double get_maxextra() const; | | double get_maxextra() const; | |
| }; | | }; | |
| | | | |
| /// Xapian::Weight subclass implementing the BM25 probabilistic formula. | | /// Xapian::Weight subclass implementing the BM25 probabilistic formula. | |
| class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight { | | class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight { | |
| /// Factor to multiply the document length by. | | /// Factor to multiply the document length by. | |
| mutable Xapian::doclength len_factor; | | mutable Xapian::doclength len_factor; | |
| | | | |
| /// Factor combining all the document independent factors. | | /// Factor combining all the document independent factors. | |
| mutable double termweight; | | mutable double termweight; | |
| | | | |
| skipping to change at line 531 | | skipping to change at line 575 | |
| need_stat(WDF_MAX); | | need_stat(WDF_MAX); | |
| need_stat(DOC_LENGTH_MIN); | | need_stat(DOC_LENGTH_MIN); | |
| need_stat(AVERAGE_LENGTH); | | need_stat(AVERAGE_LENGTH); | |
| need_stat(DOC_LENGTH); | | need_stat(DOC_LENGTH); | |
| need_stat(WQF); | | need_stat(WQF); | |
| } | | } | |
| | | | |
| std::string name() const; | | std::string name() const; | |
| | | | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| BM25Weight * unserialise(const std::string & s) const; | | BM25Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| double get_sumpart(Xapian::termcount wdf, | | double get_sumpart(Xapian::termcount wdf, | |
|
| Xapian::termcount doclen) const; | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterm) const; | |
| double get_maxpart() const; | | double get_maxpart() const; | |
| | | | |
|
| double get_sumextra(Xapian::termcount doclen) const; | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| double get_maxextra() const; | | double get_maxextra() const; | |
| }; | | }; | |
| | | | |
| /** Xapian::Weight subclass implementing the traditional probabilistic form
ula. | | /** Xapian::Weight subclass implementing the traditional probabilistic form
ula. | |
| * | | * | |
| * This class implements the "traditional" Probabilistic Weighting scheme,
as | | * This class implements the "traditional" Probabilistic Weighting scheme,
as | |
| * described by the early papers on Probabilistic Retrieval. BM25 generall
y | | * described by the early papers on Probabilistic Retrieval. BM25 generall
y | |
| * gives better results. | | * gives better results. | |
| * | | * | |
| * TradWeight(k) is equivalent to BM25Weight(k, 0, 0, 1, 0), except that | | * TradWeight(k) is equivalent to BM25Weight(k, 0, 0, 1, 0), except that | |
| | | | |
| skipping to change at line 590 | | skipping to change at line 636 | |
| need_stat(TERMFREQ); | | need_stat(TERMFREQ); | |
| need_stat(RELTERMFREQ); | | need_stat(RELTERMFREQ); | |
| need_stat(DOC_LENGTH_MIN); | | need_stat(DOC_LENGTH_MIN); | |
| need_stat(WDF); | | need_stat(WDF); | |
| need_stat(WDF_MAX); | | need_stat(WDF_MAX); | |
| } | | } | |
| | | | |
| std::string name() const; | | std::string name() const; | |
| | | | |
| std::string serialise() const; | | std::string serialise() const; | |
|
| TradWeight * unserialise(const std::string & s) const; | | TradWeight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqueterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the InL2 weighting scheme. | |
| | | * | |
| | | * InL2 is a representative scheme of the Divergence from Randomness Frame | |
| | | work | |
| | | * by Gianni Amati. | |
| | | * | |
| | | * This weighting scheme is useful for tasks that require early precision. | |
| | | * | |
| | | * It uses the Inverse document frequency model (In), the Laplace method t | |
| | | o | |
| | | * find the aftereffect of sampling (L) and the second wdf normalization | |
| | | * proposed by Amati to normalize the wdf in the document to the length of | |
| | | the | |
| | | * document (H2). | |
| | | * | |
| | | * For more information about the DFR Framework and the InL2 scheme, pleas | |
| | | e | |
| | | * refer to: Gianni Amati and Cornelis Joost Van Rijsbergen Probabilistic | |
| | | * models of information retrieval based on measuring the divergence from | |
| | | * randomness ACM Transactions on Information Systems (TOIS) 20, (4), 2002 | |
| | | , | |
| | | * pp. 357-389. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT InL2Weight : public Weight { | |
| | | /// The wdf normalization parameter in the formula. | |
| | | double param_c; | |
| | | | |
| | | /// The upper bound on the weight a term can give to a document. | |
| | | double upper_bound; | |
| | | | |
| | | /// The constant values which are used on every call to get_sumpart(). | |
| | | double wqf_product_idf; | |
| | | double c_product_avlen; | |
| | | | |
| | | InL2Weight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct an InL2Weight. | |
| | | * | |
| | | * @param c A non-negative and non zero parameter controlling the ext | |
| | | ent | |
| | | * of the normalization of the wdf to the document length. Th | |
| | | e | |
| | | * default value of 1 is suitable for longer queries but it m | |
| | | ay | |
| | | * need to be changed for shorter queries. For more informati | |
| | | on, | |
| | | * please refer to Gianni Amati's PhD thesis. | |
| | | */ | |
| | | explicit InL2Weight(double c); | |
| | | | |
| | | InL2Weight() | |
| | | : param_c(1.0) | |
| | | { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(WDF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(WQF); | |
| | | need_stat(TERMFREQ); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | InL2Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the IfB2 weighting scheme. | |
| | | * | |
| | | * IfB2 is a representative scheme of the Divergence from Randomness Frame | |
| | | work | |
| | | * by Gianni Amati. | |
| | | * | |
| | | * It uses the Inverse term frequency model (If), the Bernoulli method to | |
| | | find | |
| | | * the aftereffect of sampling (B) and the second wdf normalization propos | |
| | | ed | |
| | | * by Amati to normalize the wdf in the document to the length of the docu | |
| | | ment | |
| | | * (H2). | |
| | | * | |
| | | * For more information about the DFR Framework and the IfB2 scheme, pleas | |
| | | e | |
| | | * refer to: Gianni Amati and Cornelis Joost Van Rijsbergen Probabilistic | |
| | | * models of information retrieval based on measuring the divergence from | |
| | | * randomness ACM Transactions on Information Systems (TOIS) 20, (4), 2002 | |
| | | , | |
| | | * pp. 357-389. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT IfB2Weight : public Weight { | |
| | | /// The wdf normalization parameter in the formula. | |
| | | double param_c; | |
| | | | |
| | | /// The upper bound on the weight. | |
| | | double upper_bound; | |
| | | | |
| | | /// The constant values which are used for calculations in get_sumpart( | |
| | | ). | |
| | | double wqf_product_idf; | |
| | | double c_product_avlen; | |
| | | double B_constant; | |
| | | | |
| | | IfB2Weight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct an IfB2Weight. | |
| | | * | |
| | | * @param c A non-negative and non zero parameter controlling the ext | |
| | | ent | |
| | | * of the normalization of the wdf to the document length. Th | |
| | | e | |
| | | * default value of 1 is suitable for longer queries but it m | |
| | | ay | |
| | | * need to be changed for shorter queries. For more informati | |
| | | on, | |
| | | * please refer to Gianni Amati's PhD thesis titled | |
| | | * Probabilistic Models for Information Retrieval based on | |
| | | * Divergence from Randomness. | |
| | | */ | |
| | | explicit IfB2Weight(double c); | |
| | | | |
| | | IfB2Weight( ) : param_c(1.0) { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | need_stat(WDF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(WQF); | |
| | | need_stat(TERMFREQ); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | IfB2Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterm) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the IneB2 weighting scheme. | |
| | | * | |
| | | * IneB2 is a representative scheme of the Divergence from Randomness | |
| | | * Framework by Gianni Amati. | |
| | | * | |
| | | * It uses the Inverse expected document frequency model (Ine), the Bernou | |
| | | lli | |
| | | * method to find the aftereffect of sampling (B) and the second wdf | |
| | | * normalization proposed by Amati to normalize the wdf in the document to | |
| | | the | |
| | | * length of the document (H2). | |
| | | * | |
| | | * For more information about the DFR Framework and the IneB2 scheme, plea | |
| | | se | |
| | | * refer to: Gianni Amati and Cornelis Joost Van Rijsbergen Probabilistic | |
| | | * models of information retrieval based on measuring the divergence from | |
| | | * randomness ACM Transactions on Information Systems (TOIS) 20, (4), 2002 | |
| | | , | |
| | | * pp. 357-389. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT IneB2Weight : public Weight { | |
| | | /// The wdf normalization parameter in the formula. | |
| | | double param_c; | |
| | | | |
| | | /// The upper bound of the weight. | |
| | | double upper_bound; | |
| | | | |
| | | /// Constant values used in get_sumpart(). | |
| | | double wqf_product_idf; | |
| | | double c_product_avlen; | |
| | | double B_constant; | |
| | | | |
| | | IneB2Weight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct an IneB2Weight. | |
| | | * | |
| | | * @param c A non-negative and non zero parameter controlling the ext | |
| | | ent | |
| | | * of the normalization of the wdf to the document length. Th | |
| | | e | |
| | | * default value of 1 is suitable for longer queries but it m | |
| | | ay | |
| | | * need to be changed for shorter queries. For more informati | |
| | | on, | |
| | | * please refer to Gianni Amati's PhD thesis. | |
| | | */ | |
| | | explicit IneB2Weight(double c); | |
| | | | |
| | | IneB2Weight( ) : param_c(1.0) { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(WDF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(WQF); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | need_stat(TERMFREQ); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | IneB2Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the BB2 weighting scheme. | |
| | | * | |
| | | * BB2 is a representative scheme of the Divergence from Randomness Framew | |
| | | ork | |
| | | * by Gianni Amati. | |
| | | * | |
| | | * It uses the Bose-Einstein probabilistic distribution (B) along with | |
| | | * Stirling's power approximation, the Bernoulli method to find the | |
| | | * aftereffect of sampling (B) and the second wdf normalization proposed b | |
| | | y | |
| | | * Amati to normalize the wdf in the document to the length of the documen | |
| | | t | |
| | | * (H2). | |
| | | * | |
| | | * For more information about the DFR Framework and the BB2 scheme, please | |
| | | * refer to: Gianni Amati and Cornelis Joost Van Rijsbergen Probabilistic | |
| | | * models of information retrieval based on measuring the divergence from | |
| | | * randomness ACM Transactions on Information Systems (TOIS) 20, (4), 2002 | |
| | | , | |
| | | * pp. 357-389. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT BB2Weight : public Weight { | |
| | | /// The wdf normalization parameter in the formula. | |
| | | double param_c; | |
| | | | |
| | | /// The upper bound on the weight. | |
| | | double upper_bound; | |
| | | | |
| | | /// The constant values to be used in get_sumpart(). | |
| | | double c_product_avlen; | |
| | | double B_constant; | |
| | | double wt; | |
| | | double stirling_constant_1; | |
| | | double stirling_constant_2; | |
| | | | |
| | | BB2Weight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct a BB2Weight. | |
| | | * | |
| | | * @param c A non-negative and non zero parameter controlling the ext | |
| | | ent | |
| | | * of the normalization of the wdf to the document length. A | |
| | | * default value of 1 is suitable for longer queries but it m | |
| | | ay | |
| | | * need to be changed for shorter queries. For more informati | |
| | | on, | |
| | | * please refer to Gianni Amati's PhD thesis titled | |
| | | * Probabilistic Models for Information Retrieval based on | |
| | | * Divergence from Randomness. | |
| | | */ | |
| | | explicit BB2Weight(double c); | |
| | | | |
| | | BB2Weight( ) : param_c(1.0) { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | need_stat(WDF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(WQF); | |
| | | need_stat(TERMFREQ); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | BB2Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the DLH weighting scheme, which is a representati | |
| | | ve | |
| | | * scheme of the Divergence from Randomness Framework by Gianni Amati. | |
| | | * | |
| | | * This is a parameter free weighting scheme and it should be used with qu | |
| | | ery | |
| | | * expansion to obtain better results. It uses the HyperGeometric Probabil | |
| | | istic | |
| | | * model and Laplace's normalization to calculate the risk gain. | |
| | | * | |
| | | * For more information about the DFR Framework and the DLH scheme, please | |
| | | * refer to: | |
| | | * a.) Gianni Amati and Cornelis Joost Van Rijsbergen Probabilistic | |
| | | * models of information retrieval based on measuring the divergence from | |
| | | * randomness ACM Transactions on Information Systems (TOIS) 20, (4), 2002 | |
| | | , pp. | |
| | | * 357-389. | |
| | | * b.) FUB, IASI-CNR and University of Tor Vergata at TREC 2007 Blog Track | |
| | | . | |
| | | * G. Amati and E. Ambrosi and M. Bianchi and C. Gaibisso and G. Gambosi. | |
| | | * Proceedings of the 16th Text REtrieval Conference (TREC-2007), 2008. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT DLHWeight : public Weight { | |
| | | /// The lower bound on the weight. | |
| | | double lower_bound; | |
| | | | |
| | | /// The upper bound on the weight. | |
| | | double upper_bound; | |
| | | | |
| | | /// The constant values to be used in get_sumpart(). | |
| | | double log_constant; | |
| | | double wqf_product_factor; | |
| | | | |
| | | DLHWeight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | DLHWeight() { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | need_stat(WDF); | |
| | | need_stat(WQF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | DLHWeight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the PL2 weighting scheme. | |
| | | * | |
| | | * PL2 is a representative scheme of the Divergence from Randomness Framew | |
| | | ork | |
| | | * by Gianni Amati. | |
| | | * | |
| | | * This weighting scheme is useful for tasks that require early precision. | |
| | | * | |
| | | * It uses the Poisson approximation of the Binomial Probabilistic distrib | |
| | | ution | |
| | | * (P) along with Stirling's approximation for the factorial value, the La | |
| | | place | |
| | | * method to find the aftereffect of sampling (L) and the second wdf | |
| | | * normalization proposed by Amati to normalize the wdf in the document to | |
| | | the | |
| | | * length of the document (H2). | |
| | | * | |
| | | * For more information about the DFR Framework and the PL2 scheme, please | |
| | | * refer to: Gianni Amati and Cornelis Joost Van Rijsbergen Probabilistic | |
| | | models | |
| | | * of information retrieval based on measuring the divergence from randomn | |
| | | ess | |
| | | * ACM Transactions on Information Systems (TOIS) 20, (4), 2002, pp. 357-3 | |
| | | 89. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT PL2Weight : public Weight { | |
| | | /// The wdf normalization parameter in the formula. | |
| | | double param_c; | |
| | | | |
| | | /// The lower bound of the weight. | |
| | | double lower_bound; | |
| | | | |
| | | /// The upper bound on the weight. | |
| | | double upper_bound; | |
| | | | |
| | | /// Constants for a given term in a given query. | |
| | | double P1, P2; | |
| | | | |
| | | /// Set by init() to (param_c * get_average_length()) | |
| | | double cl; | |
| | | | |
| | | PL2Weight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct a PL2Weight. | |
| | | * | |
| | | * @param c A strictly positive (non-zero) parameter controlling the ext | |
| | | ent | |
| | | * of the normalization of the wdf to the document length. Th | |
| | | e | |
| | | * default value of 1 is suitable for longer queries but it m | |
| | | ay | |
| | | * need to be changed for shorter queries. For more informati | |
| | | on, | |
| | | * please refer to Gianni Amati's PhD thesis titled | |
| | | * Probabilistic Models for Information Retrieval based on | |
| | | * Divergence from Randomness. | |
| | | */ | |
| | | explicit PL2Weight(double c); | |
| | | | |
| | | PL2Weight( ) : param_c(1.0) { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | need_stat(WDF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(WQF); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | PL2Weight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** This class implements the DPH weighting scheme. | |
| | | * | |
| | | * DPH is a representative scheme of the Divergence from Randomness Framew | |
| | | ork | |
| | | * by Gianni Amati. | |
| | | * | |
| | | * This is a parameter free weighting scheme and it should be used with qu | |
| | | ery | |
| | | * expansion to obtain better results. It uses the HyperGeometric Probabil | |
| | | istic | |
| | | * model and Popper's normalization to calculate the risk gain. | |
| | | * | |
| | | * For more information about the DFR Framework and the DPH scheme, please | |
| | | * refer to : | |
| | | * a.) Gianni Amati and Cornelis Joost Van Rijsbergen | |
| | | * Probabilistic models of information retrieval based on measuring the | |
| | | * divergence from randomness ACM Transactions on Information Systems (TOI | |
| | | S) 20, | |
| | | * (4), 2002, pp. 357-389. | |
| | | * b.) FUB, IASI-CNR and University of Tor Vergata at TREC 2007 Blog Track | |
| | | . | |
| | | * G. Amati and E. Ambrosi and M. Bianchi and C. Gaibisso and G. Gambosi. | |
| | | * Proceedings of the 16th Text Retrieval Conference (TREC-2007), 2008. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT DPHWeight : public Weight { | |
| | | /// The upper bound on the weight. | |
| | | double upper_bound; | |
| | | | |
| | | /// The lower bound on the weight. | |
| | | double lower_bound; | |
| | | | |
| | | /// The constant value used in get_sumpart(). | |
| | | double log_constant; | |
| | | double wqf_product_factor; | |
| | | | |
| | | DPHWeight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct a DPHWeight. */ | |
| | | DPHWeight() { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | need_stat(WDF); | |
| | | need_stat(WQF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(DOC_LENGTH_MIN); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | DPHWeight * unserialise(const std::string & serialised) const; | |
| | | | |
| | | double get_sumpart(Xapian::termcount wdf, | |
| | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxpart() const; | |
| | | | |
| | | double get_sumextra(Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterms) const; | |
| | | double get_maxextra() const; | |
| | | }; | |
| | | | |
| | | /** Xapian::Weight subclass implementing the Language Model formula. | |
| | | * | |
| | | * This class implements the "Language Model" Weighting scheme, as | |
| | | * described by the early papers on LM by Bruce Croft. | |
| | | * | |
| | | * LM works by comparing the query to a Language Model of the document. | |
| | | * The language model itself is parameter-free, though LMWeight takes | |
| | | * parameters which specify the smoothing used. | |
| | | */ | |
| | | class XAPIAN_VISIBILITY_DEFAULT LMWeight : public Weight { | |
| | | | |
| | | /** The type of smoothing to use. */ | |
| | | type_smoothing select_smoothing; | |
| | | | |
| | | // Parameters for handling negative value of log, and for smoothing. | |
| | | double param_log, param_smoothing1, param_smoothing2; | |
| | | | |
| | | // Collection weight. | |
| | | double weight_collection; | |
| | | | |
| | | LMWeight * clone() const; | |
| | | | |
| | | void init(double factor); | |
| | | | |
| | | public: | |
| | | /** Construct a LMWeight. | |
| | | * | |
| | | * @param param_log_ A non-negative parameter controlling how muc | |
| | | h | |
| | | * to clamp negative values returned by the log | |
| | | . | |
| | | * The log is calculated by multiplying the | |
| | | * actual weight by param_log. If param_log is | |
| | | * 0.0, then the document length upper bound wi | |
| | | ll | |
| | | * be used (default: document length upper boun | |
| | | d) | |
| | | * | |
| | | * @param select_smoothing_ A parameter of type enum | |
| | | * type_smoothing. This parameter | |
| | | * controls which smoothing type to use | |
| | | . | |
| | | * (default: TWO_STAGE_SMOOTHING) | |
| | | * | |
| | | * @param param_smoothing1_ A non-negative parameter for smoothi | |
| | | ng | |
| | | * whose meaning depends on | |
| | | * select_smoothing_. In | |
| | | * JELINEK_MERCER_SMOOTHING, it plays t | |
| | | he | |
| | | * role of estimation and in | |
| | | * DIRICHLET_SMOOTHING the role of quer | |
| | | y | |
| | | * modelling. (default JELINEK_MERCER, | |
| | | * ABSOLUTE, TWOSTAGE(0.7), | |
| | | * DIRICHLET(2000)) | |
| | | * | |
| | | * @param param_smoothing2_ A non-negative parameter which is us | |
| | | ed | |
| | | * only with TWO_STAGE_SMOOTHING as | |
| | | * parameter for Dirichlet's smoothing. | |
| | | * (default: 2000) | |
| | | */ | |
| | | // Unigram LM Constructor to specifically mention all parameters for ha | |
| | | ndling negative log value and smoothing. | |
| | | explicit LMWeight(double param_log_ = 0.0, | |
| | | type_smoothing select_smoothing_ = TWO_STAGE_SMOOTHING | |
| | | , | |
| | | double param_smoothing1_ = 0.7, | |
| | | double param_smoothing2_ = 2000.0) | |
| | | : select_smoothing(select_smoothing_), param_log(param_log_), param_ | |
| | | smoothing1(param_smoothing1_), | |
| | | param_smoothing2(param_smoothing2_) | |
| | | { | |
| | | need_stat(AVERAGE_LENGTH); | |
| | | need_stat(DOC_LENGTH); | |
| | | need_stat(COLLECTION_SIZE); | |
| | | need_stat(RSET_SIZE); | |
| | | need_stat(TERMFREQ); | |
| | | need_stat(RELTERMFREQ); | |
| | | need_stat(DOC_LENGTH_MAX); | |
| | | need_stat(WDF); | |
| | | need_stat(WDF_MAX); | |
| | | need_stat(COLLECTION_FREQ); | |
| | | if (select_smoothing == ABSOLUTE_DISCOUNT_SMOOTHING) | |
| | | need_stat(UNIQUE_TERMS); | |
| | | } | |
| | | | |
| | | std::string name() const; | |
| | | | |
| | | std::string serialise() const; | |
| | | LMWeight * unserialise(const std::string & s) const; | |
| | | | |
| double get_sumpart(Xapian::termcount wdf, | | double get_sumpart(Xapian::termcount wdf, | |
|
| Xapian::termcount doclen) const; | | Xapian::termcount doclen, | |
| | | Xapian::termcount uniqterm) const; | |
| double get_maxpart() const; | | double get_maxpart() const; | |
| | | | |
|
| double get_sumextra(Xapian::termcount doclen) const; | | double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const; | |
| double get_maxextra() const; | | double get_maxextra() const; | |
| }; | | }; | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_WEIGHT_H | | #endif // XAPIAN_INCLUDED_WEIGHT_H | |
| | | | |
End of changes. 27 change blocks. |
| 18 lines changed or deleted | | 722 lines changed or added | |
|