| derefwrapper.h | | derefwrapper.h | |
| /** @file derefwrapper.h | | /** @file derefwrapper.h | |
|
| * @brief Class for wrapping std::string returned by an input_iterator. | | * @brief Class for wrapping type returned by an input_iterator. | |
| */ | | */ | |
|
| /* Copyright (C) 2004,2008 Olly Betts | | /* Copyright (C) 2004,2008,2009 Olly Betts | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_DEREFWRAPPER_H | | #ifndef XAPIAN_INCLUDED_DEREFWRAPPER_H | |
| #define XAPIAN_INCLUDED_DEREFWRAPPER_H | | #define XAPIAN_INCLUDED_DEREFWRAPPER_H | |
| | | | |
|
| #include <string> | | | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
|
| /** @internal Class which returns a std::string when dereferenced with *. | | /** @private @internal Class which returns a value when dereferenced with | |
| | | * operator*. | |
| * | | * | |
| * We need this wrapper class to implement input_iterator semantics for the | | * We need this wrapper class to implement input_iterator semantics for the | |
|
| * postfix operator++ methods of TermIterator and ValueIterator. | | * postfix operator++ methods of some of our iterator classes. | |
| */ | | */ | |
|
| class DerefStringWrapper_ { | | template<typename T> | |
| std::string s; | | class DerefWrapper_ { | |
| | | /// Don't allow assignment. | |
| | | void operator=(const DerefWrapper_ &); | |
| | | | |
| | | /// The value. | |
| | | T res; | |
| | | | |
| public: | | public: | |
|
| explicit DerefStringWrapper_(const std::string & s_) : s(s_) { } | | explicit DerefWrapper_(const T &res_) : res(res_) { } | |
| const std::string & operator*() const { return s; } | | const T & operator*() const { return res; } | |
| }; | | }; | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_DEREFWRAPPER_H | | #endif // XAPIAN_INCLUDED_DEREFWRAPPER_H | |
| | | | |
End of changes. 7 change blocks. |
| 10 lines changed or deleted | | 14 lines changed or added | |
|
| document.h | | document.h | |
| /** \file document.h | | /** \file document.h | |
| * \brief API for working with documents | | * \brief API for working with documents | |
| */ | | */ | |
| /* Copyright 1999,2000,2001 BrightStation PLC | | /* Copyright 1999,2000,2001 BrightStation PLC | |
| * Copyright 2002 Ananova Ltd | | * Copyright 2002 Ananova Ltd | |
|
| * Copyright 2002,2003,2004,2006,2007,2009 Olly Betts | | * Copyright 2002,2003,2004,2006,2007,2009,2010 Olly Betts | |
| * Copyright 2009 Lemur Consulting Ltd | | * Copyright 2009 Lemur Consulting Ltd | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 38 | | skipping to change at line 38 | |
| #include <string> | | #include <string> | |
| | | | |
| #include <xapian/base.h> | | #include <xapian/base.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/termiterator.h> | | #include <xapian/termiterator.h> | |
| #include <xapian/valueiterator.h> | | #include <xapian/valueiterator.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
|
| /// A document in the database - holds data, values, terms, and postings | | /** A handle representing a document in a Xapian database. | |
| | | * | |
| | | * The Document class fetches information from the database lazily. Usually | |
| | | * this behaviour isn't visible to users (except for the speed benefits), but | |
| | | * if the document in the database is modified or deleted, then preexisting | |
| | | * Document objects may return the old or new versions of data (or throw | |
| | | * Xapian::DocNotFoundError in the case of deletion). | |
| | | * | |
| | | * Since Database objects work on a snapshot of the database's state, the | |
| | | * situation above can only happen with a WritableDatabase object, or if | |
| | | * you call Database::reopen() on a Database object. | |
| | | * | |
| | | * We recommend you avoid designs where this behaviour is an issue, but if | |
| | | * you need a way to make a non-lazy version of a Document object, you can do | |
| | | * this like so: | |
| | | * | |
| | | * doc = Xapian::Document::unserialise(doc.serialise()); | |
| | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT Document { | | class XAPIAN_VISIBILITY_DEFAULT Document { | |
| public: | | public: | |
| class Internal; | | class Internal; | |
| /// @private @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::RefCntPtr<Internal> internal; | | Xapian::Internal::RefCntPtr<Internal> internal; | |
| | | | |
| /** @private @internal Constructor is only used by internal classes. | | /** @private @internal Constructor is only used by internal classes. | |
| * | | * | |
| * @param internal_ pointer to internal opaque class | | * @param internal_ pointer to internal opaque class | |
| */ | | */ | |
| | | | |
| skipping to change at line 125 | | skipping to change at line 142 | |
| Xapian::termpos tpos, | | Xapian::termpos tpos, | |
| Xapian::termcount wdfinc = 1); | | Xapian::termcount wdfinc = 1); | |
| | | | |
| /** Add a term to the document, without positional information. | | /** Add a term to the document, without positional information. | |
| * | | * | |
| * Any existing positional information for the term will be left | | * Any existing positional information for the term will be left | |
| * unmodified. | | * unmodified. | |
| * | | * | |
| * @param tname The name of the term. | | * @param tname The name of the term. | |
| * @param wdfinc The increment that will be applied to the wdf | | * @param wdfinc The increment that will be applied to the wdf | |
|
| * for this term. | | * for this term (default: 1). | |
| */ | | */ | |
| void add_term(const std::string & tname, Xapian::termcount wdfinc =
1); | | void add_term(const std::string & tname, Xapian::termcount wdfinc =
1); | |
| | | | |
|
| | | /** Add a boolean filter term to the document. | |
| | | * | |
| | | * This method adds @a term to the document with wdf of 0 - | |
| | | * this is generally what you want for a term used for boolean | |
| | | * filtering as the wdf of such terms is ignored, and it doesn't | |
| | | * make sense for them to contribute to the document's length. | |
| | | * | |
| | | * If the specified term already indexes this document, this method | |
| | | * has no effect. | |
| | | * | |
| | | * It is exactly the same as add_term(term, 0). | |
| | | * | |
| | | * This method was added in Xapian 1.0.18. | |
| | | * | |
| | | * @param term The term to add. | |
| | | */ | |
| | | void add_boolean_term(const std::string & term) { add_term(term, 0); | |
| | | } | |
| | | | |
| /** Remove a posting of a term from the document. | | /** Remove a posting of a term from the document. | |
| * | | * | |
| * Note that the term will still index the document even if all | | * Note that the term will still index the document even if all | |
| * occurrences are removed. To remove a term from a document | | * occurrences are removed. To remove a term from a document | |
| * completely, use remove_term(). | | * completely, use remove_term(). | |
| * | | * | |
| * @param tname The name of the term. | | * @param tname The name of the term. | |
| * @param tpos The position of the term. | | * @param tpos The position of the term. | |
| * @param wdfdec The decrement that will be applied to the wdf | | * @param wdfdec The decrement that will be applied to the wdf | |
| * when removing this posting. The wdf will not go | | * when removing this posting. The wdf will not go | |
| | | | |
End of changes. 4 change blocks. |
| 3 lines changed or deleted | | 43 lines changed or added | |
|
| matchspy.h | | matchspy.h | |
| /** @file matchspy.h | | /** @file matchspy.h | |
| * @brief MatchSpy implementation. | | * @brief MatchSpy implementation. | |
| */ | | */ | |
| /* Copyright (C) 2007,2008,2009 Olly Betts | | /* Copyright (C) 2007,2008,2009 Olly Betts | |
| * Copyright (C) 2007,2009 Lemur Consulting Ltd | | * Copyright (C) 2007,2009 Lemur Consulting Ltd | |
|
| | | * Copyright (C) 2010 Richard Boulton | |
| * | | * | |
| * This program is free software; you can redistribute it and/or modify | | * This program is free software; you can redistribute it and/or modify | |
| * it under the terms of the GNU General Public License as published by | | * it under the terms of the GNU General Public License as published by | |
| * the Free Software Foundation; either version 2 of the License, or | | * the Free Software Foundation; either version 2 of the License, or | |
| * (at your option) any later version. | | * (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| * | | * | |
| * You should have received a copy of the GNU General Public License | | * You should have received a copy of the GNU General Public License | |
| * along with this program; if not, write to the Free Software | | * along with this program; if not, write to the Free Software | |
| * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_MATCHSPY_H | | #ifndef XAPIAN_INCLUDED_MATCHSPY_H | |
| #define XAPIAN_INCLUDED_MATCHSPY_H | | #define XAPIAN_INCLUDED_MATCHSPY_H | |
| | | | |
|
| | | #include <xapian/base.h> | |
| #include <xapian/enquire.h> | | #include <xapian/enquire.h> | |
|
| | | #include <xapian/termiterator.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| #include <string> | | #include <string> | |
| #include <map> | | #include <map> | |
| #include <set> | | #include <set> | |
| #include <string> | | #include <string> | |
| #include <vector> | | #include <vector> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
| | | | |
| skipping to change at line 159 | | skipping to change at line 162 | |
| /** Return a string describing this object. | | /** Return a string describing this object. | |
| * | | * | |
| * This default implementation returns a generic answer, to avoid forcing | | * This default implementation returns a generic answer, to avoid forcing | |
| * those deriving their own MatchSpy subclasses from having to implement | | * those deriving their own MatchSpy subclasses from having to implement | |
| * this (they may not care what get_description() gives for their | | * this (they may not care what get_description() gives for their | |
| * subclass). | | * subclass). | |
| */ | | */ | |
| virtual std::string get_description() const; | | virtual std::string get_description() const; | |
| }; | | }; | |
| | | | |
|
| /** A string with a corresponding frequency. | | | |
| */ | | | |
| class XAPIAN_VISIBILITY_DEFAULT StringAndFrequency { | | | |
| std::string str; | | | |
| Xapian::doccount frequency; | | | |
| public: | | | |
| /// Construct a StringAndFrequency object. | | | |
| StringAndFrequency(std::string str_, Xapian::doccount frequency_) | | | |
| : str(str_), frequency(frequency_) {} | | | |
| | | | |
| /// Return the string. | | | |
| std::string get_string() const { return str; } | | | |
| | | | |
| /// Return the frequency. | | | |
| Xapian::doccount get_frequency() const { return frequency; } | | | |
| }; | | | |
| | | | |
| /** Class for counting the frequencies of values in the matching documents. | | /** Class for counting the frequencies of values in the matching documents. | |
| * | | * | |
| * Warning: this API is currently experimental, and is liable to change | | * Warning: this API is currently experimental, and is liable to change | |
| * between releases without warning. | | * between releases without warning. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT ValueCountMatchSpy : public MatchSpy { | | class XAPIAN_VISIBILITY_DEFAULT ValueCountMatchSpy : public MatchSpy { | |
|
| protected: | | public: | |
| /// The slot to count. | | struct Internal; | |
| Xapian::valueno slot; | | | |
| | | | |
|
| /// Total number of documents seen by the match spy. | | #ifndef SWIG // SWIG doesn't need to know about the internal class | |
| Xapian::doccount total; | | struct XAPIAN_VISIBILITY_DEFAULT Internal | |
| | | : public Xapian::Internal::RefCntBase | |
| | | { | |
| | | /// The slot to count. | |
| | | Xapian::valueno slot; | |
| | | | |
|
| /// The values seen so far, together with their frequency. | | /// Total number of documents seen by the match spy. | |
| std::map<std::string, Xapian::doccount> values; | | Xapian::doccount total; | |
| | | | |
| | | /// The values seen so far, together with their frequency. | |
| | | std::map<std::string, Xapian::doccount> values; | |
| | | | |
| | | Internal() : slot(Xapian::BAD_VALUENO), total(0) {} | |
| | | Internal(Xapian::valueno slot_) : slot(slot_), total(0) {} | |
| | | }; | |
| | | #endif | |
| | | | |
| | | protected: | |
| | | Xapian::Internal::RefCntPtr<Internal> internal; | |
| | | | |
| public: | | public: | |
| /// Construct an empty ValueCountMatchSpy. | | /// Construct an empty ValueCountMatchSpy. | |
|
| ValueCountMatchSpy() : slot(Xapian::BAD_VALUENO), total(0) {} | | ValueCountMatchSpy() : internal() {} | |
| | | | |
|
| /** Construct a MatchSpy which counts the values in a particular slot. | | /// Construct a MatchSpy which counts the values in a particular slot. | |
| * | | | |
| * Further slots can be added by calling @a add_slot(). | | | |
| */ | | | |
| ValueCountMatchSpy(Xapian::valueno slot_) | | ValueCountMatchSpy(Xapian::valueno slot_) | |
|
| : slot(slot_), total(0) { | | : internal(new Internal(slot_)) {} | |
| } | | | |
| | | | |
| /// Return the values seen in the slot. | | | |
| const std::map<std::string, Xapian::doccount> & get_values() const { | | | |
| return values; | | | |
| } | | | |
| | | | |
| /** Return the total number of documents tallied. */ | | /** Return the total number of documents tallied. */ | |
| size_t get_total() const { | | size_t get_total() const { | |
|
| return total; | | return internal->total; | |
| } | | } | |
| | | | |
|
| /** Get the most frequent values in the slot. | | /** Get an iterator over the values seen in the slot. | |
| * | | * | |
|
| * @param result A vector which will be filled with the most frequent | | * Items will be returned in ascending alphabetical order. | |
| * values, in descending order of frequency. Values wit | | * | |
| h | | * During the iteration, the frequency of the current value can be | |
| * the same frequency will be sorted in ascending | | * obtained with the get_termfreq() method on the iterator. | |
| * alphabetical order. | | */ | |
| | | TermIterator values_begin() const; | |
| | | | |
| | | /** End iterator corresponding to values_begin() */ | |
| | | TermIterator values_end() const { | |
| | | return TermIterator(NULL); | |
| | | } | |
| | | | |
| | | /** Get an iterator over the most frequent values seen in the slot. | |
| | | * | |
| | | * Items will be returned in descending order of frequency. Values with | |
| | | * the same frequency will be returned in ascending alphabetical order. | |
| | | * | |
| | | * During the iteration, the frequency of the current value can be | |
| | | * obtained with the get_termfreq() method on the iterator. | |
| * | | * | |
| * @param maxvalues The maximum number of values to return. | | * @param maxvalues The maximum number of values to return. | |
| */ | | */ | |
|
| void get_top_values(std::vector<StringAndFrequency> & result, | | TermIterator top_values_begin(size_t maxvalues) const; | |
| size_t maxvalues) const; | | | |
| | | /** End iterator corresponding to top_values_begin() */ | |
| | | TermIterator top_values_end(size_t) const { | |
| | | return TermIterator(NULL); | |
| | | } | |
| | | | |
| /** Implementation of virtual operator(). | | /** Implementation of virtual operator(). | |
| * | | * | |
| * This implementation tallies values for a matching document. | | * This implementation tallies values for a matching document. | |
| */ | | */ | |
| void operator()(const Xapian::Document &doc, Xapian::weight wt); | | void operator()(const Xapian::Document &doc, Xapian::weight wt); | |
| | | | |
| virtual MatchSpy * clone() const; | | virtual MatchSpy * clone() const; | |
| virtual std::string name() const; | | virtual std::string name() const; | |
| virtual std::string serialise() const; | | virtual std::string serialise() const; | |
| virtual MatchSpy * unserialise(const std::string & s, | | virtual MatchSpy * unserialise(const std::string & s, | |
| const Registry & context) const; | | const Registry & context) const; | |
| virtual std::string serialise_results() const; | | virtual std::string serialise_results() const; | |
| virtual void merge_results(const std::string & s); | | virtual void merge_results(const std::string & s); | |
| virtual std::string get_description() const; | | virtual std::string get_description() const; | |
| }; | | }; | |
| | | | |
|
| /** A numeric range. | | | |
| * | | | |
| * This is used to represent ranges of values returned by the match spies. | | | |
| * | | | |
| * Warning: this API is currently experimental, and is liable to change | | | |
| * between releases without warning. | | | |
| */ | | | |
| class XAPIAN_VISIBILITY_DEFAULT NumericRange { | | | |
| /// The lower value in the range. | | | |
| double lower; | | | |
| | | | |
| /// The upper value in the range. | | | |
| double upper; | | | |
| | | | |
| public: | | | |
| /** Construct a NumericRange object. | | | |
| * | | | |
| * @param lower_ The start of the range. | | | |
| * @param upper_ The end of the range. | | | |
| */ | | | |
| NumericRange(double lower_, double upper_) | | | |
| : lower(lower_), upper(upper_) {} | | | |
| | | | |
| /// Get the start of the range. | | | |
| double get_lower() const { return lower; } | | | |
| | | | |
| /// Get the end of the range. | | | |
| double get_upper() const { return upper; } | | | |
| | | | |
| /// Provide an ordering of NumericRange objects. | | | |
| bool operator<(const NumericRange & other) const { | | | |
| if (lower < other.lower) return true; | | | |
| if (lower > other.lower) return false; | | | |
| return (upper < other.upper); | | | |
| } | | | |
| }; | | | |
| | | | |
| /// A set of numeric ranges, with corresponding frequencies. | | | |
| class XAPIAN_VISIBILITY_DEFAULT NumericRanges { | | | |
| /** The ranges of values, together with the frequency sum of each range. | | | |
| */ | | | |
| std::map<Xapian::NumericRange, Xapian::doccount> ranges; | | | |
| | | | |
| /** @return The total number of values seen. | | | |
| * | | | |
| * This is the sum of the frequencies for all the values supplied. | | | |
| */ | | | |
| doccount values_seen; | | | |
| | | | |
| public: | | | |
| /// Construct an empty NumericRanges object. | | | |
| NumericRanges() : values_seen(0) {} | | | |
| | | | |
| /** Construct a NumericRanges from values and a target number of ranges | | | |
| . | | | |
| * | | | |
| * The values supplied should be sort-encoded numeric values. | | | |
| * | | | |
| * For "continuous" values (such as price, height, weight, etc), there | | | |
| * will usually be too many different values to offer the user, and th | | | |
| e | | | |
| * user won't want to restrict to an exact value anyway. | | | |
| * | | | |
| * This method produces a set of NumericRange objects for a particular | | | |
| * value number. | | | |
| * | | | |
| * @param values The values representing the initial numbers. | | | |
| * @param max_ranges Group into at most this many ranges. | | | |
| */ | | | |
| NumericRanges(const std::map<std::string, Xapian::doccount> & values, | | | |
| size_t max_ranges); | | | |
| | | | |
| /// Get the number of values seen. | | | |
| doccount get_values_seen() const { return values_seen; } | | | |
| | | | |
| /// Get the ranges. | | | |
| const std::map<Xapian::NumericRange, Xapian::doccount> & get_ranges() c | | | |
| onst { return ranges; } | | | |
| }; | | | |
| | | | |
| /** Return a score reflecting how evenly divided a set of values is. | | | |
| * | | | |
| * Warning: this API is currently experimental, and is liable to change | | | |
| * between releases without warning. | | | |
| * | | | |
| * If you don't want to show a poor categorisation, or have multiple | | | |
| * categories and only space in your user interface to show a few, you want to | | | |
| * be able to decide how "good" a categorisation is. One definition of "good" | | | |
| * is that it offers a fairly even split of the available values, and | | | |
| * (optionally) about a specified number of options. | | | |
| * | | | |
| * @param values The values making up the categorisation, together with their | | | |
| * frequencies. | | | |
| * | | | |
| * @param total The total number of documents seen. | | | |
| * | | | |
| * @param desired_no_of_categories The desired number of categories - this is | | | |
| * a floating point value, so you can ask for 5.5 if you'd like "about 5 or 6 | | | |
| * categories". The default is to desire the number of categories that there | | | |
| * actually are, so the score then only reflects how even the split is. | | | |
| * | | | |
| * @return A score for the categorisation for the value - lower is better, | | | |
| * with a perfectly even split across the right number of categories scoring | | | |
| * 0. | | | |
| */ | | | |
| //@{ | | | |
| double XAPIAN_VISIBILITY_DEFAULT score_evenness( | | | |
| const std::map<std::string, Xapian::doccount> & values, | | | |
| Xapian::doccount total, | | | |
| double desired_no_of_categories = 0.0); | | | |
| double XAPIAN_VISIBILITY_DEFAULT score_evenness( | | | |
| const std::map<Xapian::NumericRange, Xapian::doccount> & values, | | | |
| Xapian::doccount total, | | | |
| double desired_no_of_categories = 0.0); | | | |
| double XAPIAN_VISIBILITY_DEFAULT score_evenness( | | | |
| const ValueCountMatchSpy & spy, | | | |
| double desired_no_of_categories = 0.0); | | | |
| double XAPIAN_VISIBILITY_DEFAULT score_evenness( | | | |
| const NumericRanges & ranges, | | | |
| double desired_no_of_categories = 0.0); | | | |
| //@} | | | |
| | | | |
| } | | } | |
| | | | |
| #endif // XAPIAN_INCLUDED_MATCHSPY_H | | #endif // XAPIAN_INCLUDED_MATCHSPY_H | |
| | | | |
End of changes. 15 change blocks. |
| 175 lines changed or deleted | | 56 lines changed or added | |
|
| positioniterator.h | | positioniterator.h | |
| /** \file positioniterator.h | | /** \file positioniterator.h | |
| * \brief Classes for iterating through position lists | | * \brief Classes for iterating through position lists | |
| */ | | */ | |
| /* Copyright 1999,2000,2001 BrightStation PLC | | /* Copyright 1999,2000,2001 BrightStation PLC | |
| * Copyright 2002 Ananova Ltd | | * Copyright 2002 Ananova Ltd | |
|
| * Copyright 2003,2004,2007 Olly Betts | | * Copyright 2003,2004,2007,2009 Olly Betts | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| | | | |
| skipping to change at line 31 | | skipping to change at line 31 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_POSITIONITERATOR_H | | #ifndef XAPIAN_INCLUDED_POSITIONITERATOR_H | |
| #define XAPIAN_INCLUDED_POSITIONITERATOR_H | | #define XAPIAN_INCLUDED_POSITIONITERATOR_H | |
| | | | |
| #include <iterator> | | #include <iterator> | |
| #include <string> | | #include <string> | |
| | | | |
| #include <xapian/base.h> | | #include <xapian/base.h> | |
|
| | | #include <xapian/derefwrapper.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
| class Database; | | class Database; | |
| class PostingIterator; | | class PostingIterator; | |
| class TermIterator; | | class TermIterator; | |
| | | | |
|
| /** @internal A wrapper class for a termpos which returns the termpos if | | | |
| * dereferenced with *. We need this to implement input_iterator semantics. | | | |
| */ | | | |
| class TermPosWrapper { | | | |
| private: | | | |
| termpos pos; | | | |
| public: | | | |
| explicit TermPosWrapper(termpos pos_) : pos(pos_) { } | | | |
| termpos operator*() const { return pos; } | | | |
| }; | | | |
| | | | |
| /** An iterator pointing to items in a list of positions. | | /** An iterator pointing to items in a list of positions. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT PositionIterator { | | class XAPIAN_VISIBILITY_DEFAULT PositionIterator { | |
| private: | | private: | |
| // friend classes which need to be able to construct us | | // friend classes which need to be able to construct us | |
| friend class PostingIterator; | | friend class PostingIterator; | |
| friend class TermIterator; | | friend class TermIterator; | |
| friend class Database; | | friend class Database; | |
| | | | |
| public: | | public: | |
| | | | |
| skipping to change at line 90 | | skipping to change at line 80 | |
| | | | |
| /** Assignment is allowed. The internals are reference counted, | | /** Assignment is allowed. The internals are reference counted, | |
| * so assignment is also cheap. | | * so assignment is also cheap. | |
| */ | | */ | |
| void operator=(const PositionIterator &o); | | void operator=(const PositionIterator &o); | |
| | | | |
| Xapian::termpos operator *() const; | | Xapian::termpos operator *() const; | |
| | | | |
| PositionIterator & operator++(); | | PositionIterator & operator++(); | |
| | | | |
|
| TermPosWrapper operator++(int) { | | DerefWrapper_<termpos> operator++(int) { | |
| Xapian::termpos tmp = **this; | | Xapian::termpos tmp = **this; | |
| operator++(); | | operator++(); | |
|
| return TermPosWrapper(tmp); | | return DerefWrapper_<termpos>(tmp); | |
| } | | } | |
| | | | |
| // extra method, not required for an input_iterator | | // extra method, not required for an input_iterator | |
| void skip_to(Xapian::termpos pos); | | void skip_to(Xapian::termpos pos); | |
| | | | |
| /// Return a string describing this object. | | /// Return a string describing this object. | |
| std::string get_description() const; | | std::string get_description() const; | |
| | | | |
| // Allow use as an STL iterator | | // Allow use as an STL iterator | |
| typedef std::input_iterator_tag iterator_category; | | typedef std::input_iterator_tag iterator_category; | |
| | | | |
End of changes. 5 change blocks. |
| 15 lines changed or deleted | | 4 lines changed or added | |
|
| postingiterator.h | | postingiterator.h | |
| | | | |
| skipping to change at line 31 | | skipping to change at line 31 | |
| * USA | | * USA | |
| */ | | */ | |
| | | | |
| #ifndef XAPIAN_INCLUDED_POSTINGITERATOR_H | | #ifndef XAPIAN_INCLUDED_POSTINGITERATOR_H | |
| #define XAPIAN_INCLUDED_POSTINGITERATOR_H | | #define XAPIAN_INCLUDED_POSTINGITERATOR_H | |
| | | | |
| #include <iterator> | | #include <iterator> | |
| #include <string> | | #include <string> | |
| | | | |
| #include <xapian/base.h> | | #include <xapian/base.h> | |
|
| | | #include <xapian/derefwrapper.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/positioniterator.h> | | #include <xapian/positioniterator.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
| class Database; | | class Database; | |
| | | | |
|
| /** @internal A wrapper class for a docid which returns the docid if | | | |
| * dereferenced with *. We need this to implement input_iterator semantics. | | | |
| */ | | | |
| class DocIDWrapper { | | | |
| private: | | | |
| docid did; | | | |
| public: | | | |
| explicit DocIDWrapper(docid did_) : did(did_) { } | | | |
| docid operator*() const { return did; } | | | |
| }; | | | |
| | | | |
| /** An iterator pointing to items in a list of postings. | | /** An iterator pointing to items in a list of postings. | |
| */ | | */ | |
| class XAPIAN_VISIBILITY_DEFAULT PostingIterator { | | class XAPIAN_VISIBILITY_DEFAULT PostingIterator { | |
| public: | | public: | |
| class Internal; | | class Internal; | |
| /// @private @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::RefCntPtr<Internal> internal; | | Xapian::Internal::RefCntPtr<Internal> internal; | |
| | | | |
| private: | | private: | |
| friend class Database; // So Database can construct us | | friend class Database; // So Database can construct us | |
| | | | |
| skipping to change at line 85 | | skipping to change at line 75 | |
| */ | | */ | |
| PostingIterator(const PostingIterator &other); | | PostingIterator(const PostingIterator &other); | |
| | | | |
| /** Assignment is allowed. The internals are reference counted, | | /** Assignment is allowed. The internals are reference counted, | |
| * so assignment is also cheap. | | * so assignment is also cheap. | |
| */ | | */ | |
| void operator=(const PostingIterator &other); | | void operator=(const PostingIterator &other); | |
| | | | |
| PostingIterator & operator++(); | | PostingIterator & operator++(); | |
| | | | |
|
| DocIDWrapper operator++(int) { | | DerefWrapper_<docid> operator++(int) { | |
| Xapian::docid tmp = **this; | | Xapian::docid tmp = **this; | |
| operator++(); | | operator++(); | |
|
| return DocIDWrapper(tmp); | | return DerefWrapper_<docid>(tmp); | |
| } | | } | |
| | | | |
| /** Skip the iterator to document did, or the first document after did | | /** Skip the iterator to document did, or the first document after did | |
| * if did isn't in the list of documents being iterated. | | * if did isn't in the list of documents being iterated. | |
| */ | | */ | |
| void skip_to(Xapian::docid did); | | void skip_to(Xapian::docid did); | |
| | | | |
| /// Get the document id at the current position in the postlist. | | /// Get the document id at the current position in the postlist. | |
| Xapian::docid operator *() const; | | Xapian::docid operator *() const; | |
| | | | |
| | | | |
End of changes. 4 change blocks. |
| 14 lines changed or deleted | | 3 lines changed or added | |
|
| termiterator.h | | termiterator.h | |
| /** \file termiterator.h | | /** \file termiterator.h | |
| * \brief Classes for iterating through term lists | | * \brief Classes for iterating through term lists | |
| */ | | */ | |
| /* Copyright 1999,2000,2001 BrightStation PLC | | /* Copyright 1999,2000,2001 BrightStation PLC | |
| * Copyright 2002 Ananova Ltd | | * Copyright 2002 Ananova Ltd | |
|
| * Copyright 2003,2004,2005,2006,2007,2008 Olly Betts | | * Copyright 2003,2004,2005,2006,2007,2008,2009 Olly Betts | |
| * | | * | |
| * This program is free software; you can redistribute it and/or | | * This program is free software; you can redistribute it and/or | |
| * modify it under the terms of the GNU General Public License as | | * modify it under the terms of the GNU General Public License as | |
| * published by the Free Software Foundation; either version 2 of the | | * published by the Free Software Foundation; either version 2 of the | |
| * License, or (at your option) any later version. | | * License, or (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| | | | |
| skipping to change at line 72 | | skipping to change at line 72 | |
| /** Assignment is allowed. The internals are reference counted, | | /** Assignment is allowed. The internals are reference counted, | |
| * so assignment is also cheap. | | * so assignment is also cheap. | |
| */ | | */ | |
| void operator=(const TermIterator &other); | | void operator=(const TermIterator &other); | |
| | | | |
| /// Return the current term. | | /// Return the current term. | |
| std::string operator *() const; | | std::string operator *() const; | |
| | | | |
| TermIterator & operator++(); | | TermIterator & operator++(); | |
| | | | |
|
| DerefStringWrapper_ operator++(int) { | | DerefWrapper_<std::string> operator++(int) { | |
| std::string term(**this); | | const std::string & term(**this); | |
| operator++(); | | operator++(); | |
|
| return DerefStringWrapper_(term); | | return DerefWrapper_<std::string>(term); | |
| } | | } | |
| | | | |
| /** Skip the iterator to term tname, or the first term after tname | | /** Skip the iterator to term tname, or the first term after tname | |
| * if tname isn't in the list of terms being iterated. | | * if tname isn't in the list of terms being iterated. | |
| */ | | */ | |
| void skip_to(const std::string & tname); | | void skip_to(const std::string & tname); | |
| | | | |
| /** Return the wdf of the current term (if meaningful). | | /** Return the wdf of the current term (if meaningful). | |
| * | | * | |
| * The wdf (within document frequency) is the number of occurences | | * The wdf (within document frequency) is the number of occurences | |
| | | | |
End of changes. 3 change blocks. |
| 4 lines changed or deleted | | 4 lines changed or added | |
|
| unicode.h | | unicode.h | |
| /** @file unicode.h | | /** @file unicode.h | |
| * @brief Unicode and UTF-8 related classes and functions. | | * @brief Unicode and UTF-8 related classes and functions. | |
| */ | | */ | |
|
| /* Copyright (C) 2006,2007,2008,2009 Olly Betts | | /* Copyright (C) 2006,2007,2008,2009,2010 Olly Betts | |
| * | | * | |
| * This program is free software; you can redistribute it and/or modify | | * This program is free software; you can redistribute it and/or modify | |
| * it under the terms of the GNU General Public License as published by | | * it under the terms of the GNU General Public License as published by | |
| * the Free Software Foundation; either version 2 of the License, or | | * the Free Software Foundation; either version 2 of the License, or | |
| * (at your option) any later version. | | * (at your option) any later version. | |
| * | | * | |
| * This program is distributed in the hope that it will be useful, | | * This program is distributed in the hope that it will be useful, | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| * GNU General Public License for more details. | | * GNU General Public License for more details. | |
| | | | |
| skipping to change at line 300 | | skipping to change at line 300 | |
| | | | |
| /// Test if a given Unicode character is "word character". | | /// Test if a given Unicode character is "word character". | |
| inline bool is_wordchar(unsigned ch) { | | inline bool is_wordchar(unsigned ch) { | |
| const unsigned int WORDCHAR_MASK = | | const unsigned int WORDCHAR_MASK = | |
| (1 << Xapian::Unicode::UPPERCASE_LETTER) | | | (1 << Xapian::Unicode::UPPERCASE_LETTER) | | |
| (1 << Xapian::Unicode::LOWERCASE_LETTER) | | | (1 << Xapian::Unicode::LOWERCASE_LETTER) | | |
| (1 << Xapian::Unicode::TITLECASE_LETTER) | | | (1 << Xapian::Unicode::TITLECASE_LETTER) | | |
| (1 << Xapian::Unicode::MODIFIER_LETTER) | | | (1 << Xapian::Unicode::MODIFIER_LETTER) | | |
| (1 << Xapian::Unicode::OTHER_LETTER) | | | (1 << Xapian::Unicode::OTHER_LETTER) | | |
| (1 << Xapian::Unicode::NON_SPACING_MARK) | | | (1 << Xapian::Unicode::NON_SPACING_MARK) | | |
|
| | | (1 << Xapian::Unicode::ENCLOSING_MARK) | | |
| | | (1 << Xapian::Unicode::COMBINING_SPACING_MARK) | | |
| (1 << Xapian::Unicode::DECIMAL_DIGIT_NUMBER) | | | (1 << Xapian::Unicode::DECIMAL_DIGIT_NUMBER) | | |
| (1 << Xapian::Unicode::LETTER_NUMBER) | | | (1 << Xapian::Unicode::LETTER_NUMBER) | | |
| (1 << Xapian::Unicode::OTHER_NUMBER) | | | (1 << Xapian::Unicode::OTHER_NUMBER) | | |
| (1 << Xapian::Unicode::CONNECTOR_PUNCTUATION); | | (1 << Xapian::Unicode::CONNECTOR_PUNCTUATION); | |
| return ((WORDCHAR_MASK >> get_category(ch)) & 1); | | return ((WORDCHAR_MASK >> get_category(ch)) & 1); | |
| } | | } | |
| | | | |
| /// Test if a given Unicode character is a whitespace character. | | /// Test if a given Unicode character is a whitespace character. | |
| inline bool is_whitespace(unsigned ch) { | | inline bool is_whitespace(unsigned ch) { | |
| const unsigned int WHITESPACE_MASK = | | const unsigned int WHITESPACE_MASK = | |
| | | | |
End of changes. 2 change blocks. |
| 1 lines changed or deleted | | 3 lines changed or added | |
|
| valueiterator.h | | valueiterator.h | |
| | | | |
| skipping to change at line 35 | | skipping to change at line 35 | |
| #include <iterator> | | #include <iterator> | |
| #include <string> | | #include <string> | |
| | | | |
| #include <xapian/base.h> | | #include <xapian/base.h> | |
| #include <xapian/derefwrapper.h> | | #include <xapian/derefwrapper.h> | |
| #include <xapian/types.h> | | #include <xapian/types.h> | |
| #include <xapian/visibility.h> | | #include <xapian/visibility.h> | |
| | | | |
| namespace Xapian { | | namespace Xapian { | |
| | | | |
|
| /// @internal A proxy class for an end ValueIterator. | | /// @private @internal A proxy class for an end ValueIterator. | |
| class ValueIteratorEnd_ { }; | | class ValueIteratorEnd_ { }; | |
| | | | |
| /// Class for iterating over document values. | | /// Class for iterating over document values. | |
| class XAPIAN_VISIBILITY_DEFAULT ValueIterator { | | class XAPIAN_VISIBILITY_DEFAULT ValueIterator { | |
| public: | | public: | |
| /// Class representing the valueiterator internals. | | /// Class representing the valueiterator internals. | |
| class Internal; | | class Internal; | |
| /// @private @internal Reference counted internals. | | /// @private @internal Reference counted internals. | |
| Xapian::Internal::RefCntPtr<Internal> internal; | | Xapian::Internal::RefCntPtr<Internal> internal; | |
| | | | |
| | | | |
| skipping to change at line 78 | | skipping to change at line 78 | |
| /// Destructor. | | /// Destructor. | |
| ~ValueIterator(); | | ~ValueIterator(); | |
| | | | |
| /// Return the value at the current position. | | /// Return the value at the current position. | |
| std::string operator*() const; | | std::string operator*() const; | |
| | | | |
| /// Advance the iterator to the next position. | | /// Advance the iterator to the next position. | |
| ValueIterator & operator++(); | | ValueIterator & operator++(); | |
| | | | |
| /// Advance the iterator to the next position (postfix version). | | /// Advance the iterator to the next position (postfix version). | |
|
| DerefStringWrapper_ operator++(int) { | | DerefWrapper_<std::string> operator++(int) { | |
| std::string value(**this); | | const std::string & value(**this); | |
| operator++(); | | operator++(); | |
|
| return DerefStringWrapper_(value); | | return DerefWrapper_<std::string>(value); | |
| } | | } | |
| | | | |
| /** Return the docid at the current position. | | /** Return the docid at the current position. | |
| * | | * | |
| * If we're iterating over values of a document, this method will throw | | * If we're iterating over values of a document, this method will throw | |
| * Xapian::InvalidOperationError. | | * Xapian::InvalidOperationError. | |
| */ | | */ | |
| Xapian::docid get_docid() const; | | Xapian::docid get_docid() const; | |
| | | | |
| /** Return the value slot number for the current position. | | /** Return the value slot number for the current position. | |
| | | | |
| skipping to change at line 123 | | skipping to change at line 123 | |
| * them. | | * them. | |
| */ | | */ | |
| void skip_to(Xapian::docid docid_or_slot); | | void skip_to(Xapian::docid docid_or_slot); | |
| | | | |
| /** Check if the specified docid occurs. | | /** Check if the specified docid occurs. | |
| * | | * | |
| * The caller is required to ensure that the specified document id | | * The caller is required to ensure that the specified document id | |
| * @a did actually exists in the database. | | * @a did actually exists in the database. | |
| * | | * | |
| * This method acts like skip_to() if that can be done at little extra | | * This method acts like skip_to() if that can be done at little extra | |
|
| * cost, in which case it then returns true. This is how chert behaves | | * cost, in which case it then returns true. This is how brass and | |
| * because it stores values in streams which allow for an efficient | | * chert databases behave because they store values in streams which allow | |
| * implementation of skip_to(). | | * for an efficient implementation of skip_to(). | |
| * | | * | |
| * Otherwise it simply checks if a particular docid is present. If it | | * Otherwise it simply checks if a particular docid is present. If it | |
| * is, it returns true. If it isn't, it returns false, and leaves the | | * is, it returns true. If it isn't, it returns false, and leaves the | |
| * position unspecified (and hence the result of calling methods which | | * position unspecified (and hence the result of calling methods which | |
| * depends on the current position, such as get_docid(), are also | | * depends on the current position, such as get_docid(), are also | |
| * unspecified). In this state, next() will advance to the first matching | | * unspecified). In this state, next() will advance to the first matching | |
| * position after document @a did, and skip_to() will act as it would if | | * position after document @a did, and skip_to() will act as it would if | |
| * the position was the first matching position after document @a did. | | * the position was the first matching position after document @a did. | |
| * | | * | |
| * Currently the inmemory, flint, and remote backends behave in the | | * Currently the inmemory, flint, and remote backends behave in the | |
| | | | |
End of changes. 4 change blocks. |
| 8 lines changed or deleted | | 8 lines changed or added | |
|