| counters.h | | counters.h | |
|
| // counters.h | | // Copyright 2012 the V8 project authors. All rights reserved. | |
| /* | | // Redistribution and use in source and binary forms, with or without | |
| * Copyright (C) 2010 10gen Inc. | | // modification, are permitted provided that the following conditions are | |
| * | | // met: | |
| * This program is free software: you can redistribute it and/or modify | | // | |
| * it under the terms of the GNU Affero General Public License, version 3, | | // * Redistributions of source code must retain the above copyright | |
| * as published by the Free Software Foundation. | | // notice, this list of conditions and the following disclaimer. | |
| * | | // * Redistributions in binary form must reproduce the above | |
| * This program is distributed in the hope that it will be useful, | | // copyright notice, this list of conditions and the following | |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | | // disclaimer in the documentation and/or other materials provided | |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | // with the distribution. | |
| * GNU Affero General Public License for more details. | | // * Neither the name of Google Inc. nor the names of its | |
| * | | // contributors may be used to endorse or promote products derived | |
| * You should have received a copy of the GNU Affero General Public License | | // from this software without specific prior written permission. | |
| * along with this program. If not, see <http://www.gnu.org/licenses/>. | | // | |
| * | | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| * As a special exception, the copyright holders give permission to link the | | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| * code of portions of this program with the OpenSSL library under certain | | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| * conditions as described in each individual source file and distribute | | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| * linked combinations including the program with the OpenSSL library. You | | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| * must comply with the GNU Affero General Public License in all respects for | | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| * all of the code used other than as permitted herein. If you modify file(s) | | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| * with this exception, you may extend this exception to your version of the | | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| * file(s), but you are not obligated to do so. If you do not wish to do so, | | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * delete this exception statement from your version. If you delete this | | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| * exception statement from all source files in the program, then also delete | | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * it in the license file. | | | |
| */ | | #ifndef V8_COUNTERS_H_ | |
| | | #define V8_COUNTERS_H_ | |
| | | | |
| | | #include "../include/v8.h" | |
| | | #include "allocation.h" | |
| | | | |
| | | namespace v8 { | |
| | | namespace internal { | |
| | | | |
| | | // StatsCounters is an interface for plugging into external | |
| | | // counters for monitoring. Counters can be looked up and | |
| | | // manipulated by name. | |
| #pragma once | | | |
| | | class StatsTable { | |
| #include "mongo/pch.h" | | public: | |
| #include "mongo/db/jsobj.h" | | // Register an application-defined function where | |
| #include "mongo/util/net/message.h" | | // counters can be looked up. | |
| #include "mongo/util/processinfo.h" | | void SetCounterFunction(CounterLookupCallback f) { | |
| #include "mongo/util/concurrency/spin_lock.h" | | lookup_function_ = f; | |
| #include "mongo/db/pdfile.h" | | } | |
| | | | |
| namespace mongo { | | // Register an application-defined function to create | |
| | | // a histogram for passing to the AddHistogramSample function | |
| /** | | void SetCreateHistogramFunction(CreateHistogramCallback f) { | |
| * for storing operation counters | | create_histogram_function_ = f; | |
| * note: not thread safe. ok with that for speed | | } | |
| */ | | | |
| class OpCounters { | | // Register an application-defined function to add a sample | |
| public: | | // to a histogram created with CreateHistogram function | |
| | | void SetAddHistogramSampleFunction(AddHistogramSampleCallback f) { | |
| OpCounters(); | | add_histogram_sample_function_ = f; | |
| void incInsertInWriteLock(int n); | | } | |
| void gotInsert(); | | | |
| void gotQuery(); | | bool HasCounterFunction() const { | |
| void gotUpdate(); | | return lookup_function_ != NULL; | |
| void gotDelete(); | | } | |
| void gotGetMore(); | | | |
| void gotCommand(); | | // Lookup the location of a counter by name. If the lookup | |
| | | // is successful, returns a non-NULL pointer for writing the | |
| void gotOp( int op , bool isCommand ); | | // value of the counter. Each thread calling this function | |
| | | // may receive a different location to store its counter. | |
| BSONObj getObj() const; | | // The return value must not be cached and re-used across | |
| | | // threads, although a single thread is free to cache it. | |
| // these are used by snmp, and other things, do not remove | | int* FindLocation(const char* name) { | |
| const AtomicUInt * getInsert() const { return &_insert; } | | if (!lookup_function_) return NULL; | |
| const AtomicUInt * getQuery() const { return &_query; } | | return lookup_function_(name); | |
| const AtomicUInt * getUpdate() const { return &_update; } | | } | |
| const AtomicUInt * getDelete() const { return &_delete; } | | | |
| const AtomicUInt * getGetMore() const { return &_getmore; } | | // Create a histogram by name. If the create is successful, | |
| const AtomicUInt * getCommand() const { return &_command; } | | // returns a non-NULL pointer for use with AddHistogramSample | |
| | | // function. min and max define the expected minimum and maximum | |
| private: | | // sample values. buckets is the maximum number of buckets | |
| void _checkWrap(); | | // that the samples will be grouped into. | |
| | | void* CreateHistogram(const char* name, | |
| // todo: there will be a lot of cache line contention on these. need to do something | | int min, | |
| // else eventually. | | int max, | |
| | | size_t buckets) { | |
| AtomicUInt _insert; | | if (!create_histogram_function_) return NULL; | |
| AtomicUInt _query; | | return create_histogram_function_(name, min, max, buckets); | |
| AtomicUInt _update; | | } | |
| AtomicUInt _delete; | | | |
| AtomicUInt _getmore; | | // Add a sample to a histogram created with the CreateHistogram | |
| AtomicUInt _command; | | // function. | |
| }; | | void AddHistogramSample(void* histogram, int sample) { | |
| | | if (!add_histogram_sample_function_) return; | |
| extern OpCounters globalOpCounters; | | return add_histogram_sample_function_(histogram, sample); | |
| extern OpCounters replOpCounters; | | } | |
| | | | |
| class NetworkCounter { | | private: | |
| public: | | StatsTable(); | |
| NetworkCounter() : _bytesIn(0), _bytesOut(0), _requests(0), _overflows(0) {} | | | |
| | | CounterLookupCallback lookup_function_; | |
| void hit( long long bytesIn , long long bytesOut ); | | CreateHistogramCallback create_histogram_function_; | |
| void append( BSONObjBuilder& b ); | | AddHistogramSampleCallback add_histogram_sample_function_; | |
| private: | | | |
| long long _bytesIn; | | friend class Isolate; | |
| long long _bytesOut; | | | |
| long long _requests; | | DISALLOW_COPY_AND_ASSIGN(StatsTable); | |
| | | }; | |
| | | | |
| | | // StatsCounters are dynamically created values which can be tracked in | |
| | | // the StatsTable. They are designed to be lightweight to create and | |
| | | // easy to use. | |
| | | // | |
| | | // Internally, a counter represents a value in a row of a StatsTable. | |
| | | // The row has a 32bit value for each process/thread in the table and also | |
| | | // a name (stored in the table metadata). Since the storage location can be | |
| | | // thread-specific, this class cannot be shared across threads. | |
| | | // | |
| | | // This class is designed to be POD initialized. It will be registered with | |
| | | // the counter system on first use. For example: | |
| | | // StatsCounter c = { "c:myctr", NULL, false }; | |
| | | struct StatsCounter { | |
| | | const char* name_; | |
| | | int* ptr_; | |
| | | bool lookup_done_; | |
| | | | |
| | | // Sets the counter to a specific value. | |
| | | void Set(int value) { | |
| | | int* loc = GetPtr(); | |
| | | if (loc) *loc = value; | |
| | | } | |
| | | | |
| | | // Increments the counter. | |
| | | void Increment() { | |
| | | int* loc = GetPtr(); | |
| | | if (loc) (*loc)++; | |
| | | } | |
| | | | |
| | | void Increment(int value) { | |
| | | int* loc = GetPtr(); | |
| | | if (loc) | |
| | | (*loc) += value; | |
| | | } | |
| | | | |
| | | // Decrements the counter. | |
| | | void Decrement() { | |
| | | int* loc = GetPtr(); | |
| | | if (loc) (*loc)--; | |
| | | } | |
| | | | |
| | | void Decrement(int value) { | |
| | | int* loc = GetPtr(); | |
| | | if (loc) (*loc) -= value; | |
| | | } | |
| | | | |
| | | // Is this counter enabled? | |
| | | // Returns false if table is full. | |
| | | bool Enabled() { | |
| | | return GetPtr() != NULL; | |
| | | } | |
| | | | |
| | | // Get the internal pointer to the counter. This is used | |
| | | // by the code generator to emit code that manipulates a | |
| | | // given counter without calling the runtime system. | |
| | | int* GetInternalPointer() { | |
| | | int* loc = GetPtr(); | |
| | | ASSERT(loc != NULL); | |
| | | return loc; | |
| | | } | |
| | | | |
| | | protected: | |
| | | // Returns the cached address of this counter location. | |
| | | int* GetPtr() { | |
| | | if (lookup_done_) return ptr_; | |
| | | lookup_done_ = true; | |
| | | ptr_ = FindLocationInStatsTable(); | |
| | | return ptr_; | |
| | | } | |
| | | | |
| | | private: | |
| | | int* FindLocationInStatsTable() const; | |
| | | }; | |
| | | | |
| | | // StatsCounterTimer t = { { "t:foo", NULL, false }, 0, 0 }; | |
| | | struct StatsCounterTimer { | |
| | | StatsCounter counter_; | |
| | | | |
| | | int64_t start_time_; | |
| | | int64_t stop_time_; | |
| | | | |
| | | // Start the timer. | |
| | | void Start(); | |
| | | | |
| | | // Stop the timer and record the results. | |
| | | void Stop(); | |
| | | | |
| | | // Returns true if the timer is running. | |
| | | bool Running() { | |
| | | return counter_.Enabled() && start_time_ != 0 && stop_time_ == 0; | |
| | | } | |
| | | }; | |
| | | | |
| | | // A Histogram represents a dynamically created histogram in the StatsTable. | |
| | | // | |
| | | // This class is designed to be POD initialized. It will be registered with | |
| | | // the histogram system on first use. For example: | |
| | | // Histogram h = { "myhist", 0, 10000, 50, NULL, false }; | |
| | | struct Histogram { | |
| | | const char* name_; | |
| | | int min_; | |
| | | int max_; | |
| | | int num_buckets_; | |
| | | void* histogram_; | |
| | | bool lookup_done_; | |
| | | | |
| | | // Add a single sample to this histogram. | |
| | | void AddSample(int sample); | |
| | | | |
| | | // Returns true if this histogram is enabled. | |
| | | bool Enabled() { | |
| | | return GetHistogram() != NULL; | |
| | | } | |
| | | | |
| | | protected: | |
| | | // Returns the handle to the histogram. | |
| | | void* GetHistogram() { | |
| | | if (!lookup_done_) { | |
| | | lookup_done_ = true; | |
| | | histogram_ = CreateHistogram(); | |
| | | } | |
| | | return histogram_; | |
| | | } | |
| | | | |
| | | private: | |
| | | void* CreateHistogram() const; | |
| | | }; | |
| | | | |
| | | // A HistogramTimer allows distributions of results to be created | |
| | | // HistogramTimer t = { {"foo", 0, 10000, 50, NULL, false}, 0, 0 }; | |
| | | struct HistogramTimer { | |
| | | Histogram histogram_; | |
| | | | |
| | | int64_t start_time_; | |
| | | int64_t stop_time_; | |
| | | | |
| | | // Start the timer. | |
| | | void Start(); | |
| | | | |
| | | // Stop the timer and record the results. | |
| | | void Stop(); | |
| | | | |
| | | // Returns true if the timer is running. | |
| | | bool Running() { | |
| | | return histogram_.Enabled() && (start_time_ != 0) && (stop_time_ == 0); | |
| | | } | |
| | | }; | |
| | | | |
| | | // Helper class for scoping a HistogramTimer. | |
| | | class HistogramTimerScope BASE_EMBEDDED { | |
| | | public: | |
| | | explicit HistogramTimerScope(HistogramTimer* timer) : | |
| | | timer_(timer) { | |
| | | timer_->Start(); | |
| | | } | |
| | | ~HistogramTimerScope() { | |
| | | timer_->Stop(); | |
| | | } | |
| | | private: | |
| | | HistogramTimer* timer_; | |
| | | }; | |
| | | | |
|
| long long _overflows; | | } } // namespace v8::internal | |
| | | | |
|
| SpinLock _lock; | | #endif // V8_COUNTERS_H_ | |
| }; | | | |
| | | | |
| extern NetworkCounter networkCounter; | | | |
| } | | | |
| | | | |
End of changes. 3 change blocks. |
| 106 lines changed or deleted | | 273 lines changed or added | |
|
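The right-hand column above is V8's embedder-facing counter plumbing: the embedder registers a lookup callback on the StatsTable, and each POD-initialized StatsCounter binds lazily to its slot the first time it is used. Below is a minimal sketch of that wiring, assuming a map-backed store on the embedder side; the storage and function names are hypothetical, and only the callback shape mirrors the header.

    #include <map>
    #include <string>

    // Hypothetical process-wide backing store for counter slots.
    static std::map<std::string, int> g_counterSlots;

    // Same shape as the CounterLookupCallback registered via SetCounterFunction:
    // given a counter name, return a stable slot to write to (or NULL to disable).
    static int* LookupCounter(const char* name) {
        return &g_counterSlots[name];  // operator[] zero-initializes a new slot
    }

    // After registration, a counter declared as in the header's example,
    //     StatsCounter c = { "c:myctr", NULL, false };
    //     c.Increment();
    // resolves the name once through LookupCounter, caches the slot in ptr_,
    // and bumps the int directly on every later call.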
| expression_index.h | | expression_index.h | |
| | | | |
| skipping to change at line 80 | | skipping to change at line 80 | |
| coverer.set_min_level(min(coarsestIndexedLevel, | | coverer.set_min_level(min(coarsestIndexedLevel, | |
| 2 + S2::kAvgEdge.GetClosestLevel(edge
Len))); | | 2 + S2::kAvgEdge.GetClosestLevel(edge
Len))); | |
| coverer.set_max_level(4 + coverer.min_level()); | | coverer.set_max_level(4 + coverer.min_level()); | |
| | | | |
| vector<S2CellId> cover; | | vector<S2CellId> cover; | |
| coverer.GetCovering(region, &cover); | | coverer.GetCovering(region, &cover); | |
| | | | |
| // Look at the cells we cover and all cells that are within our covering and finer. | | // Look at the cells we cover and all cells that are within our covering and finer. | |
| // Anything with our cover as a strict prefix is contained within the cover and should | | // Anything with our cover as a strict prefix is contained within the cover and should | |
| // be intersection tested. | | // be intersection tested. | |
|
| bool considerCoarser = false; | | | |
| set<string> intervalSet; | | set<string> intervalSet; | |
|
| | | set<string> exactSet; | |
| for (size_t i = 0; i < cover.size(); ++i) { | | for (size_t i = 0; i < cover.size(); ++i) { | |
|
| intervalSet.insert(cover[i].toString()); | | | |
| // If any of our covers could be covered by something in the index, we have | | | |
| // to look at things coarser. | | | |
| if (cover[i].level() > coarsestIndexedLevel) { | | | |
| considerCoarser = true; | | | |
| } | | | |
| } | | | |
| | | | |
|
| set<string> exactSet; | | S2CellId coveredCell = cover[i]; | |
| if (considerCoarser) { | | intervalSet.insert(coveredCell.toString()); | |
| | | | |
| // Look at the cells that cover us. We want to look at every cell that contains the | | // Look at the cells that cover us. We want to look at every cell that contains the | |
| // covering we would index on if we were to insert the query geometry. We generate | | // covering we would index on if we were to insert the query geometry. We generate | |
| // the would-index-with-this-covering and find all the cells strictly containing the | | // the would-index-with-this-covering and find all the cells strictly containing the | |
| // cells in that set, until we hit the coarsest indexed cell. We use equality, not | | // cells in that set, until we hit the coarsest indexed cell. We use equality, not | |
| // a prefix match. Why not prefix? Because we've already looked at everything | | // a prefix match. Why not prefix? Because we've already looked at everything | |
| // finer or as fine as our initial covering. | | // finer or as fine as our initial covering. | |
| // | | // | |
| // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't | | // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't | |
| // want to look at cells 21212[not-1] because we know they're not going to intersect | | // want to look at cells 21212[not-1] because we know they're not going to intersect | |
| // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. | | // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. | |
| // And we've already looked at points with the cell id 211111 from the regex search | | // And we've already looked at points with the cell id 211111 from the regex search | |
| // created above, so we only want things where the value of the last digit is not | | // created above, so we only want things where the value of the last digit is not | |
| // stored (and therefore could be 1). | | // stored (and therefore could be 1). | |
|
| for (size_t i = 0; i < cover.size(); ++i) { | | | |
| for (S2CellId id = cover[i].parent(); id.level() >= coarsestIndexedLevel; | | while (coveredCell.level() > coarsestIndexedLevel) { | |
| id = id.parent()) { | | // Add the parent cell of the currently covered cell since we aren't at the | |
| exactSet.insert(id.toString()); | | // coarsest level yet | |
| } | | // NOTE: Be careful not to generate cells strictly less than the | |
| | | // coarsestIndexedLevel - this can result in S2 failures when level < 0. | |
| | | | |
| | | coveredCell = coveredCell.parent(); | |
| | | exactSet.insert(coveredCell.toString()); | |
| } | | } | |
| | | | |
| // We turned the cell IDs into strings which define point inter
vals or prefixes of | | // We turned the cell IDs into strings which define point inter
vals or prefixes of | |
| // strings we want to look for. | | // strings we want to look for. | |
| set<string>::iterator exactIt = exactSet.begin(); | | set<string>::iterator exactIt = exactSet.begin(); | |
| set<string>::iterator intervalIt = intervalSet.begin(); | | set<string>::iterator intervalIt = intervalSet.begin(); | |
| while (exactSet.end() != exactIt && intervalSet.end() != intervalIt) { | | while (exactSet.end() != exactIt && intervalSet.end() != intervalIt) { | |
| const string& exact = *exactIt; | | const string& exact = *exactIt; | |
| const string& ival = *intervalIt; | | const string& ival = *intervalIt; | |
| | | | |
End of changes. 5 change blocks. |
| 17 lines changed or deleted | | 17 lines changed or added | |
|
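The rewritten loop above folds the old considerCoarser pass into a direct parent() walk per covered cell: ancestors are collected as exact-match strings until the walk reaches coarsestIndexedLevel, which also avoids asking S2 for a parent at a level below zero. A toy sketch of the same walk, using a string of child digits in place of a real S2CellId; the encoding and names here are illustrative only.

    #include <iostream>
    #include <set>
    #include <string>

    // Toy stand-in for the S2 parent() walk: each character is one level, so
    // dropping the last character is one step toward the coarsest cell.
    std::set<std::string> ancestorsToLevel(std::string cell, size_t coarsestLevel) {
        std::set<std::string> exact;
        while (cell.size() > coarsestLevel) {
            cell.pop_back();        // one parent() step: drop the finest digit
            exact.insert(cell);     // ancestors are matched by equality, not prefix
        }
        return exact;
    }

    int main() {
        // Mirrors the comment's example: for cell 212121 with coarsest level 3,
        // the exact set is {212, 2121, 21212}.
        for (const std::string& s : ancestorsToLevel("212121", 3))
            std::cout << s << "\n";
    }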
| index_scan.h | | index_scan.h | |
| | | | |
| skipping to change at line 83 | | skipping to change at line 83 | |
| * TODO: we probably should split this into 2 stages: one btree-only "fast" ixscan and one that | | * TODO: we probably should split this into 2 stages: one btree-only "fast" ixscan and one that | |
| * strictly talks through the index API. Need to figure out what we really want to ship down | | * strictly talks through the index API. Need to figure out what we really want to ship down | |
| * through that API predicate-wise though, currently the language is a BSONObj but that's | | * through that API predicate-wise though, currently the language is a BSONObj but that's | |
| * clearly not enough (or we need different index scan exec nodes per index type?). See | | * clearly not enough (or we need different index scan exec nodes per index type?). See | |
| * SERVER-12397 for tracking. | | * SERVER-12397 for tracking. | |
| * | | * | |
| * Sub-stage preconditions: None. Is a leaf and consumes no stage data. | | * Sub-stage preconditions: None. Is a leaf and consumes no stage data. | |
| */ | | */ | |
| class IndexScan : public PlanStage { | | class IndexScan : public PlanStage { | |
| public: | | public: | |
|
| | | | |
| | | /** | |
| | | * Keeps track of what this index scan is currently doing so that it | |
| | | * can do the right thing on the next call to work(). | |
| | | */ | |
| | | enum ScanState { | |
| | | // Need to initialize the underlying index traversal machinery. | |
| | | INITIALIZING, | |
| | | | |
| | | // Skipping keys in order to check whether we have reached the end. | |
| | | CHECKING_END, | |
| | | | |
| | | // Retrieving the next key, and applying the filter if necessary. | |
| | | GETTING_NEXT, | |
| | | | |
| | | // The index scan is finished. | |
| | | HIT_END | |
| | | }; | |
| | | | |
| IndexScan(const IndexScanParams& params, WorkingSet* workingSet, | | IndexScan(const IndexScanParams& params, WorkingSet* workingSet, | |
| const MatchExpression* filter); | | const MatchExpression* filter); | |
| | | | |
| virtual ~IndexScan() { } | | virtual ~IndexScan() { } | |
| | | | |
| virtual StageState work(WorkingSetID* out); | | virtual StageState work(WorkingSetID* out); | |
| virtual bool isEOF(); | | virtual bool isEOF(); | |
| virtual void prepareToYield(); | | virtual void prepareToYield(); | |
| virtual void recoverFromYield(); | | virtual void recoverFromYield(); | |
| virtual void invalidate(const DiskLoc& dl, InvalidationType type); | | virtual void invalidate(const DiskLoc& dl, InvalidationType type); | |
| | | | |
| skipping to change at line 105 | | skipping to change at line 124 | |
| | | | |
| private: | | private: | |
| /** | | /** | |
| * Initialize the underlying IndexCursor, grab information from the catalog for stats. | | * Initialize the underlying IndexCursor, grab information from the catalog for stats. | |
| */ | | */ | |
| void initIndexScan(); | | void initIndexScan(); | |
| | | | |
| /** See if the cursor is pointing at or past _endKey, if _endKey is non-empty. */ | | /** See if the cursor is pointing at or past _endKey, if _endKey is non-empty. */ | |
| void checkEnd(); | | void checkEnd(); | |
| | | | |
|
| // The number of keys examined during a call to checkEnd() that have not yet been | | | |
| // accounted for by returning a NEED_TIME. | | | |
| // | | | |
| // Good plan ranking requires that the index scan uses one work cycle per index key | | | |
| // examined. Since checkEnd() may examine multiple keys, we keep track of them here | | | |
| // and make up for it later by returning NEED_TIME. | | | |
| // | | | |
| // Example of how this is useful for plan ranking: | | | |
| // Say you have indices {a: 1, b: 1} and {a: 1, x: 1, b: 1}, with predicates over | | | |
| // fields 'a' and 'b'. It's cheaper to use index {a: 1, b: 1}. Why? Because for | | | |
| // index {a: 1, x: 1, b: 1} you have to skip lots of keys due to the interceding | | | |
| // 'x' field. This skipping is done inside checkEnd(), and we use '_checkEndKeys' | | | |
| // to account for it. | | | |
| size_t _checkEndKeys; | | | |
| | | | |
| // The WorkingSet we annotate with results. Not owned by us. | | // The WorkingSet we annotate with results. Not owned by us. | |
| WorkingSet* _workingSet; | | WorkingSet* _workingSet; | |
| | | | |
| // Index access. | | // Index access. | |
| const IndexAccessMethod* _iam; // owned by Collection -> IndexCatalog | | const IndexAccessMethod* _iam; // owned by Collection -> IndexCatalog | |
| scoped_ptr<IndexCursor> _indexCursor; | | scoped_ptr<IndexCursor> _indexCursor; | |
| BSONObj _keyPattern; | | BSONObj _keyPattern; | |
| | | | |
|
| // Have we hit the end of the index scan? | | // Keeps track of what work we need to do next. | |
| bool _hitEnd; | | ScanState _scanState; | |
| | | | |
| // Contains expressions only over fields in the index key. We assume this is built | | // Contains expressions only over fields in the index key. We assume this is built | |
| // correctly by whoever creates this class. | | // correctly by whoever creates this class. | |
| // The filter is not owned by us. | | // The filter is not owned by us. | |
| const MatchExpression* _filter; | | const MatchExpression* _filter; | |
| | | | |
| // Could our index have duplicates? If so, we use _returned to dedup. | | // Could our index have duplicates? If so, we use _returned to dedup. | |
| bool _shouldDedup; | | bool _shouldDedup; | |
| unordered_set<DiskLoc, DiskLoc::Hasher> _returned; | | unordered_set<DiskLoc, DiskLoc::Hasher> _returned; | |
| | | | |
| // For yielding. | | // For yielding. | |
| BSONObj _savedKey; | | BSONObj _savedKey; | |
| DiskLoc _savedLoc; | | DiskLoc _savedLoc; | |
| | | | |
|
| // True if there was a yield and the yield changed the cursor position. | | | |
| bool _yieldMovedCursor; | | | |
| | | | |
| IndexScanParams _params; | | IndexScanParams _params; | |
| | | | |
| // For our "fast" Btree-only navigation AKA the index bounds optimi
zation. | | // For our "fast" Btree-only navigation AKA the index bounds optimi
zation. | |
| scoped_ptr<IndexBoundsChecker> _checker; | | scoped_ptr<IndexBoundsChecker> _checker; | |
| BtreeIndexCursor* _btreeCursor; | | BtreeIndexCursor* _btreeCursor; | |
| int _keyEltsToUse; | | int _keyEltsToUse; | |
| bool _movePastKeyElts; | | bool _movePastKeyElts; | |
| vector<const BSONElement*> _keyElts; | | vector<const BSONElement*> _keyElts; | |
| vector<bool> _keyEltsInc; | | vector<bool> _keyEltsInc; | |
| | | | |
| | | | |
End of changes. 4 change blocks. |
| 28 lines changed or deleted | | 24 lines changed or added | |
|
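The ScanState enum added above replaces the deleted booleans (_hitEnd, _yieldMovedCursor) and the _checkEndKeys debt counter with an explicit state machine, so every call to work() performs one unit of work and plan ranking can charge the scan one work cycle per key examined. A schematic, self-contained sketch of that dispatch shape; the transitions and return values are stand-ins, not MongoDB's actual work() body.

    enum ScanState { INITIALIZING, CHECKING_END, GETTING_NEXT, HIT_END };

    // Toy stage: each work() call advances the machine by exactly one step,
    // which is what makes the number of work() calls a fair proxy for the
    // number of index keys examined.
    struct ToyIndexScan {
        ScanState state = INITIALIZING;

        const char* work() {
            switch (state) {
                case INITIALIZING:  // set up cursor machinery, then check bounds
                    state = CHECKING_END;
                    return "NEED_TIME";
                case CHECKING_END:  // possibly skip one key; charge a cycle for it
                    state = GETTING_NEXT;
                    return "NEED_TIME";
                case GETTING_NEXT:  // fetch a key, apply the filter, emit a result
                    state = CHECKING_END;
                    return "ADVANCED";
                case HIT_END:
                default:
                    return "IS_EOF";
            }
        }
    };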
| rs.h | | rs.h | |
| | | | |
| skipping to change at line 572 | | skipping to change at line 572 | |
| void getTargets(list<Target>&, int &configVersion); | | void getTargets(list<Target>&, int &configVersion); | |
| void startThreads(); | | void startThreads(); | |
| friend class FeedbackThread; | | friend class FeedbackThread; | |
| friend class CmdReplSetElect; | | friend class CmdReplSetElect; | |
| friend class Member; | | friend class Member; | |
| friend class Manager; | | friend class Manager; | |
| friend class GhostSync; | | friend class GhostSync; | |
| friend class Consensus; | | friend class Consensus; | |
| | | | |
| private: | | private: | |
|
| bool _syncDoInitialSync_clone(Cloner &cloner, const char *master, | | bool _initialSyncClone(Cloner &cloner, const std::string& master, | |
| const list<string>& dbs, bool dataPass); | | const list<string>& dbs, bool dataPass); | |
| bool _syncDoInitialSync_applyToHead( replset::SyncTail& syncer, OplogReader* r , | | bool _initialSyncApplyOplog(replset::SyncTail& syncer, OplogReader* r , | |
| const Member* source, const BSONObj& lastOp, | | const Member* source); | |
| BSONObj& minValidOut); | | void _initialSync(); | |
| void _syncDoInitialSync(); | | | |
| void syncDoInitialSync(); | | void syncDoInitialSync(); | |
| void _syncThread(); | | void _syncThread(); | |
| void syncTail(); | | void syncTail(); | |
| unsigned _syncRollback(OplogReader& r); | | unsigned _syncRollback(OplogReader& r); | |
| void syncFixUp(HowToFixUp& h, OplogReader& r); | | void syncFixUp(HowToFixUp& h, OplogReader& r); | |
| | | | |
| // keep a list of hosts that we've tried recently that didn't work | | // keep a list of hosts that we've tried recently that didn't work | |
| map<string,time_t> _veto; | | map<string,time_t> _veto; | |
| // persistent pool of worker threads for writing ops to the databas
es | | // persistent pool of worker threads for writing ops to the databas
es | |
| threadpool::ThreadPool _writerPool; | | threadpool::ThreadPool _writerPool; | |
| | | | |
| skipping to change at line 629 | | skipping to change at line 628 | |
| /** | | /** | |
| * When a member reaches its minValid optime it is in a consistent
state. Thus, minValid is | | * When a member reaches its minValid optime it is in a consistent
state. Thus, minValid is | |
| * set as the last step in initial sync. At the beginning of initi
al sync, _initialSyncFlag | | * set as the last step in initial sync. At the beginning of initi
al sync, _initialSyncFlag | |
| * is appended onto minValid to indicate that initial sync was star
ted but has not yet | | * is appended onto minValid to indicate that initial sync was star
ted but has not yet | |
| * completed. | | * completed. | |
| * minValid is also used during "normal" sync: the last op in each
batch is used to set | | * minValid is also used during "normal" sync: the last op in each
batch is used to set | |
| * minValid, to indicate that we are in a consistent state when the
batch has been fully | | * minValid, to indicate that we are in a consistent state when the
batch has been fully | |
| * applied. | | * applied. | |
| */ | | */ | |
| static void setMinValid(BSONObj obj); | | static void setMinValid(BSONObj obj); | |
|
| | | static void setMinValid(OpTime ts); | |
| static OpTime getMinValid(); | | static OpTime getMinValid(); | |
| static void clearInitialSyncFlag(); | | static void clearInitialSyncFlag(); | |
| static bool getInitialSyncFlag(); | | static bool getInitialSyncFlag(); | |
| static void setInitialSyncFlag(); | | static void setInitialSyncFlag(); | |
| | | | |
| int oplogVersion; | | int oplogVersion; | |
| | | | |
| // bool for indicating resync need on this node and the mutex that protects it | | // bool for indicating resync need on this node and the mutex that protects it | |
| bool initialSyncRequested; | | bool initialSyncRequested; | |
| boost::mutex initialSyncMutex; | | boost::mutex initialSyncMutex; | |
| | | | |
End of changes. 2 change blocks. |
| 9 lines changed or deleted | | 7 lines changed or added | |
|
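The minValid comment above pins down an ordering contract: the initial-sync flag goes onto minValid at the start, and minValid only marks a consistent state at the end, when the flag comes off. A sketch of that lifecycle with free-function stand-ins for the ReplSetImpl statics declared above; the clone/apply helper is hypothetical, and the relative order of the final two steps is an assumption.

    #include <iostream>

    struct OpTime { long long secs; long long inc; };   // simplified stand-in

    // No-op stub bodies so the sketch compiles; the contract is in the comments.
    void setInitialSyncFlag()           { std::cout << "initial sync flag set\n"; }
    void setMinValid(OpTime)            { std::cout << "minValid set\n"; }
    void clearInitialSyncFlag()         { std::cout << "initial sync flag cleared\n"; }
    void cloneAndApplyOplogUpTo(OpTime) { std::cout << "clone + apply oplog\n"; }  // hypothetical

    void initialSyncSketch(OpTime lastOp) {
        setInitialSyncFlag();               // beginning: started, not yet consistent
        cloneAndApplyOplogUpTo(lastOp);     // a crash here leaves the flag set => resync
        setMinValid(lastOp);                // last step: consistent once this optime is reached
        clearInitialSyncFlag();
    }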
| rs_sync.h | | rs_sync.h | |
| | | | |
| skipping to change at line 52 | | skipping to change at line 52 | |
| | | | |
| class BackgroundSyncInterface; | | class BackgroundSyncInterface; | |
| | | | |
| /** | | /** | |
| * "Normal" replica set syncing | | * "Normal" replica set syncing | |
| */ | | */ | |
| class SyncTail : public Sync { | | class SyncTail : public Sync { | |
| typedef void (*MultiSyncApplyFunc)(const std::vector<BSONObj>& ops, SyncTail* st); | | typedef void (*MultiSyncApplyFunc)(const std::vector<BSONObj>& ops, SyncTail* st); | |
| public: | | public: | |
| SyncTail(BackgroundSyncInterface *q); | | SyncTail(BackgroundSyncInterface *q); | |
|
| | | SyncTail(BackgroundSyncInterface *q, MultiSyncApplyFunc func); | |
| | | | |
| virtual ~SyncTail(); | | virtual ~SyncTail(); | |
| virtual bool syncApply(const BSONObj &o, bool convertUpdateToUpsert = false); | | virtual bool syncApply(const BSONObj &o, bool convertUpdateToUpsert = false); | |
| | | | |
| /** | | /** | |
|
| * Apply ops from applyGTEObj's ts to at least minValidObj's ts. Note that, due to | | * Runs _applyOplogUntil(stopOpTime) | |
| * batching, this may end up applying ops beyond minValidObj's ts. | | */ | |
| * | | | |
| * @param applyGTEObj the op to start replicating at. This is actually not used except in | | | |
| * comparison to minValidObj: the background sync thread keeps its own | | | |
| * record of where we're synced to and starts providing ops from that | | | |
| * point. | | | |
| * @param minValidObj the op to finish syncing at. This function cannot return (other than | | | |
| * fatally erroring out) without applying at least this op. | | | |
| * @param func whether this should use initial sync logic (recloning docs) or | | | |
| * "normal" logic. | | | |
| * @return BSONObj the op that was synced to. This may be greater than minValidObj, as a | | | |
| * single batch might blow right by minvalid. If applyGTEObj is the same | | | |
| * op as minValidObj, this will be applyGTEObj. | | | |
| */ | | | |
| BSONObj oplogApplySegment(const BSONObj& applyGTEObj, const BSONObj& minValidObj, | | | |
| MultiSyncApplyFunc func); | | | |
| | | | |
| /** | | | |
| * Runs oplogApplySegment without allowing recloning documents. | | | |
| */ | | | |
| virtual BSONObj oplogApplication(const BSONObj& applyGTEObj, const BSONObj& minValidObj); | | virtual void oplogApplication(const OpTime& stopOpTime); | |
BSONObj& minValidObj); | | virtual void oplogApplication(const OpTime& stopOpTime); | |
| | | | |
| void oplogApplication(); | | void oplogApplication(); | |
| bool peek(BSONObj* obj); | | bool peek(BSONObj* obj); | |
| | | | |
| class OpQueue { | | class OpQueue { | |
| public: | | public: | |
| OpQueue() : _size(0) {} | | OpQueue() : _size(0) {} | |
| size_t getSize() { return _size; } | | size_t getSize() { return _size; } | |
| std::deque<BSONObj>& getDeque() { return _deque; } | | std::deque<BSONObj>& getDeque() { return _deque; } | |
| void push_back(BSONObj& op) { | | void push_back(BSONObj& op) { | |
| _deque.push_back(op); | | _deque.push_back(op); | |
| _size += op.objsize(); | | _size += op.objsize(); | |
| } | | } | |
| bool empty() { | | bool empty() { | |
| return _deque.empty(); | | return _deque.empty(); | |
| } | | } | |
|
| | | BSONObj back(){ | |
| | | verify(!_deque.empty()); | |
| | | return _deque.back(); | |
| | | } | |
| | | | |
| private: | | private: | |
| std::deque<BSONObj> _deque; | | std::deque<BSONObj> _deque; | |
| size_t _size; | | size_t _size; | |
| }; | | }; | |
| | | | |
| // returns true if we should continue waiting for BSONObjs, false if we should | | // returns true if we should continue waiting for BSONObjs, false if we should | |
| // stop waiting and apply the queue we have. Only returns false if !ops.empty(). | | // stop waiting and apply the queue we have. Only returns false if !ops.empty(). | |
| bool tryPopAndWaitForMore(OpQueue* ops); | | bool tryPopAndWaitForMore(OpQueue* ops); | |
| | | | |
| // After ops have been written to db, call this | | // After ops have been written to db, call this | |
| | | | |
| skipping to change at line 116 | | skipping to change at line 104 | |
| // Ops are removed from the deque. | | // Ops are removed from the deque. | |
| void applyOpsToOplog(std::deque<BSONObj>* ops); | | void applyOpsToOplog(std::deque<BSONObj>* ops); | |
| | | | |
| protected: | | protected: | |
| // Cap the batches using the limit on journal commits. | | // Cap the batches using the limit on journal commits. | |
| // This works out to be 100 MB (64 bit) or 50 MB (32 bit) | | // This works out to be 100 MB (64 bit) or 50 MB (32 bit) | |
| static const unsigned int replBatchLimitBytes = dur::UncommittedBytesLimit; | | static const unsigned int replBatchLimitBytes = dur::UncommittedBytesLimit; | |
| static const int replBatchLimitSeconds = 1; | | static const int replBatchLimitSeconds = 1; | |
| static const unsigned int replBatchLimitOperations = 5000; | | static const unsigned int replBatchLimitOperations = 5000; | |
| | | | |
|
| // Prefetch and write a deque of operations, using the supplied function. | | // Prefetch and write a deque of operations. | |
| // Initial Sync and Sync Tail each use a different function. | | void multiApply(std::deque<BSONObj>& ops); | |
| void multiApply(std::deque<BSONObj>& ops, MultiSyncApplyFunc applyFunc); | | | |
| | | /** | |
| | | * Applies oplog entries until reaching "endOpTime". | |
| | | * | |
| | | * Returns the OpTime from the last doc applied | |
| | | * | |
| | | * NOTE: Will not transition or check states | |
| | | */ | |
| | | void _applyOplogUntil(const OpTime& endOpTime); | |
| | | | |
| // The version of the last op to be read | | // The version of the last op to be read | |
| int oplogVersion; | | int oplogVersion; | |
| | | | |
| private: | | private: | |
| BackgroundSyncInterface* _networkQueue; | | BackgroundSyncInterface* _networkQueue; | |
| | | | |
|
| | | // Function to use during applyOps | |
| | | MultiSyncApplyFunc _applyFunc; | |
| | | | |
| // Doles out all the work to the reader pool threads and waits for them to complete | | // Doles out all the work to the reader pool threads and waits for them to complete | |
| void prefetchOps(const std::deque<BSONObj>& ops); | | void prefetchOps(const std::deque<BSONObj>& ops); | |
| // Used by the thread pool readers to prefetch an op | | // Used by the thread pool readers to prefetch an op | |
| static void prefetchOp(const BSONObj& op); | | static void prefetchOp(const BSONObj& op); | |
| | | | |
| // Doles out all the work to the writer pool threads and waits for them to complete | | // Doles out all the work to the writer pool threads and waits for them to complete | |
|
| void applyOps(const std::vector< std::vector<BSONObj> >& writerVectors, | | void applyOps(const std::vector< std::vector<BSONObj> >& writerVectors); | |
| MultiSyncApplyFunc applyFunc); | | | |
| | | | |
| void fillWriterVectors(const std::deque<BSONObj>& ops, | | void fillWriterVectors(const std::deque<BSONObj>& ops, | |
| std::vector< std::vector<BSONObj> >* writerVectors); | | std::vector< std::vector<BSONObj> >* writerVectors); | |
| void handleSlaveDelay(const BSONObj& op); | | void handleSlaveDelay(const BSONObj& op); | |
| void setOplogVersion(const BSONObj& op); | | void setOplogVersion(const BSONObj& op); | |
| }; | | }; | |
| | | | |
| /** | | /** | |
| * Initial clone and sync | | * Initial clone and sync | |
| */ | | */ | |
| class InitialSync : public SyncTail { | | class InitialSync : public SyncTail { | |
| public: | | public: | |
| virtual ~InitialSync(); | | virtual ~InitialSync(); | |
| InitialSync(BackgroundSyncInterface *q); | | InitialSync(BackgroundSyncInterface *q); | |
|
| | | virtual void oplogApplication(const OpTime& stopOpTime); | |
| | | | |
|
| /** | | | |
| * Creates the initial oplog entry: applies applyGTEObj and writes | | | |
| it to the oplog. Then | | | |
| * this runs oplogApplySegment allowing recloning documents. | | | |
| */ | | | |
| BSONObj oplogApplication(const BSONObj& applyGTEObj, const BSONObj& | | | |
| minValidObj); | | | |
| }; | | }; | |
| | | | |
| // TODO: move hbmsg into an error-keeping class (SERVER-4444) | | // TODO: move hbmsg into an error-keeping class (SERVER-4444) | |
| void sethbmsg(const string& s, const int logLevel=0); | | void sethbmsg(const string& s, const int logLevel=0); | |
| | | | |
| // These free functions are used by the thread pool workers to write ops to the db. | | // These free functions are used by the thread pool workers to write ops to the db. | |
| void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st); | | void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st); | |
| void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st); | | void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st); | |
| | | | |
| } // namespace replset | | } // namespace replset | |
| | | | |
End of changes. 9 change blocks. |
| 46 lines changed or deleted | | 26 lines changed or added | |
|
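OpQueue above accumulates the byte size of ops as they are pushed, which is what lets tryPopAndWaitForMore cut a batch once replBatchLimitBytes or replBatchLimitOperations is reached. A stripped-down sketch of that accounting; BSONObj is stubbed, and the 100 MB figure is the 64-bit value the comment gives for dur::UncommittedBytesLimit.

    #include <cstddef>
    #include <deque>

    struct FakeBSONObj {             // stand-in for BSONObj
        int size;
        int objsize() const { return size; }
    };

    struct OpQueueSketch {
        std::deque<FakeBSONObj> deque;
        size_t bytes = 0;

        void push_back(const FakeBSONObj& op) {
            deque.push_back(op);
            bytes += op.objsize();   // size is tracked at push time, as above
        }

        // Batch-cut test mirroring replBatchLimitOperations / replBatchLimitBytes.
        bool full() const {
            return deque.size() >= 5000 || bytes >= 100u * 1024 * 1024;
        }
    };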
| runner_yield_policy.h | | runner_yield_policy.h | |
| | | | |
| skipping to change at line 33 | | skipping to change at line 33 | |
| * file(s), but you are not obligated to do so. If you do not wish to do so, | | * file(s), but you are not obligated to do so. If you do not wish to do so, | |
| * delete this exception statement from your version. If you delete this | | * delete this exception statement from your version. If you delete this | |
| * exception statement from all source files in the program, then also delete | | * exception statement from all source files in the program, then also delete | |
| * it in the license file. | | * it in the license file. | |
| */ | | */ | |
| | | | |
| #pragma once | | #pragma once | |
| | | | |
| #include "mongo/db/clientcursor.h" | | #include "mongo/db/clientcursor.h" | |
| #include "mongo/db/catalog/collection.h" | | #include "mongo/db/catalog/collection.h" | |
|
| | | #include "mongo/db/kill_current_op.h" | |
| #include "mongo/util/elapsed_tracker.h" | | #include "mongo/util/elapsed_tracker.h" | |
| | | | |
| namespace mongo { | | namespace mongo { | |
| | | | |
| class RunnerYieldPolicy { | | class RunnerYieldPolicy { | |
| public: | | public: | |
| RunnerYieldPolicy() : _elapsedTracker(128, 10), _runnerYielding(NULL) { } | | RunnerYieldPolicy() : _elapsedTracker(128, 10), _runnerYielding(NULL) { } | |
| | | | |
| ~RunnerYieldPolicy() { | | ~RunnerYieldPolicy() { | |
| if (NULL != _runnerYielding) { | | if (NULL != _runnerYielding) { | |
| | | | |
| skipping to change at line 83 | | skipping to change at line 84 | |
| | | | |
| // If micros > 0, we should yield. | | // If micros > 0, we should yield. | |
| runner->saveState(); | | runner->saveState(); | |
| _runnerYielding = runner; | | _runnerYielding = runner; | |
| | | | |
| runner->collection()->cursorCache()->registerRunner( _runnerYielding ); | | runner->collection()->cursorCache()->registerRunner( _runnerYielding ); | |
| | | | |
| // Note that this call checks for interrupt, and thus can throw if interrupt flag is set | | // Note that this call checks for interrupt, and thus can throw if interrupt flag is set | |
| staticYield(micros, record); | | staticYield(micros, record); | |
| | | | |
|
| | | // staticYield does not check for interrupt after regaining the | |
| | | lock, but the | |
| | | // bg index building interrupt code is depending on this behavi | |
| | | or. Otherwise, we may | |
| | | // return an unexpected error when restoreState(), below, notic | |
| | | es its cursor has been | |
| | | // invalidated. | |
| | | killCurrentOp.checkForInterrupt(); | |
| | | | |
| if ( runner->collection() ) { | | if ( runner->collection() ) { | |
| // if the runner was killed, runner->collection() will return NULL | | // if the runner was killed, runner->collection() will return NULL | |
| // so we don't deregister as it was done when killed | | // so we don't deregister as it was done when killed | |
| runner->collection()->cursorCache()->deregisterRunner( _runnerYielding ); | | runner->collection()->cursorCache()->deregisterRunner( _runnerYielding ); | |
| } | | } | |
| _runnerYielding = NULL; | | _runnerYielding = NULL; | |
| _elapsedTracker.resetLastTime(); | | _elapsedTracker.resetLastTime(); | |
| return runner->restoreState(); | | return runner->restoreState(); | |
| } | | } | |
| | | | |
| | | | |
End of changes. 2 change blocks. |
| 0 lines changed or deleted | | 10 lines changed or added | |
|
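The inserted checkForInterrupt() call above establishes a specific ordering on the yield path: save state, yield (which may itself throw), re-check for interrupt after the lock is regained, and only then restore. A toy sketch of that ordering with stand-in types; only the sequence, not the types or calls, reflects mongo's code.

    #include <stdexcept>

    struct ToyRunner {                      // stand-in for a mongo Runner
        bool killed = false;
        void saveState() {}                 // detach from storage before yielding
        bool restoreState() { return !killed; }
    };

    // Stub for the interrupt flag check; mongo asks killCurrentOp instead.
    bool interruptRequested() { return false; }

    bool yieldAndRestore(ToyRunner& runner) {
        runner.saveState();
        // ... lock released here; other operations (e.g. a bg index build's
        // kill request) may run; lock reacquired ...
        if (interruptRequested()) {
            // Re-checking *after* re-locking reports the kill as an interrupt
            // instead of a confusing restoreState() failure below.
            throw std::runtime_error("operation was interrupted");
        }
        return runner.restoreState();
    }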