diff --git a/include/lucene++/AbstractAllTermDocs.h b/include/lucene++/AbstractAllTermDocs.h index f6100403..eb6342ea 100644 --- a/include/lucene++/AbstractAllTermDocs.h +++ b/include/lucene++/AbstractAllTermDocs.h @@ -32,7 +32,7 @@ class LPPAPI AbstractAllTermDocs : public TermDocs, public LuceneObject { virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); + virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); virtual bool isDeleted(int32_t doc) = 0; diff --git a/include/lucene++/Array.h b/include/lucene++/Array.h index 7dc30234..f868341e 100644 --- a/include/lucene++/Array.h +++ b/include/lucene++/Array.h @@ -100,7 +100,6 @@ class Array { } TYPE& operator[] (int32_t i) const { - BOOST_ASSERT(i >= 0 && i < array->size); return array->data[i]; } diff --git a/include/lucene++/BooleanScorer.h b/include/lucene++/BooleanScorer.h index fab5adae..9009e191 100644 --- a/include/lucene++/BooleanScorer.h +++ b/include/lucene++/BooleanScorer.h @@ -46,6 +46,7 @@ class LPPAPI BooleanScorer : public Scorer { int32_t minNrShouldMatch; int32_t end; BucketPtr current; + Bucket* __current = nullptr; int32_t doc; protected: @@ -71,8 +72,10 @@ class BooleanScorerCollector : public Collector { protected: BucketTableWeakPtr _bucketTable; + BucketTable* __bucketTable = nullptr; int32_t mask; ScorerWeakPtr _scorer; + Scorer* __scorer = nullptr; public: virtual void collect(int32_t doc); @@ -121,6 +124,7 @@ class Bucket : public LuceneObject { int32_t bits; // used for bool constraints int32_t coord; // count of terms in score BucketWeakPtr _next; // next valid bucket + Bucket* __next = nullptr; // next valid bucket }; /// A simple hash table of document scores within a range. @@ -137,6 +141,7 @@ class BucketTable : public LuceneObject { Collection buckets; BucketPtr first; // head of valid list + Bucket* __first = nullptr; // head of valid list public: CollectorPtr newCollector(int32_t mask); diff --git a/include/lucene++/BufferedIndexInput.h b/include/lucene++/BufferedIndexInput.h index e71b514f..6ebe4083 100644 --- a/include/lucene++/BufferedIndexInput.h +++ b/include/lucene++/BufferedIndexInput.h @@ -30,12 +30,18 @@ class LPPAPI BufferedIndexInput : public IndexInput { int32_t bufferLength; // end of valid bytes int32_t bufferPosition; // next byte to read ByteArray buffer; + decltype(buffer.get()) __buffer; public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); + /// Reads an int stored in variable-length format. Reads between one and five + /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. + /// @see IndexOutput#writeVInt(int32_t) + virtual int32_t readVInt(); + /// Change the buffer size used by this IndexInput. 
void setBufferSize(int32_t newSize); diff --git a/include/lucene++/Collection.h b/include/lucene++/Collection.h index 2721503d..0263f3f1 100644 --- a/include/lucene++/Collection.h +++ b/include/lucene++/Collection.h @@ -193,6 +193,10 @@ class Collection : public LuceneSync { bool operator!= (const this_type& other) { return (container != other.container); } + + collection_type* get() { + return container.get(); + } }; template diff --git a/include/lucene++/DirectoryReader.h b/include/lucene++/DirectoryReader.h index 0dd2753c..dfe19441 100644 --- a/include/lucene++/DirectoryReader.h +++ b/include/lucene++/DirectoryReader.h @@ -262,7 +262,7 @@ class MultiTermDocs : public TermPositions, public LuceneObject { /// Attempts to read multiple entries from the enumeration, up to length of docs. /// Optimized implementation. - virtual int32_t read(Collection docs, Collection freqs); + virtual int32_t read(Collection& docs, Collection& freqs); /// Skips entries to the first beyond the current whose document number is greater than or equal to target. virtual bool skipTo(int32_t target); diff --git a/include/lucene++/FilterIndexReader.h b/include/lucene++/FilterIndexReader.h index ebf58528..b04c0234 100644 --- a/include/lucene++/FilterIndexReader.h +++ b/include/lucene++/FilterIndexReader.h @@ -93,7 +93,7 @@ class LPPAPI FilterTermDocs : public TermPositions, public LuceneObject { virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); + virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); }; diff --git a/include/lucene++/LuceneFactory.h b/include/lucene++/LuceneFactory.h index b6a56264..bf4bd68c 100644 --- a/include/lucene++/LuceneFactory.h +++ b/include/lucene++/LuceneFactory.h @@ -104,7 +104,11 @@ boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 co template boost::shared_ptr newLucene() { - boost::shared_ptr instance(newInstance()); +#if BOOST_VERSION <= 103800 + boost::shared_ptr instance = boost::shared_ptr(new T); +#else + boost::shared_ptr instance = boost::make_shared(); +#endif instance->initialize(); return instance; } diff --git a/include/lucene++/MiscUtils.h b/include/lucene++/MiscUtils.h index 06286bd5..98572248 100644 --- a/include/lucene++/MiscUtils.h +++ b/include/lucene++/MiscUtils.h @@ -132,6 +132,14 @@ class LPPAPI MiscUtils { static int32_t unsignedShift(int32_t num, int32_t shift); }; +inline int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) { + return (shift & 0x3f) == 0 ? num : (((uint64_t)num >> 1) & 0x7fffffffffffffffLL) >> ((shift & 0x3f) - 1); +} + +inline int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) { + return (shift & 0x1f) == 0 ? num : (((uint32_t)num >> 1) & 0x7fffffff) >> ((shift & 0x1f) - 1); +} + } #endif diff --git a/include/lucene++/MultipleTermPositions.h b/include/lucene++/MultipleTermPositions.h index 8e3e9360..f61dade1 100644 --- a/include/lucene++/MultipleTermPositions.h +++ b/include/lucene++/MultipleTermPositions.h @@ -41,7 +41,7 @@ class LPPAPI MultipleTermPositions : public TermPositions, public LuceneObject { virtual void seek(const TermEnumPtr& termEnum); /// Not implemented. - virtual int32_t read(Collection docs, Collection freqs); + virtual int32_t read(Collection& docs, Collection& freqs); /// Not implemented. 
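    /// (MultipleTermPositions merges several underlying position streams, so bulk read() and payload retrieval are left unsupported.)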
    virtual ByteArray getPayload(ByteArray data, int32_t offset);
diff --git a/include/lucene++/PhrasePositions.h b/include/lucene++/PhrasePositions.h
index 0ef67d82..84c5d058 100644
--- a/include/lucene++/PhrasePositions.h
+++ b/include/lucene++/PhrasePositions.h
@@ -25,7 +25,7 @@ class PhrasePositions : public LuceneObject {
     int32_t count; // remaining pos in this doc
     int32_t offset; // position in phrase
     TermPositionsPtr tp; // stream of positions
-    PhrasePositionsPtr _next; // used to make lists
+    PhrasePositions* __next = nullptr; // used to make lists
     bool repeats; // there's other pp for same term (eg. query="1st word 2nd word"~1)

 public:
diff --git a/include/lucene++/PhraseQueue.h b/include/lucene++/PhraseQueue.h
index ef8b4074..c2c00f0e 100644
--- a/include/lucene++/PhraseQueue.h
+++ b/include/lucene++/PhraseQueue.h
@@ -10,8 +10,10 @@
 #include "PriorityQueue.h"

 namespace Lucene {
+// raw pointer
+typedef PhrasePositions* PhrasePositionsStar;

-class PhraseQueue : public PriorityQueue<PhrasePositionsPtr> {
+class PhraseQueue : public PriorityQueue<PhrasePositionsStar> {
 public:
     PhraseQueue(int32_t size);
     virtual ~PhraseQueue();

     LUCENE_CLASS(PhraseQueue);

 protected:
-    virtual bool lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second);
+    virtual bool lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second);
+
 };

 }
diff --git a/include/lucene++/PhraseScorer.h b/include/lucene++/PhraseScorer.h
index c36e3ce3..cddb5701 100644
--- a/include/lucene++/PhraseScorer.h
+++ b/include/lucene++/PhraseScorer.h
@@ -8,6 +8,7 @@
 #define PHRASESCORER_H

 #include "Scorer.h"
+#include <vector>

 namespace Lucene {

@@ -27,14 +28,16 @@ class PhraseScorer : public Scorer {
 protected:
     WeightPtr weight;
+    Weight* __weight = nullptr;
     ByteArray norms;
     double value;

     bool firstTime;
     bool more;
     PhraseQueuePtr pq;
-    PhrasePositionsPtr first;
-    PhrasePositionsPtr last;
+    std::vector<PhrasePositionsPtr> _holds;
+    PhrasePositions* __first = nullptr;
+    PhrasePositions* __last = nullptr;

     double freq; // phrase frequency in current doc as computed by phraseFreq().
diff --git a/include/lucene++/SegmentTermDocs.h b/include/lucene++/SegmentTermDocs.h
index 8bc20226..0eb92346 100644
--- a/include/lucene++/SegmentTermDocs.h
+++ b/include/lucene++/SegmentTermDocs.h
@@ -20,10 +20,13 @@ class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject {
 protected:
     SegmentReaderWeakPtr _parent;
+    SegmentReader* __parent;
     IndexInputPtr _freqStream;
+    IndexInput* __freqStream;
     int32_t count;
     int32_t df;
     BitVectorPtr deletedDocs;
+    BitVector* __deletedDocs;
     int32_t _doc;
     int32_t _freq;

@@ -61,7 +64,7 @@ class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject {
     virtual bool next();

     /// Optimized implementation.
-    virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
+    virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);

     /// Optimized implementation.
     virtual bool skipTo(int32_t target);

@@ -72,7 +75,7 @@ class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject {
 protected:
     virtual void skippingDoc();
-    virtual int32_t readNoTf(Collection<int32_t> docs, Collection<int32_t> freqs, int32_t length);
+    virtual int32_t readNoTf(Collection<int32_t>& docs, Collection<int32_t>& freqs, int32_t length);

     /// Overridden by SegmentTermPositions to skip in prox stream.
    virtual void skipProx(int64_t proxPointer, int32_t payloadLength);
diff --git a/include/lucene++/SegmentTermPositions.h b/include/lucene++/SegmentTermPositions.h
index d62ddc67..2e10d15b 100644
--- a/include/lucene++/SegmentTermPositions.h
+++ b/include/lucene++/SegmentTermPositions.h
@@ -46,7 +46,7 @@ class LPPAPI SegmentTermPositions : public SegmentTermDocs {
     virtual bool next();

     /// Not supported
-    virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
+    virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);

     /// Returns the length of the payload at the current term position.
     virtual int32_t getPayloadLength();
diff --git a/include/lucene++/Similarity.h b/include/lucene++/Similarity.h
index d34f8aa3..ac55bff3 100644
--- a/include/lucene++/Similarity.h
+++ b/include/lucene++/Similarity.h
@@ -434,8 +434,8 @@ class LPPAPI Similarity : public LuceneObject {
 protected:
     static const int32_t NO_DOC_ID_PROVIDED;

-protected:
-    static const Collection<double> NORM_TABLE();
+public:
+    static const Collection<double> NORM_TABLE;

 public:
     /// Return the default Similarity implementation used by indexing and search code.
@@ -450,7 +450,7 @@ class LPPAPI Similarity : public LuceneObject {

     /// Returns a table for decoding normalization bytes.
     /// @see #encodeNorm(double)
-    static const Collection<double> getNormDecoder();
+    static const Collection<double>& getNormDecoder();

     /// Compute the normalization value for a field, given the accumulated state of term processing for this
     /// field (see {@link FieldInvertState}).
diff --git a/include/lucene++/SloppyPhraseScorer.h b/include/lucene++/SloppyPhraseScorer.h
index 5463b26a..eff7aabc 100644
--- a/include/lucene++/SloppyPhraseScorer.h
+++ b/include/lucene++/SloppyPhraseScorer.h
@@ -20,8 +20,8 @@ class SloppyPhraseScorer : public PhraseScorer {
 protected:
     int32_t slop;
-    Collection<PhrasePositionsPtr> repeats;
-    Collection<PhrasePositionsPtr> tmpPos; // for flipping repeating pps
+    Collection<PhrasePositions*> repeats;
+    Collection<PhrasePositions*> tmpPos; // for flipping repeating pps
     bool checkedRepeats;

 public:
@@ -42,7 +42,7 @@ class SloppyPhraseScorer : public PhraseScorer {
 protected:
     /// Flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back.
     /// Assumes: pp!=pp2, pp2 in pq, pp not in pq. Called only when there are repeating pps.
-    PhrasePositionsPtr flip(const PhrasePositionsPtr& pp, const PhrasePositionsPtr& pp2);
+    PhrasePositions* flip(PhrasePositions* pp, PhrasePositions* pp2);

     /// Init PhrasePositions in place.
     /// There is a one time initialization for this scorer:
@@ -61,7 +61,7 @@ class SloppyPhraseScorer : public PhraseScorer {
     /// of the same word would go elsewhere in the matched doc.
     /// @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions out of the first
     /// two PPs found to not differ.
-    PhrasePositionsPtr termPositionsDiffer(const PhrasePositionsPtr& pp);
+    PhrasePositions* termPositionsDiffer(PhrasePositions* pp);
 };

 }
diff --git a/include/lucene++/TermDocs.h b/include/lucene++/TermDocs.h
index 6357335a..2bff90ea 100644
--- a/include/lucene++/TermDocs.h
+++ b/include/lucene++/TermDocs.h
@@ -44,7 +44,7 @@ class LPPAPI TermDocs {
     /// Attempts to read multiple entries from the enumeration, up to length of docs. Document numbers are stored
     /// in docs, and term frequencies are stored in freqs. Returns the number of entries read. Zero is only
     /// returned when the stream has been exhausted.
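    /// The collections are passed by reference below so that optimized implementations can fill the caller's
    /// buffers in place without copying the shared container handle on every call.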
- virtual int32_t read(Collection docs, Collection freqs) = 0; + virtual int32_t read(Collection& docs, Collection& freqs) = 0; /// Skips entries to the first beyond the current whose document number is greater than or equal to target. /// Returns true if there is such an entry. diff --git a/include/lucene++/TermScorer.h b/include/lucene++/TermScorer.h index f76ebf18..cfb9c00e 100644 --- a/include/lucene++/TermScorer.h +++ b/include/lucene++/TermScorer.h @@ -27,13 +27,16 @@ class LPPAPI TermScorer : public Scorer { protected: WeightPtr weight; - TermDocsPtr termDocs; + TermDocsPtr termDocs; // for malloc and free + TermDocs* __termDocs; // for work, ByteArray norms; double weightValue; int32_t doc; Collection docs; // buffered doc numbers + decltype(docs.get()) __docs; // Collection freqs; // buffered term freqs + decltype(freqs.get()) __freqs; // int32_t freq; int32_t pointer; @@ -70,7 +73,7 @@ class LPPAPI TermScorer : public Scorer { } protected: - static const Collection SIM_NORM_DECODER(); + static const Collection& SIM_NORM_DECODER(); virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); }; diff --git a/include/lucene++/TopScoreDocCollector.h b/include/lucene++/TopScoreDocCollector.h index 0fb8a69c..a43fe804 100644 --- a/include/lucene++/TopScoreDocCollector.h +++ b/include/lucene++/TopScoreDocCollector.h @@ -29,6 +29,7 @@ class LPPAPI TopScoreDocCollector : public TopDocsCollector { ScoreDocPtr pqTop; int32_t docBase; ScorerWeakPtr _scorer; + Scorer* __scorer; public: /// Creates a new {@link TopScoreDocCollector} given the number of hits to collect and whether documents diff --git a/src/contrib/include/MemoryIndex.h b/src/contrib/include/MemoryIndex.h index d6cdd1e5..8efc5282 100644 --- a/src/contrib/include/MemoryIndex.h +++ b/src/contrib/include/MemoryIndex.h @@ -323,7 +323,7 @@ class LPPCONTRIBAPI MemoryIndexTermPositions : public TermPositions, public Luce virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); + virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); diff --git a/src/contrib/memory/MemoryIndex.cpp b/src/contrib/memory/MemoryIndex.cpp index 9c17ddaf..07372e82 100644 --- a/src/contrib/memory/MemoryIndex.cpp +++ b/src/contrib/memory/MemoryIndex.cpp @@ -569,7 +569,7 @@ bool MemoryIndexTermPositions::next() { return _next; } -int32_t MemoryIndexTermPositions::read(Collection docs, Collection freqs) { +int32_t MemoryIndexTermPositions::read(Collection& docs, Collection& freqs) { if (!hasNext) { return 0; } diff --git a/src/core/include/_ParallelReader.h b/src/core/include/_ParallelReader.h index d308c4fa..53f6f599 100644 --- a/src/core/include/_ParallelReader.h +++ b/src/core/include/_ParallelReader.h @@ -61,7 +61,7 @@ class ParallelTermDocs : public TermPositions, public LuceneObject { virtual void seek(const TermPtr& term); virtual void seek(const TermEnumPtr& termEnum); virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); + virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); }; diff --git a/src/core/index/AbstractAllTermDocs.cpp b/src/core/index/AbstractAllTermDocs.cpp index ea6e6acf..aa6edccf 100644 --- a/src/core/index/AbstractAllTermDocs.cpp +++ b/src/core/index/AbstractAllTermDocs.cpp @@ -41,7 +41,7 @@ bool AbstractAllTermDocs::next() { return skipTo(_doc + 1); } -int32_t AbstractAllTermDocs::read(Collection 
docs, Collection freqs) { +int32_t AbstractAllTermDocs::read(Collection& docs, Collection& freqs) { int32_t length = docs.size(); int32_t i = 0; while (i < length && _doc < maxDoc) { diff --git a/src/core/index/DirectoryReader.cpp b/src/core/index/DirectoryReader.cpp index c59bb404..8704c32c 100644 --- a/src/core/index/DirectoryReader.cpp +++ b/src/core/index/DirectoryReader.cpp @@ -984,7 +984,7 @@ bool MultiTermDocs::next() { } } -int32_t MultiTermDocs::read(Collection docs, Collection freqs) { +int32_t MultiTermDocs::read(Collection& docs, Collection& freqs) { while (true) { while (!current) { if (pointer < readers.size()) { // try next segment diff --git a/src/core/index/FilterIndexReader.cpp b/src/core/index/FilterIndexReader.cpp index 8dbcf931..81bdfa42 100644 --- a/src/core/index/FilterIndexReader.cpp +++ b/src/core/index/FilterIndexReader.cpp @@ -194,7 +194,7 @@ bool FilterTermDocs::next() { return in->next(); } -int32_t FilterTermDocs::read(Collection docs, Collection freqs) { +int32_t FilterTermDocs::read(Collection& docs, Collection& freqs) { return in->read(docs, freqs); } diff --git a/src/core/index/MultipleTermPositions.cpp b/src/core/index/MultipleTermPositions.cpp index f5c5a1ba..afed29dd 100644 --- a/src/core/index/MultipleTermPositions.cpp +++ b/src/core/index/MultipleTermPositions.cpp @@ -98,7 +98,7 @@ void MultipleTermPositions::seek(const TermEnumPtr& termEnum) { boost::throw_exception(UnsupportedOperationException()); } -int32_t MultipleTermPositions::read(Collection docs, Collection freqs) { +int32_t MultipleTermPositions::read(Collection& docs, Collection& freqs) { boost::throw_exception(UnsupportedOperationException()); return 0; } diff --git a/src/core/index/ParallelReader.cpp b/src/core/index/ParallelReader.cpp index fb258e1f..80368b66 100644 --- a/src/core/index/ParallelReader.cpp +++ b/src/core/index/ParallelReader.cpp @@ -488,7 +488,7 @@ bool ParallelTermDocs::next() { return termDocs ? termDocs->next() : false; } -int32_t ParallelTermDocs::read(Collection docs, Collection freqs) { +int32_t ParallelTermDocs::read(Collection& docs, Collection& freqs) { return termDocs ? 
termDocs->read(docs, freqs) : 0; } diff --git a/src/core/index/SegmentTermDocs.cpp b/src/core/index/SegmentTermDocs.cpp index 9e49ec8c..7e17b9ef 100644 --- a/src/core/index/SegmentTermDocs.cpp +++ b/src/core/index/SegmentTermDocs.cpp @@ -38,16 +38,19 @@ SegmentTermDocs::SegmentTermDocs(const SegmentReaderPtr& parent) { { SyncLock parentLock(parent); this->deletedDocs = parent->deletedDocs; + this->__deletedDocs = this->deletedDocs.get(); } this->skipInterval = parent->core->getTermsReader()->getSkipInterval(); this->maxSkipLevels = parent->core->getTermsReader()->getMaxSkipLevels(); + this->__parent = parent.get(); + this->__freqStream = _freqStream.get(); } SegmentTermDocs::~SegmentTermDocs() { } void SegmentTermDocs::seek(const TermPtr& term) { - TermInfoPtr ti(SegmentReaderPtr(_parent)->core->getTermsReader()->get(term)); + TermInfoPtr ti(__parent->core->getTermsReader()->get(term)); seek(ti, term); } @@ -56,15 +59,14 @@ void SegmentTermDocs::seek(const TermEnumPtr& termEnum) { TermPtr term; SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast(termEnum)); - SegmentReaderPtr parent(_parent); // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs - if (segmentTermEnum && segmentTermEnum->fieldInfos == parent->core->fieldInfos) { // optimized case + if (segmentTermEnum && segmentTermEnum->fieldInfos == __parent->core->fieldInfos) { // optimized case term = segmentTermEnum->term(); ti = segmentTermEnum->termInfo(); } else { // punt case term = termEnum->term(); - ti = parent->core->getTermsReader()->get(term); + ti = __parent->core->getTermsReader()->get(term); } seek(ti, term); @@ -72,7 +74,7 @@ void SegmentTermDocs::seek(const TermEnumPtr& termEnum) { void SegmentTermDocs::seek(const TermInfoPtr& ti, const TermPtr& term) { count = 0; - FieldInfoPtr fi(SegmentReaderPtr(_parent)->core->fieldInfos->fieldInfo(term->_field)); + FieldInfoPtr fi(__parent->core->fieldInfos->fieldInfo(term->_field)); currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false; currentFieldStoresPayloads = fi ? 
fi->storePayloads : false; if (!ti) { @@ -83,13 +85,13 @@ void SegmentTermDocs::seek(const TermInfoPtr& ti, const TermPtr& term) { freqBasePointer = ti->freqPointer; proxBasePointer = ti->proxPointer; skipPointer = freqBasePointer + ti->skipOffset; - _freqStream->seek(freqBasePointer); + __freqStream->seek(freqBasePointer); haveSkipped = false; } } void SegmentTermDocs::close() { - _freqStream->close(); + __freqStream->close(); if (skipListReader) { skipListReader->close(); } @@ -111,7 +113,7 @@ bool SegmentTermDocs::next() { if (count == df) { return false; } - int32_t docCode = _freqStream->readVInt(); + int32_t docCode = __freqStream->readVInt(); if (currentFieldOmitTermFreqAndPositions) { _doc += docCode; @@ -121,13 +123,13 @@ bool SegmentTermDocs::next() { if ((docCode & 1) != 0) { // if low bit is set _freq = 1; // freq is one } else { - _freq = _freqStream->readVInt(); // else read freq + _freq = __freqStream->readVInt(); // else read freq } } ++count; - if (!deletedDocs || !deletedDocs->get(_doc)) { + if (!__deletedDocs || !__deletedDocs->get(_doc)) { break; } skippingDoc(); @@ -135,26 +137,28 @@ bool SegmentTermDocs::next() { return true; } -int32_t SegmentTermDocs::read(Collection docs, Collection freqs) { - int32_t length = docs.size(); +int32_t SegmentTermDocs::read(Collection& docs, Collection& freqs) { + auto* __docs = docs.get(); + auto* __freqs = freqs.get(); + int32_t length = __docs->size(); if (currentFieldOmitTermFreqAndPositions) { return readNoTf(docs, freqs, length); } else { int32_t i = 0; while (i < length && count < df) { // manually inlined call to next() for speed - int32_t docCode = _freqStream->readVInt(); + int32_t docCode = __freqStream->readVInt(); _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit if ((docCode & 1) != 0) { // if low bit is set _freq = 1; // freq is one } else { - _freq = _freqStream->readVInt(); // else read freq + _freq = __freqStream->readVInt(); // else read freq } ++count; - if (!deletedDocs || !deletedDocs->get(_doc)) { - docs[i] = _doc; - freqs[i] = _freq; + if (!__deletedDocs || !__deletedDocs->get(_doc)) { + (*__docs)[i] = _doc; + (*__freqs)[i] = _freq; ++i; } } @@ -162,14 +166,14 @@ int32_t SegmentTermDocs::read(Collection docs, Collection freq } } -int32_t SegmentTermDocs::readNoTf(Collection docs, Collection freqs, int32_t length) { +int32_t SegmentTermDocs::readNoTf(Collection& docs, Collection& freqs, int32_t length) { int32_t i = 0; while (i < length && count < df) { // manually inlined call to next() for speed - _doc += _freqStream->readVInt(); + _doc += __freqStream->readVInt(); ++count; - if (!deletedDocs || !deletedDocs->get(_doc)) { + if (!__deletedDocs || !__deletedDocs->get(_doc)) { docs[i] = _doc; // Hardware freq to 1 when term freqs were not stored in the index @@ -186,7 +190,7 @@ void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) { bool SegmentTermDocs::skipTo(int32_t target) { if (df >= skipInterval) { // optimized case if (!skipListReader) { - skipListReader = newLucene(boost::dynamic_pointer_cast(_freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone + skipListReader = newLucene(boost::dynamic_pointer_cast(__freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone } if (!haveSkipped) { // lazily initialize skip stream @@ -196,7 +200,7 @@ bool SegmentTermDocs::skipTo(int32_t target) { int32_t newCount = skipListReader->skipTo(target); if (newCount > count) { - _freqStream->seek(skipListReader->getFreqPointer()); + 
__freqStream->seek(skipListReader->getFreqPointer()); skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength()); _doc = skipListReader->getDoc(); @@ -219,6 +223,7 @@ IndexInputPtr SegmentTermDocs::freqStream() { void SegmentTermDocs::freqStream(const IndexInputPtr& freqStream) { _freqStream = freqStream; + __freqStream = freqStream.get(); } } diff --git a/src/core/index/SegmentTermPositions.cpp b/src/core/index/SegmentTermPositions.cpp index 8d1296c8..ddf90553 100644 --- a/src/core/index/SegmentTermPositions.cpp +++ b/src/core/index/SegmentTermPositions.cpp @@ -88,7 +88,7 @@ bool SegmentTermPositions::next() { return false; } -int32_t SegmentTermPositions::read(Collection docs, Collection freqs) { +int32_t SegmentTermPositions::read(Collection& docs, Collection& freqs) { boost::throw_exception(UnsupportedOperationException(L"TermPositions does not support processing multiple documents in one call. Use TermDocs instead.")); return 0; } diff --git a/src/core/index/TermDocs.cpp b/src/core/index/TermDocs.cpp index 8ce3c037..1c27896a 100644 --- a/src/core/index/TermDocs.cpp +++ b/src/core/index/TermDocs.cpp @@ -37,7 +37,7 @@ bool TermDocs::next() { return false; // override } -int32_t TermDocs::read(Collection docs, Collection freqs) { +int32_t TermDocs::read(Collection& docs, Collection& freqs) { BOOST_ASSERT(false); return 0; // override } diff --git a/src/core/search/BooleanScorer.cpp b/src/core/search/BooleanScorer.cpp index 8de17dd1..ace799d0 100644 --- a/src/core/search/BooleanScorer.cpp +++ b/src/core/search/BooleanScorer.cpp @@ -52,38 +52,39 @@ BooleanScorer::~BooleanScorer() { bool BooleanScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { bool more = false; - BucketPtr tmp; + Bucket* __tmp; BucketScorerPtr bs(newLucene()); // The internal loop will set the score and doc before calling collect. 
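    // The queued buckets are walked through the cached raw pointers (__current, __first, __next); ownership
    // stays with the existing smart-pointer members, so the only change on this hot path is that no
    // weak_ptr::lock() happens per hop.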
collector->setScorer(bs); do { - bucketTable->first.reset(); + bucketTable->__first = nullptr; - while (current) { // more queued + while (__current) { // more queued // check prohibited & required - if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask) { - if (current->doc >= max) { - tmp = current; - current = current->_next.lock(); - tmp->_next = bucketTable->first; - bucketTable->first = tmp; + if ((__current->bits & prohibitedMask) == 0 && (__current->bits & requiredMask) == requiredMask) { + if (__current->doc >= max) { + __tmp = __current; + __current = __current->__next; + __tmp->__next = bucketTable->__first; + bucketTable->__first = __tmp; continue; } - if (current->coord >= minNrShouldMatch) { - bs->_score = current->score * coordFactors[current->coord]; - bs->doc = current->doc; - bs->freq = current->coord; - collector->collect(current->doc); + if (__current->coord >= minNrShouldMatch) { + auto s = coordFactors.size(); + bs->_score = __current->score * coordFactors[__current->coord]; + bs->doc = __current->doc; + bs->freq = __current->coord; + collector->collect(__current->doc); } } - current = current->_next.lock(); // pop the queue + __current = __current->__next; // pop the queue } - if (bucketTable->first) { - current = bucketTable->first; - bucketTable->first = current->_next.lock(); + if (bucketTable->__first) { + __current = bucketTable->__first; + bucketTable->__first = __current->__next; return true; } @@ -99,8 +100,8 @@ bool BooleanScorer::score(const CollectorPtr& collector, int32_t max, int32_t fi } } } - current = bucketTable->first; - } while (current || more); + __current = bucketTable->__first; + } while (__current || more); return false; } @@ -117,13 +118,13 @@ int32_t BooleanScorer::docID() { int32_t BooleanScorer::nextDoc() { bool more = false; do { - while (bucketTable->first) { // more queued - current = bucketTable->first; - bucketTable->first = current->_next.lock(); // pop the queue + while (bucketTable->__first) { // more queued + __current = bucketTable->__first; + bucketTable->__first = __current->__next; // pop the queue // check prohibited & required and minNrShouldMatch - if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask && current->coord >= minNrShouldMatch) { - doc = current->doc; + if ((__current->bits & prohibitedMask) == 0 && (__current->bits & requiredMask) == requiredMask && __current->coord >= minNrShouldMatch) { + doc = __current->doc; return doc; } } @@ -144,14 +145,14 @@ int32_t BooleanScorer::nextDoc() { more = true; } } - } while (bucketTable->first || more); + } while (bucketTable->__first || more); doc = NO_MORE_DOCS; return doc; } -double BooleanScorer::score() { - return current->score * coordFactors[current->coord]; +inline double BooleanScorer::score() { + return __current->score * coordFactors[__current->coord]; } void BooleanScorer::score(const CollectorPtr& collector) { @@ -171,32 +172,32 @@ String BooleanScorer::toString() { BooleanScorerCollector::BooleanScorerCollector(int32_t mask, const BucketTablePtr& bucketTable) { this->mask = mask; this->_bucketTable = bucketTable; + this->__bucketTable = bucketTable.get(); } BooleanScorerCollector::~BooleanScorerCollector() { } void BooleanScorerCollector::collect(int32_t doc) { - BucketTablePtr table(_bucketTable); + auto* table = __bucketTable; int32_t i = doc & BucketTable::MASK; - BucketPtr bucket(table->buckets[i]); + auto& bucket = table->buckets[i]; if (!bucket) { bucket = newLucene(); - 
table->buckets[i] = bucket; } - - if (bucket->doc != doc) { // invalid bucket - bucket->doc = doc; // set doc - bucket->score = ScorerPtr(_scorer)->score(); // initialize score - bucket->bits = mask; // initialize mask - bucket->coord = 1; // initialize coord - - bucket->_next = table->first; // push onto valid list - table->first = bucket; + auto* __bucket = bucket.get(); + if (__bucket->doc != doc) { // invalid bucket + __bucket->doc = doc; // set doc + __bucket->score = __scorer->score(); // initialize score + __bucket->bits = mask; // initialize mask + __bucket->coord = 1; // initialize coord + + __bucket->__next = table->__first; // push onto valid list + table->__first = __bucket; } else { - bucket->score += ScorerPtr(_scorer)->score(); // increment score - bucket->bits |= mask; // add bits in mask - ++bucket->coord; // increment coord + __bucket->score += __scorer->score(); // increment score + __bucket->bits |= mask; // add bits in mask + ++__bucket->coord; // increment coord } } @@ -206,6 +207,7 @@ void BooleanScorerCollector::setNextReader(const IndexReaderPtr& reader, int32_t void BooleanScorerCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; + this->__scorer = scorer.get(); } bool BooleanScorerCollector::acceptsDocsOutOfOrder() { diff --git a/src/core/search/ConjunctionScorer.cpp b/src/core/search/ConjunctionScorer.cpp index 86d9f73d..aeacaa73 100644 --- a/src/core/search/ConjunctionScorer.cpp +++ b/src/core/search/ConjunctionScorer.cpp @@ -67,9 +67,10 @@ ConjunctionScorer::~ConjunctionScorer() { int32_t ConjunctionScorer::doNext() { int32_t first = 0; int32_t doc = scorers[scorers.size() - 1]->docID(); - ScorerPtr firstScorer; - while ((firstScorer = scorers[first])->docID() < doc) { - doc = firstScorer->advance(doc); + Scorer* __firstScorer; + // TODO: __firstScore nullptr ?? + while ((__firstScorer = scorers[first].get())->docID() < doc) { + doc = __firstScorer->advance(doc); first = first == scorers.size() - 1 ? 
0 : first + 1; } return doc; @@ -78,14 +79,16 @@ int32_t ConjunctionScorer::doNext() { int32_t ConjunctionScorer::advance(int32_t target) { if (lastDoc == NO_MORE_DOCS) { return lastDoc; - } else if (scorers[(scorers.size() - 1)]->docID() < target) { - scorers[(scorers.size() - 1)]->advance(target); + } + auto& scorer = scorers[(scorers.size() - 1)]; + if (scorer->docID() < target) { + scorer->advance(target); } lastDoc = doNext(); return lastDoc; } -int32_t ConjunctionScorer::docID() { +inline int32_t ConjunctionScorer::docID() { return lastDoc; } @@ -103,8 +106,8 @@ int32_t ConjunctionScorer::nextDoc() { double ConjunctionScorer::score() { double sum = 0.0; - for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) { - sum += (*scorer)->score(); + for (auto& scorer : scorers){ + sum += scorer->score(); } return sum * coord; } diff --git a/src/core/search/DefaultSimilarity.cpp b/src/core/search/DefaultSimilarity.cpp index da4c3db2..c98f2d95 100644 --- a/src/core/search/DefaultSimilarity.cpp +++ b/src/core/search/DefaultSimilarity.cpp @@ -27,35 +27,35 @@ double DefaultSimilarity::computeNorm(const String& fieldName, const FieldInvert return (state->getBoost() * lengthNorm(fieldName, numTerms)); } -double DefaultSimilarity::lengthNorm(const String& fieldName, int32_t numTokens) { +inline double DefaultSimilarity::lengthNorm(const String& fieldName, int32_t numTokens) { return (double)(1.0 / std::sqrt((double)numTokens)); } -double DefaultSimilarity::queryNorm(double sumOfSquaredWeights) { +inline double DefaultSimilarity::queryNorm(double sumOfSquaredWeights) { return (double)(1.0 / std::sqrt(sumOfSquaredWeights)); } -double DefaultSimilarity::tf(double freq) { +inline double DefaultSimilarity::tf(double freq) { return (double)std::sqrt(freq); } -double DefaultSimilarity::sloppyFreq(int32_t distance) { +inline double DefaultSimilarity::sloppyFreq(int32_t distance) { return (1.0 / (double)(distance + 1)); } -double DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) { +inline double DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) { return (double)(std::log((double)numDocs / (double)(docFreq + 1)) + 1.0); } -double DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) { +inline double DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) { return (double)overlap / (double)maxOverlap; } -void DefaultSimilarity::setDiscountOverlaps(bool v) { +inline void DefaultSimilarity::setDiscountOverlaps(bool v) { discountOverlaps = v; } -bool DefaultSimilarity::getDiscountOverlaps() { +inline bool DefaultSimilarity::getDiscountOverlaps() { return discountOverlaps; } diff --git a/src/core/search/ExactPhraseScorer.cpp b/src/core/search/ExactPhraseScorer.cpp index 2304d187..cd5130c9 100644 --- a/src/core/search/ExactPhraseScorer.cpp +++ b/src/core/search/ExactPhraseScorer.cpp @@ -20,9 +20,9 @@ ExactPhraseScorer::~ExactPhraseScorer() { double ExactPhraseScorer::phraseFreq() { // sort list with pq pq->clear(); - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) { - pp->firstPosition(); - pq->add(pp); // build pq from list + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + __pp->firstPosition(); + pq->add(__pp); } pqToList(); // rebuild list from pq @@ -30,16 +30,16 @@ double ExactPhraseScorer::phraseFreq() { // times all PhrasePosition's have exactly the same position. 
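    // Advance the earliest stream (__first) until it catches up with the latest (__last); whenever every
    // stream agrees on a position, that is one exact occurrence of the phrase.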
int32_t freq = 0; do { - while (first->position < last->position) { // scan forward in first + while (__first->position < __last->position) { // scan forward in first do { - if (!first->nextPosition()) { + if (!__first->nextPosition()) { return freq; } - } while (first->position < last->position); + } while (__first->position < __last->position); firstToLast(); } ++freq; // all equal: a match - } while (last->nextPosition()); + } while (__last->nextPosition()); return freq; } diff --git a/src/core/search/PhraseQueue.cpp b/src/core/search/PhraseQueue.cpp index 16fff6e5..b88e1432 100644 --- a/src/core/search/PhraseQueue.cpp +++ b/src/core/search/PhraseQueue.cpp @@ -10,24 +10,27 @@ namespace Lucene { -PhraseQueue::PhraseQueue(int32_t size) : PriorityQueue(size) { +PhraseQueue::PhraseQueue(int32_t size) : PriorityQueue(size) { } PhraseQueue::~PhraseQueue() { } -bool PhraseQueue::lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second) { - if (first->doc == second->doc) { - if (first->position == second->position) { - // same doc and pp.position, so decide by actual term positions. - // rely on: pp.position == tp.position - offset. - return first->offset < second->offset; +inline bool PhraseQueue::lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second) { + if (first && second) { + if (first->doc == second->doc) { + if (first->position == second->position) { + // same doc and pp.position, so decide by actual term positions. + // rely on: pp.position == tp.position - offset. + return first->offset < second->offset; + } else { + return first->position < second->position; + } } else { - return first->position < second->position; + return first->doc < second->doc; } - } else { - return first->doc < second->doc; } + return first ? false : true; } } diff --git a/src/core/search/PhraseScorer.cpp b/src/core/search/PhraseScorer.cpp index 5211e9e2..15ec1428 100644 --- a/src/core/search/PhraseScorer.cpp +++ b/src/core/search/PhraseScorer.cpp @@ -27,23 +27,25 @@ PhraseScorer::PhraseScorer(const WeightPtr& weight, Collection // This allows to easily identify a matching (exact) phrase when all PhrasePositions have exactly the same position. 
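    // The PhrasePositions objects are created as shared_ptrs and parked in _holds to pin their lifetime;
    // the scoring loops link and traverse them only through the raw __first/__last/__next pointers.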
for (int32_t i = 0; i < tps.size(); ++i) { PhrasePositionsPtr pp(newLucene(tps[i], offsets[i])); - if (last) { // add next to end of list - last->_next = pp; + auto* __pp = pp.get(); + if (__last) { // add next to end of list + __last->__next = __pp; } else { - first = pp; + __first = __pp; } - last = pp; + __last = __pp; + _holds.emplace_back(pp); } pq = newLucene(tps.size()); // construct empty pq - first->doc = -1; + __first->doc = -1; } PhraseScorer::~PhraseScorer() { } int32_t PhraseScorer::docID() { - return first->doc; + return __first->doc; } int32_t PhraseScorer::nextDoc() { @@ -51,18 +53,18 @@ int32_t PhraseScorer::nextDoc() { init(); firstTime = false; } else if (more) { - more = last->next(); // trigger further scanning + more = __last->next(); // trigger further scanning } if (!doNext()) { - first->doc = NO_MORE_DOCS; + __first->doc = NO_MORE_DOCS; } - return first->doc; + return __first->doc; } bool PhraseScorer::doNext() { while (more) { - while (more && first->doc < last->doc) { // find doc with all the terms - more = first->skipTo(last->doc); // skip first upto last and move it to the end + while (more && __first->doc < __last->doc) { // find doc with all the terms + more = __first->skipTo(__last->doc); // skip first upto last and move it to the end firstToLast(); } @@ -70,7 +72,7 @@ bool PhraseScorer::doNext() { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0) { // no match - more = last->next(); // trigger further scanning + more = __last->next(); // trigger further scanning } else { return true; } @@ -81,21 +83,21 @@ bool PhraseScorer::doNext() { double PhraseScorer::score() { double raw = getSimilarity()->tf(freq) * value; // raw score - return !norms ? raw : raw * Similarity::decodeNorm(norms[first->doc]); // normalize + return !norms ? 
raw : raw * Similarity::decodeNorm(norms[__first->doc]); // normalize } int32_t PhraseScorer::advance(int32_t target) { firstTime = false; - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) { - more = pp->skipTo(target); + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + more = __pp->skipTo(target); } if (more) { sort(); // re-sort } if (!doNext()) { - first->doc = NO_MORE_DOCS; + __first->doc = NO_MORE_DOCS; } - return first->doc; + return __first->doc; } double PhraseScorer::currentFreq() { @@ -103,8 +105,8 @@ double PhraseScorer::currentFreq() { } void PhraseScorer::init() { - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) { - more = pp->next(); + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + more = __pp->next(); } if (more) { sort(); @@ -113,32 +115,32 @@ void PhraseScorer::init() { void PhraseScorer::sort() { pq->clear(); - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) { - pq->add(pp); + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + pq->add(__pp); } pqToList(); } void PhraseScorer::pqToList() { - last.reset(); - first.reset(); + __last = nullptr; + __first = nullptr; while (pq->top()) { - PhrasePositionsPtr pp(pq->pop()); - if (last) { // add next to end of list - last->_next = pp; + auto* __pp = pq->pop(); + if (__last) { // add next to end of list + __last->__next = __pp; } else { - first = pp; + __first = __pp; } - last = pp; - pp->_next.reset(); + __last = __pp; + __pp->__next = nullptr; } } void PhraseScorer::firstToLast() { - last->_next = first; // move first to end of list - last = first; - first = first->_next; - last->_next.reset(); + __last->__next = __first; // move first to end of list + __last = __first; + __first = __first->__next; + __last->__next = nullptr; } String PhraseScorer::toString() { diff --git a/src/core/search/Scorer.cpp b/src/core/search/Scorer.cpp index 20ea35d6..36a0517a 100644 --- a/src/core/search/Scorer.cpp +++ b/src/core/search/Scorer.cpp @@ -24,7 +24,7 @@ namespace Lucene { SimilarityPtr Scorer::getSimilarity() { return similarity; } - + void Scorer::score(const CollectorPtr& collector) { collector->setScorer(shared_from_this()); int32_t doc; diff --git a/src/core/search/Similarity.cpp b/src/core/search/Similarity.cpp index 302f3fb8..4a11b466 100644 --- a/src/core/search/Similarity.cpp +++ b/src/core/search/Similarity.cpp @@ -33,7 +33,7 @@ SimilarityPtr Similarity::getDefault() { return defaultImpl; } -const Collection Similarity::NORM_TABLE() { +static const Collection GEN_NORM_TABLE() { static Collection _NORM_TABLE; if (!_NORM_TABLE) { _NORM_TABLE = Collection::newInstance(256); @@ -44,12 +44,14 @@ const Collection Similarity::NORM_TABLE() { return _NORM_TABLE; } +const Collection Similarity::NORM_TABLE = GEN_NORM_TABLE(); + double Similarity::decodeNorm(uint8_t b) { - return NORM_TABLE()[b & 0xff]; // & 0xff maps negative bytes to positive above 127 + return NORM_TABLE[b & 0xff]; // & 0xff maps negative bytes to positive above 127 } -const Collection Similarity::getNormDecoder() { - return NORM_TABLE(); +const Collection& Similarity::getNormDecoder() { + return NORM_TABLE; } double Similarity::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { diff --git a/src/core/search/SloppyPhraseScorer.cpp b/src/core/search/SloppyPhraseScorer.cpp index 7920c680..5c4283cc 100644 --- a/src/core/search/SloppyPhraseScorer.cpp +++ b/src/core/search/SloppyPhraseScorer.cpp @@ -12,6 +12,14 @@ namespace Lucene { +struct __luceneEquals { + 
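    // Keys are raw PhrasePositions pointers now, so equality is plain (null-safe) pointer identity.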
inline bool operator()(const PhrasePositions* __first, const PhrasePositions* __second) const { + return __first ? (__second && __first == __second) : (!__first && !__second); + } +}; + +typedef HashMap< PhrasePositions*, LuceneObjectPtr, luceneHash, __luceneEquals > __MapPhrasePositionsLuceneObject; + SloppyPhraseScorer::SloppyPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, int32_t slop, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { this->slop = slop; this->checkedRepeats = false; @@ -26,24 +34,24 @@ double SloppyPhraseScorer::phraseFreq() { double freq = 0.0; bool done = (end < 0); while (!done) { - PhrasePositionsPtr pp(pq->pop()); - int32_t start = pp->position; + auto* __pp = pq->pop(); + int32_t start = __pp->position; int32_t next = pq->top()->position; bool tpsDiffer = true; - for (int32_t pos = start; pos <= next || !tpsDiffer; pos = pp->position) { + for (int32_t pos = start; pos <= next || !tpsDiffer; pos = __pp->position) { if (pos<=next && tpsDiffer) { start = pos; // advance pp to min window } - if (!pp->nextPosition()) { + if (!__pp->nextPosition()) { done = true; // ran out of a term - done break; } - PhrasePositionsPtr pp2; - tpsDiffer = (!pp->repeats || !(pp2 = termPositionsDiffer(pp))); - if (pp2 && pp2 != pp) { - pp = flip(pp, pp2); // flip pp to pp2 + PhrasePositions* __pp2 = nullptr; + tpsDiffer = (!__pp->repeats || !(__pp2 = termPositionsDiffer(__pp))); + if (__pp2 && __pp2 != __pp) { + __pp = flip(__pp, __pp2); // flip pp to pp2 } } @@ -52,29 +60,29 @@ double SloppyPhraseScorer::phraseFreq() { freq += getSimilarity()->sloppyFreq(matchLength); // score match } - if (pp->position > end) { - end = pp->position; + if (__pp->position > end) { + end = __pp->position; } - pq->add(pp); // restore pq + pq->add(__pp); // restore pq } return freq; } -PhrasePositionsPtr SloppyPhraseScorer::flip(const PhrasePositionsPtr& pp, const PhrasePositionsPtr& pp2) { +PhrasePositions* SloppyPhraseScorer::flip(PhrasePositions* __pp, PhrasePositions* __pp2) { int32_t n = 0; - PhrasePositionsPtr pp3; + PhrasePositions* __pp3; // pop until finding pp2 - while ((pp3 = pq->pop()) != pp2) { - tmpPos[n++] = pp3; + while ((__pp3 = pq->pop()) != __pp2) { + tmpPos[n++] = __pp3; } // insert back all but pp2 for (n--; n >= 0; --n) { pq->addOverflow(tmpPos[n]); } // insert pp back - pq->add(pp); - return pp2; + pq->add(__pp); + return __pp2; } int32_t SloppyPhraseScorer::initPhrasePositions() { @@ -84,44 +92,44 @@ int32_t SloppyPhraseScorer::initPhrasePositions() { if (checkedRepeats && !repeats) { // build queue from list pq->clear(); - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { - pp->firstPosition(); - if (pp->position > end) { - end = pp->position; + for (auto* __pp = __first; __pp; __pp = __pp->__next) { + __pp->firstPosition(); + if (__pp->position > end) { + end = __pp->position; } - pq->add(pp); // build pq from list + pq->add(__pp); // build pq from list } return end; } // position the pp's - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { - pp->firstPosition(); + for (PhrasePositions* __pp = __first; __pp; __pp = __pp->__next) { + __pp->firstPosition(); } // one time initialization for this scorer if (!checkedRepeats) { checkedRepeats = true; // check for repeats - MapPhrasePositionsLuceneObject m; - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { - int32_t tpPos = pp->position + pp->offset; - for (PhrasePositionsPtr pp2(pp->_next); pp2; pp2 = pp2->_next) { - int32_t tpPos2 = 
pp2->position + pp2->offset; + __MapPhrasePositionsLuceneObject m; + for (auto* __pp = __first; __pp; __pp = __pp->__next) { + int32_t tpPos = __pp->position + __pp->offset; + for (auto* __pp2 = __pp->__next; __pp2; __pp2 = __pp2->__next) { + int32_t tpPos2 = __pp2->position + __pp2->offset; if (tpPos2 == tpPos) { if (!m) { - m = MapPhrasePositionsLuceneObject::newInstance(); + m = __MapPhrasePositionsLuceneObject::newInstance(); } - pp->repeats = true; - pp2->repeats = true; - m.put(pp, LuceneObjectPtr()); - m.put(pp2, LuceneObjectPtr()); + __pp->repeats = true; + __pp2->repeats = true; + m.put(__pp, LuceneObjectPtr()); + m.put(__pp2, LuceneObjectPtr()); } } } if (m) { - repeats = Collection::newInstance(); - for (MapPhrasePositionsLuceneObject::iterator key = m.begin(); key != m.end(); ++key) { + repeats = Collection::newInstance(); + for (__MapPhrasePositionsLuceneObject::iterator key = m.begin(); key != m.end(); ++key) { repeats.add(key->first); } } @@ -129,8 +137,8 @@ int32_t SloppyPhraseScorer::initPhrasePositions() { // with repeats must advance some repeating pp's so they all start with differing tp's if (repeats) { - for (Collection::iterator pp = repeats.begin(); pp != repeats.end(); ++pp) { - PhrasePositionsPtr pp2; + for (Collection::iterator pp = repeats.begin(); pp != repeats.end(); ++pp) { + PhrasePositions* pp2 = nullptr; while ((pp2 = termPositionsDiffer(*pp))) { if (!pp2->nextPosition()) { // out of pps that do not differ, advance the pp with higher offset return -1; // ran out of a term - done @@ -141,36 +149,36 @@ int32_t SloppyPhraseScorer::initPhrasePositions() { // build queue from list pq->clear(); - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) { - if (pp->position > end) { - end = pp->position; + for (auto* __pp = __first; __pp; __pp = __pp->__next) { + if (__pp->position > end) { + end = __pp->position; } - pq->add(pp); // build pq from list + pq->add(__pp); // build pq from list } if (repeats) { - tmpPos = Collection::newInstance(pq->size()); + tmpPos = Collection::newInstance(pq->size()); } return end; } -PhrasePositionsPtr SloppyPhraseScorer::termPositionsDiffer(const PhrasePositionsPtr& pp) { +PhrasePositions* SloppyPhraseScorer::termPositionsDiffer(PhrasePositions* __pp) { // Efficiency note: a more efficient implementation could keep a map between repeating pp's, so that if // pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats of term2, pp2a would only be checked // against pp2b but not against pp1a, pp1b, pp1c. However this would complicate code, for a rather rare // case, so choice is to compromise here. - int32_t tpPos = pp->position + pp->offset; - for (Collection::iterator pp2 = repeats.begin(); pp2 != repeats.end(); ++pp2) { - if (*pp2 == pp) { + int32_t tpPos = __pp->position + __pp->offset; + for (Collection::iterator pp2 = repeats.begin(); pp2 != repeats.end(); ++pp2) { + if (*pp2 == __pp) { continue; } int32_t tpPos2 = (*pp2)->position + (*pp2)->offset; if (tpPos2 == tpPos) { - return pp->offset > (*pp2)->offset ? pp : *pp2; // do not differ: return the one with higher offset. + return __pp->offset > (*pp2)->offset ? __pp : *pp2; // do not differ: return the one with higher offset. 
} } - return PhrasePositionsPtr(); + return nullptr; } } diff --git a/src/core/search/TermScorer.cpp b/src/core/search/TermScorer.cpp index 310fe2a8..d2623e3c 100644 --- a/src/core/search/TermScorer.cpp +++ b/src/core/search/TermScorer.cpp @@ -18,25 +18,28 @@ const int32_t TermScorer::SCORE_CACHE_SIZE = 32; TermScorer::TermScorer(const WeightPtr& weight, const TermDocsPtr& td, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { this->weight = weight; this->termDocs = td; + this->__termDocs = this->termDocs.get(); this->norms = norms; this->weightValue = weight->getValue(); this->doc = -1; - this->docs = Collection::newInstance(32); - this->freqs = Collection::newInstance(32); + this->docs = Collection::newInstance(123); + this->__docs = this->docs.get(); + this->freqs = Collection::newInstance(128); + this->__freqs = this->freqs.get(); this->pointer = 0; this->pointerMax = 0; this->scoreCache = Collection::newInstance(SCORE_CACHE_SIZE); for (int32_t i = 0; i < SCORE_CACHE_SIZE; ++i) { - scoreCache[i] = getSimilarity()->tf(i) * weightValue; + scoreCache[i] = similarity->tf(i) * weightValue; } } TermScorer::~TermScorer() { } -const Collection TermScorer::SIM_NORM_DECODER() { - return Similarity::getNormDecoder(); +inline const Collection& TermScorer::SIM_NORM_DECODER() { + return Similarity::NORM_TABLE; } void TermScorer::score(const CollectorPtr& collector) { @@ -45,22 +48,23 @@ void TermScorer::score(const CollectorPtr& collector) { bool TermScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { // firstDocID is ignored since nextDoc() sets 'doc' - collector->setScorer(shared_from_this()); + auto* __collector = collector.get(); + __collector->setScorer(shared_from_this()); while (doc < max) { // for docs in window - collector->collect(doc); + __collector->collect(doc); if (++pointer >= pointerMax) { - pointerMax = termDocs->read(docs, freqs); // refill buffers + pointerMax = __termDocs->read(docs, freqs); // refill buffers if (pointerMax != 0) { pointer = 0; } else { - termDocs->close(); // close stream + __termDocs->close(); // close stream doc = INT_MAX; // set to sentinel value return false; } } - doc = docs[pointer]; - freq = freqs[pointer]; + doc = __docs->operator[](pointer); + freq = __freqs->operator[](pointer); } return true; } @@ -72,45 +76,46 @@ int32_t TermScorer::docID() { int32_t TermScorer::nextDoc() { ++pointer; if (pointer >= pointerMax) { - pointerMax = termDocs->read(docs, freqs); // refill buffer + pointerMax = __termDocs->read(docs, freqs); // refill buffer if (pointerMax != 0) { pointer = 0; } else { - termDocs->close(); // close stream + __termDocs->close(); // close stream doc = NO_MORE_DOCS; return doc; } } - doc = docs[pointer]; - freq = freqs[pointer]; + doc = __docs->operator[](pointer); + freq = __freqs->operator[](pointer); return doc; } double TermScorer::score() { BOOST_ASSERT(doc != -1); - double raw = freq < SCORE_CACHE_SIZE ? scoreCache[freq] : getSimilarity()->tf(freq) * weightValue; // compute tf(f) * weight + double raw = freq < SCORE_CACHE_SIZE ? scoreCache[freq] : similarity->tf(freq) * weightValue; // compute tf(f) * weight return norms ? 
raw * SIM_NORM_DECODER()[norms[doc] & 0xff] : raw; // normalize for field } int32_t TermScorer::advance(int32_t target) { // first scan in cache for (++pointer; pointer < pointerMax; ++pointer) { - if (docs[pointer] >= target) { - doc = docs[pointer]; - freq = freqs[pointer]; + if (__docs->operator[](pointer) >= target) { + doc = __docs->operator[](pointer); + freq = __freqs->operator[](pointer); return doc; } } // not found in cache, seek underlying stream - bool result = termDocs->skipTo(target); + bool result = __termDocs->skipTo(target); if (result) { pointerMax = 1; pointer = 0; - doc = termDocs->doc(); - docs[pointer] = doc; - freqs[pointer] = freq = termDocs->freq(); + doc = __termDocs->doc(); + __docs->operator[](pointer) = doc; + freq = __termDocs->freq(); + __freqs->operator[](pointer) = freq; } else { doc = NO_MORE_DOCS; } diff --git a/src/core/search/TopScoreDocCollector.cpp b/src/core/search/TopScoreDocCollector.cpp index 08a9499c..15b16103 100644 --- a/src/core/search/TopScoreDocCollector.cpp +++ b/src/core/search/TopScoreDocCollector.cpp @@ -60,6 +60,7 @@ void TopScoreDocCollector::setNextReader(const IndexReaderPtr& reader, int32_t d void TopScoreDocCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; + this->__scorer = scorer.get(); } InOrderTopScoreDocCollector::InOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { @@ -69,7 +70,7 @@ InOrderTopScoreDocCollector::~InOrderTopScoreDocCollector() { } void InOrderTopScoreDocCollector::collect(int32_t doc) { - double score = ScorerPtr(_scorer)->score(); + double score = __scorer->score(); // This collector cannot handle these scores BOOST_ASSERT(score != -std::numeric_limits::infinity()); @@ -98,7 +99,7 @@ OutOfOrderTopScoreDocCollector::~OutOfOrderTopScoreDocCollector() { } void OutOfOrderTopScoreDocCollector::collect(int32_t doc) { - double score = ScorerPtr(_scorer)->score(); + double score = __scorer->score(); // This collector cannot handle NaN BOOST_ASSERT(!MiscUtils::isNaN(score)); diff --git a/src/core/store/BufferedIndexInput.cpp b/src/core/store/BufferedIndexInput.cpp index 42bc0c2c..96d03ddb 100644 --- a/src/core/store/BufferedIndexInput.cpp +++ b/src/core/store/BufferedIndexInput.cpp @@ -12,7 +12,7 @@ namespace Lucene { /// Default buffer size. 
-const int32_t BufferedIndexInput::BUFFER_SIZE = 1024;
+const int32_t BufferedIndexInput::BUFFER_SIZE = 1024 * 2;

 BufferedIndexInput::BufferedIndexInput(int32_t bufferSize) {
     this->bufferSize = bufferSize;
@@ -25,10 +25,34 @@ BufferedIndexInput::~BufferedIndexInput() {
 }

 uint8_t BufferedIndexInput::readByte() {
-    if (bufferPosition >= bufferLength) {
-        refill();
+    if (bufferPosition < bufferLength) {
+        return __buffer[bufferPosition++];
+    }
+    refill();
+    return __buffer[bufferPosition++];
+}
+
+static const int MAX_VARINT32_LENGTH = 5;
+
+int32_t BufferedIndexInput::readVInt() {
+    if (bufferPosition + MAX_VARINT32_LENGTH < bufferLength) {
+        uint8_t b = __buffer[bufferPosition++];
+        int32_t i = (b & 0x7f);
+        for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) {
+            b = __buffer[bufferPosition++];
+            i |= (b & 0x7f) << shift;
+        }
+        return i;
+    }
+    else {
+        uint8_t b = readByte();
+        int32_t i = (b & 0x7f);
+        for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) {
+            b = readByte();
+            i |= (b & 0x7f) << shift;
+        }
+        return i;
+    }
-    return buffer[bufferPosition++];
 }

 void BufferedIndexInput::setBufferSize(int32_t newSize) {
@@ -52,6 +76,7 @@ void BufferedIndexInput::setBufferSize(int32_t newSize) {
 void BufferedIndexInput::newBuffer(ByteArray newBuffer) {
     // Subclasses can do something here
     buffer = newBuffer;
+    __buffer = newBuffer.get();
 }

 int32_t BufferedIndexInput::getBufferSize() {
@@ -130,7 +155,7 @@ void BufferedIndexInput::refill() {
         newBuffer(ByteArray::newInstance(bufferSize)); // allocate buffer lazily
         seekInternal(bufferStart);
     }
-    readInternal(buffer.get(), 0, newLength);
+    readInternal(__buffer, 0, newLength);

     bufferLength = newLength;
     bufferStart = start;
     bufferPosition = 0;
diff --git a/src/core/util/MiscUtils.cpp b/src/core/util/MiscUtils.cpp
index a50ba51d..f5856ce3 100644
--- a/src/core/util/MiscUtils.cpp
+++ b/src/core/util/MiscUtils.cpp
@@ -124,12 +124,4 @@ bool MiscUtils::equalTypes(const LuceneObjectPtr& first, const LuceneObjectPtr&
     return (typeid(firstRef) == typeid(secondRef));
 }

-int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) {
-    return (shift & 0x3f) == 0 ? num : (((uint64_t)num >> 1) & 0x7fffffffffffffffLL) >> ((shift & 0x3f) - 1);
-}
-
-int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) {
-    return (shift & 0x1f) == 0 ? num : (((uint32_t)num >> 1) & 0x7fffffff) >> ((shift & 0x1f) - 1);
-}
-
 }
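
The one idiom this patch applies everywhere is worth spelling out once: keep the boost smart pointer for ownership, cache its .get() in a __-prefixed raw pointer, and refresh that raw copy at every point where the owner is (re)assigned, so hot loops pay a single plain indirection instead of shared_ptr copies or weak_ptr locks. Below is a minimal stand-alone sketch of that pattern; the names (FreqStream, TermDocsLike, setStream) are invented for illustration and it uses std::shared_ptr rather than the boost types, so only the ownership/borrowing structure mirrors the patch.

#include <cstdio>
#include <memory>

// Invented stand-ins; only the ownership/borrowing pattern mirrors the patch.
struct FreqStream {
    int next = 0;
    int readVInt() { return next++; }
};

class TermDocsLike {
public:
    void setStream(const std::shared_ptr<FreqStream>& stream) {
        _freqStream = stream;         // owning pointer: controls lifetime
        __freqStream = stream.get();  // borrowed raw pointer: used on the hot path
    }

    int nextDelta() {
        // One plain indirection per call; no reference-count traffic.
        return __freqStream->readVInt();
    }

private:
    std::shared_ptr<FreqStream> _freqStream;  // keeps the stream alive
    FreqStream* __freqStream = nullptr;       // refreshed whenever _freqStream changes
};

int main() {
    TermDocsLike td;
    td.setStream(std::make_shared<FreqStream>());
    for (int i = 0; i < 3; ++i) {
        std::printf("%d ", td.nextDelta());   // prints: 0 1 2
    }
    std::printf("\n");
    return 0;
}

The invariant is the one each setter in the patch maintains: the raw pointer is valid only while the owning member (or, for the weak_ptr cases, the parent object being observed) stays alive, which is why both members are always updated together.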