Skip to content

Search performance optimization #179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/lucene++/AbstractAllTermDocs.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class LPPAPI AbstractAllTermDocs : public TermDocs, public LuceneObject {
virtual int32_t doc();
virtual int32_t freq();
virtual bool next();
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);
virtual bool skipTo(int32_t target);
virtual void close();
virtual bool isDeleted(int32_t doc) = 0;
Expand Down
1 change: 0 additions & 1 deletion include/lucene++/Array.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ class Array {
}

/// Returns a reference to element i of the underlying storage (array->data[i]).
/// The index is validated only through BOOST_ASSERT; no other bounds handling is
/// performed here, so callers must pass 0 <= i < array->size.
TYPE& operator[] (int32_t i) const {
BOOST_ASSERT(i >= 0 && i < array->size); // precondition check: 0 <= i < array->size
return array->data[i];
}

Expand Down
5 changes: 5 additions & 0 deletions include/lucene++/BooleanScorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class LPPAPI BooleanScorer : public Scorer {
int32_t minNrShouldMatch;
int32_t end;
BucketPtr current;
Bucket* __current = nullptr;
int32_t doc;

protected:
Expand All @@ -71,8 +72,10 @@ class BooleanScorerCollector : public Collector {

protected:
BucketTableWeakPtr _bucketTable;
BucketTable* __bucketTable = nullptr;
int32_t mask;
ScorerWeakPtr _scorer;
Scorer* __scorer = nullptr;

public:
virtual void collect(int32_t doc);
Expand Down Expand Up @@ -121,6 +124,7 @@ class Bucket : public LuceneObject {
int32_t bits; // used for bool constraints
int32_t coord; // count of terms in score
BucketWeakPtr _next; // next valid bucket
Bucket* __next = nullptr; // next valid bucket
};

/// A simple hash table of document scores within a range.
Expand All @@ -137,6 +141,7 @@ class BucketTable : public LuceneObject {

Collection<BucketPtr> buckets;
BucketPtr first; // head of valid list
Bucket* __first = nullptr; // head of valid list

public:
CollectorPtr newCollector(int32_t mask);
Expand Down
6 changes: 6 additions & 0 deletions include/lucene++/BufferedIndexInput.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,18 @@ class LPPAPI BufferedIndexInput : public IndexInput {
int32_t bufferLength; // end of valid bytes
int32_t bufferPosition; // next byte to read
ByteArray buffer;
decltype(buffer.get()) __buffer;

public:
/// Reads and returns a single byte.
/// @see IndexOutput#writeByte(uint8_t)
virtual uint8_t readByte();

/// Reads an int stored in variable-length format. Reads between one and five
/// bytes. Smaller values take fewer bytes. Negative numbers are not supported.
/// @see IndexOutput#writeVInt(int32_t)
virtual int32_t readVInt();

/// Change the buffer size used by this IndexInput.
void setBufferSize(int32_t newSize);

Expand Down
4 changes: 4 additions & 0 deletions include/lucene++/Collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ class Collection : public LuceneSync {
bool operator!= (const this_type& other) {
return (container != other.container);
}

/// Returns the raw pointer obtained from `container.get()`.
/// NOTE(review): presumably a non-owning view whose lifetime is bounded by this
/// collection's `container` — confirm before caching the result.
/// Const-qualified so that it can be called on const collections and const references;
/// constness of the handle does not change the pointee type that is returned.
collection_type* get() const {
    return container.get();
}
};

template <typename TYPE>
Expand Down
2 changes: 1 addition & 1 deletion include/lucene++/DirectoryReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ class MultiTermDocs : public TermPositions, public LuceneObject {

/// Attempts to read multiple entries from the enumeration, up to length of docs.
/// Optimized implementation.
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);

/// Skips entries to the first beyond the current whose document number is greater than or equal to target.
virtual bool skipTo(int32_t target);
Expand Down
2 changes: 1 addition & 1 deletion include/lucene++/FilterIndexReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class LPPAPI FilterTermDocs : public TermPositions, public LuceneObject {
virtual int32_t doc();
virtual int32_t freq();
virtual bool next();
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);
virtual bool skipTo(int32_t target);
virtual void close();
};
Expand Down
6 changes: 5 additions & 1 deletion include/lucene++/LuceneFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ boost::shared_ptr<T> newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 co

/// Creates an object of type T without constructor arguments, calls initialize() on it
/// and returns the boost::shared_ptr that holds it.
// NOTE(review): `instance` is declared on the first line of the body and then again
// inside the #if/#else block below; as written that is a redefinition. This looks like
// the removed and the added lines of a diff shown together — TODO confirm which form
// the real header keeps before relying on this text.
template <class T>
boost::shared_ptr<T> newLucene() {
boost::shared_ptr<T> instance(newInstance<T>());
// When BOOST_VERSION is 103800 or lower, construct with plain `new`.
#if BOOST_VERSION <= 103800
boost::shared_ptr<T> instance = boost::shared_ptr<T>(new T);
#else
// Otherwise construct through boost::make_shared.
boost::shared_ptr<T> instance = boost::make_shared<T>();
#endif
// initialize() is invoked after construction, once the shared_ptr exists.
instance->initialize();
return instance;
}
Expand Down
8 changes: 8 additions & 0 deletions include/lucene++/MiscUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,14 @@ class LPPAPI MiscUtils {
static int32_t unsignedShift(int32_t num, int32_t shift);
};

/// Logical (zero-filling) right shift of a 64-bit value.
/// Only the low six bits of `shift` are used as the shift count; a count of zero
/// returns `num` unchanged.
/// NOTE(review): presumably mirrors Java's `>>>` operator for long — confirm.
inline int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) {
    const int64_t count = shift & 0x3f;
    if (count == 0) {
        return num;
    }
    // Shifting the unsigned representation fills the vacated high bits with zeros.
    const uint64_t bits = static_cast<uint64_t>(num);
    return static_cast<int64_t>(bits >> count);
}

/// Logical (zero-filling) right shift of a 32-bit value.
/// Only the low five bits of `shift` are used as the shift count; a count of zero
/// returns `num` unchanged.
/// NOTE(review): presumably mirrors Java's `>>>` operator for int — confirm.
inline int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) {
    const int32_t count = shift & 0x1f;
    if (count == 0) {
        return num;
    }
    // Shifting the unsigned representation fills the vacated high bits with zeros.
    const uint32_t bits = static_cast<uint32_t>(num);
    return static_cast<int32_t>(bits >> count);
}

}

#endif
2 changes: 1 addition & 1 deletion include/lucene++/MultipleTermPositions.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class LPPAPI MultipleTermPositions : public TermPositions, public LuceneObject {
virtual void seek(const TermEnumPtr& termEnum);

/// Not implemented.
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);

/// Not implemented.
virtual ByteArray getPayload(ByteArray data, int32_t offset);
Expand Down
2 changes: 1 addition & 1 deletion include/lucene++/PhrasePositions.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class PhrasePositions : public LuceneObject {
int32_t count; // remaining pos in this doc
int32_t offset; // position in phrase
TermPositionsPtr tp; // stream of positions
PhrasePositionsPtr _next; // used to make lists
PhrasePositions* __next = nullptr; // used to make lists
bool repeats; // there's other pp for same term (eg. query="1st word 2nd word"~1)

public:
Expand Down
7 changes: 5 additions & 2 deletions include/lucene++/PhraseQueue.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,19 @@
#include "PriorityQueue.h"

namespace Lucene {
// raw pointer
typedef PhrasePositions* PhrasePositionsStar;

/// Priority queue of phrase positions, built on the PriorityQueue template.
// NOTE(review): two class heads and two lessThan() declarations follow, one using
// PhrasePositionsPtr and one using PhrasePositionsStar (the raw PhrasePositions*
// typedef). They look like the removed and added lines of a diff shown together;
// only one of each pair can exist in the real header — TODO confirm which.
class PhraseQueue : public PriorityQueue<PhrasePositionsPtr> {
class PhraseQueue : public PriorityQueue<PhrasePositionsStar> {
public:
/// Constructs the queue. `size` is presumably the capacity passed on to
/// PriorityQueue — confirm against the implementation file.
PhraseQueue(int32_t size);
virtual ~PhraseQueue();

LUCENE_CLASS(PhraseQueue);

protected:
/// Ordering predicate for the queue; the comparison criteria are defined in the
/// implementation file, not in this header.
virtual bool lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second);
virtual bool lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second);

};

}
Expand Down
7 changes: 5 additions & 2 deletions include/lucene++/PhraseScorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define PHRASESCORER_H

#include "Scorer.h"
#include <vector>

namespace Lucene {

Expand All @@ -27,14 +28,16 @@ class PhraseScorer : public Scorer {

protected:
WeightPtr weight;
Weight* __weight = nullptr;
ByteArray norms;
double value;

bool firstTime;
bool more;
PhraseQueuePtr pq;
PhrasePositionsPtr first;
PhrasePositionsPtr last;
std::vector<PhrasePositionsPtr> _holds;
PhrasePositions* __first = nullptr;
PhrasePositions* __last = nullptr;

double freq; // phrase frequency in current doc as computed by phraseFreq().

Expand Down
7 changes: 5 additions & 2 deletions include/lucene++/SegmentTermDocs.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@ class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject {

protected:
SegmentReaderWeakPtr _parent;
SegmentReader* __parent;
IndexInputPtr _freqStream;
IndexInput* __freqStream;
int32_t count;
int32_t df;
BitVectorPtr deletedDocs;
BitVector* __deletedDocs;
int32_t _doc;
int32_t _freq;

Expand Down Expand Up @@ -61,7 +64,7 @@ class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject {
virtual bool next();

/// Optimized implementation.
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);

/// Optimized implementation.
virtual bool skipTo(int32_t target);
Expand All @@ -72,7 +75,7 @@ class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject {

protected:
virtual void skippingDoc();
virtual int32_t readNoTf(Collection<int32_t> docs, Collection<int32_t> freqs, int32_t length);
virtual int32_t readNoTf(Collection<int32_t>& docs, Collection<int32_t>& freqs, int32_t length);

/// Overridden by SegmentTermPositions to skip in prox stream.
virtual void skipProx(int64_t proxPointer, int32_t payloadLength);
Expand Down
2 changes: 1 addition & 1 deletion include/lucene++/SegmentTermPositions.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class LPPAPI SegmentTermPositions : public SegmentTermDocs {
virtual bool next();

/// Not supported
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);

/// Returns the length of the payload at the current term position.
virtual int32_t getPayloadLength();
Expand Down
6 changes: 3 additions & 3 deletions include/lucene++/Similarity.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,8 @@ class LPPAPI Similarity : public LuceneObject {
protected:
static const int32_t NO_DOC_ID_PROVIDED;

protected:
static const Collection<double> NORM_TABLE();
public:
static const Collection<double> NORM_TABLE;

public:
/// Return the default Similarity implementation used by indexing and search code.
Expand All @@ -450,7 +450,7 @@ class LPPAPI Similarity : public LuceneObject {

/// Returns a table for decoding normalization bytes.
/// @see #encodeNorm(double)
static const Collection<double> getNormDecoder();
static const Collection<double>& getNormDecoder();

/// Compute the normalization value for a field, given the accumulated state of term processing for this
/// field (see {@link FieldInvertState}).
Expand Down
8 changes: 4 additions & 4 deletions include/lucene++/SloppyPhraseScorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class SloppyPhraseScorer : public PhraseScorer {

protected:
int32_t slop;
Collection<PhrasePositionsPtr> repeats;
Collection<PhrasePositionsPtr> tmpPos; // for flipping repeating pps
Collection<PhrasePositions*> repeats;
Collection<PhrasePositions*> tmpPos; // for flipping repeating pps
bool checkedRepeats;

public:
Expand All @@ -42,7 +42,7 @@ class SloppyPhraseScorer : public PhraseScorer {
protected:
/// Flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back.
/// Assumes: pp!=pp2, pp2 in pq, pp not in pq. Called only when there are repeating pps.
PhrasePositionsPtr flip(const PhrasePositionsPtr& pp, const PhrasePositionsPtr& pp2);
PhrasePositions* flip(PhrasePositions* pp, PhrasePositions* pp2);

/// Init PhrasePositions in place.
/// There is a one time initialization for this scorer:
Expand All @@ -61,7 +61,7 @@ class SloppyPhraseScorer : public PhraseScorer {
/// of the same word would go elsewhere in the matched doc.
/// @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions out of the first
/// two PPs found to not differ.
PhrasePositionsPtr termPositionsDiffer(const PhrasePositionsPtr& pp);
PhrasePositions* termPositionsDiffer(PhrasePositions* pp);
};

}
Expand Down
2 changes: 1 addition & 1 deletion include/lucene++/TermDocs.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class LPPAPI TermDocs {
/// Attempts to read multiple entries from the enumeration, up to length of docs. Document numbers are stored
/// in docs, and term frequencies are stored in freqs. Returns the number of entries read. Zero is only
/// returned when the stream has been exhausted.
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs) = 0;
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs) = 0;

/// Skips entries to the first beyond the current whose document number is greater than or equal to target.
/// Returns true if there is such an entry.
Expand Down
7 changes: 5 additions & 2 deletions include/lucene++/TermScorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@ class LPPAPI TermScorer : public Scorer {

protected:
WeightPtr weight;
TermDocsPtr termDocs;
TermDocsPtr termDocs; // for malloc and free
TermDocs* __termDocs; // for work,
ByteArray norms;
double weightValue;
int32_t doc;

Collection<int32_t> docs; // buffered doc numbers
decltype(docs.get()) __docs; //
Collection<int32_t> freqs; // buffered term freqs
decltype(freqs.get()) __freqs; //

int32_t freq;
int32_t pointer;
Expand Down Expand Up @@ -70,7 +73,7 @@ class LPPAPI TermScorer : public Scorer {
}

protected:
static const Collection<double> SIM_NORM_DECODER();
static const Collection<double>& SIM_NORM_DECODER();

virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID);
};
Expand Down
1 change: 1 addition & 0 deletions include/lucene++/TopScoreDocCollector.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class LPPAPI TopScoreDocCollector : public TopDocsCollector {
ScoreDocPtr pqTop;
int32_t docBase;
ScorerWeakPtr _scorer;
Scorer* __scorer;

public:
/// Creates a new {@link TopScoreDocCollector} given the number of hits to collect and whether documents
Expand Down
2 changes: 1 addition & 1 deletion src/contrib/include/MemoryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ class LPPCONTRIBAPI MemoryIndexTermPositions : public TermPositions, public Luce
virtual int32_t doc();
virtual int32_t freq();
virtual bool next();
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);
virtual bool skipTo(int32_t target);
virtual void close();

Expand Down
2 changes: 1 addition & 1 deletion src/contrib/memory/MemoryIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ bool MemoryIndexTermPositions::next() {
return _next;
}

int32_t MemoryIndexTermPositions::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
int32_t MemoryIndexTermPositions::read(Collection<int32_t>& docs, Collection<int32_t>& freqs) {
if (!hasNext) {
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion src/core/include/_ParallelReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class ParallelTermDocs : public TermPositions, public LuceneObject {
virtual void seek(const TermPtr& term);
virtual void seek(const TermEnumPtr& termEnum);
virtual bool next();
virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
virtual int32_t read(Collection<int32_t>& docs, Collection<int32_t>& freqs);
virtual bool skipTo(int32_t target);
virtual void close();
};
Expand Down
2 changes: 1 addition & 1 deletion src/core/index/AbstractAllTermDocs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ bool AbstractAllTermDocs::next() {
return skipTo(_doc + 1);
}

int32_t AbstractAllTermDocs::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
int32_t AbstractAllTermDocs::read(Collection<int32_t>& docs, Collection<int32_t>& freqs) {
int32_t length = docs.size();
int32_t i = 0;
while (i < length && _doc < maxDoc) {
Expand Down
2 changes: 1 addition & 1 deletion src/core/index/DirectoryReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ bool MultiTermDocs::next() {
}
}

int32_t MultiTermDocs::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
int32_t MultiTermDocs::read(Collection<int32_t>& docs, Collection<int32_t>& freqs) {
while (true) {
while (!current) {
if (pointer < readers.size()) { // try next segment
Expand Down
2 changes: 1 addition & 1 deletion src/core/index/FilterIndexReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ bool FilterTermDocs::next() {
return in->next();
}

/// Forwards the bulk read to the wrapped enumerator `in` and returns its result unchanged.
// NOTE(review): two signature lines appear back to back (parameters by value, then by
// reference). They look like the before/after pair of a diff rendering; only one can be
// kept for this definition to compile — TODO confirm which one the real file contains.
int32_t FilterTermDocs::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
int32_t FilterTermDocs::read(Collection<int32_t>& docs, Collection<int32_t>& freqs) {
return in->read(docs, freqs);
}

Expand Down
2 changes: 1 addition & 1 deletion src/core/index/MultipleTermPositions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ void MultipleTermPositions::seek(const TermEnumPtr& termEnum) {
boost::throw_exception(UnsupportedOperationException());
}

/// Bulk read is not supported by this class: the body unconditionally raises
/// UnsupportedOperationException and ignores both arguments.
// NOTE(review): two signature lines appear back to back (parameters by value, then by
// reference). They look like the before/after pair of a diff rendering; only one can be
// kept for this definition to compile — TODO confirm which one the real file contains.
int32_t MultipleTermPositions::read(Collection<int32_t> docs, Collection<int32_t> freqs) {
int32_t MultipleTermPositions::read(Collection<int32_t>& docs, Collection<int32_t>& freqs) {
boost::throw_exception(UnsupportedOperationException());
// Not expected to be reached once the exception is thrown; presumably kept so every
// path of a non-void function has a return statement.
return 0;
}
Expand Down
Loading