Lucene++ - a full-featured, c++ search engine
API Documentation


DirectoryReader.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef DIRECTORYREADER_H
8 #define DIRECTORYREADER_H
9 
10 #include "IndexReader.h"
11 #include "TermEnum.h"
12 #include "TermPositions.h"
13 #include "IndexCommit.h"
14 #include "SegmentMergeQueue.h"
15 
16 namespace Lucene {
17 
19 class LPPAPI DirectoryReader : public IndexReader {
20 public:
22  DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor);
23 
25  DirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor);
26 
28  DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection<SegmentReaderPtr> oldReaders,
29  Collection<int32_t> oldStarts, MapStringByteArray oldNormsCache, bool readOnly,
30  bool doClone, int32_t termInfosIndexDivisor);
31 
32  virtual ~DirectoryReader();
33 
35 
36 protected:
38  bool readOnly;
45  bool stale;
47 
49 
51  Collection<int32_t> starts; // 1st docno for each segment
52  MapStringByteArray normsCache;
53  int32_t _maxDoc;
54  int32_t _numDocs;
56 
57  // Max version in index as of when we opened; this can be > our current segmentInfos version
58  // in case we were opened on a past IndexCommit
59  int64_t maxIndexVersion;
60 
61 public:
63 
64  static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor);
65 
67  virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr());
68 
70  virtual IndexReaderPtr reopen(bool openReadOnly);
71  virtual IndexReaderPtr reopen(const IndexCommitPtr& commit);
72 
74  virtual int64_t getVersion();
75 
78 
80  virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field);
81 
83  virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper);
84 
86  virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper);
87 
90  virtual bool isOptimized();
91 
93  virtual int32_t numDocs();
94 
96  virtual int32_t maxDoc();
97 
99  virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector);
100 
102  virtual bool isDeleted(int32_t n);
103 
105  virtual bool hasDeletions();
106 
108  static int32_t readerIndex(int32_t n, Collection<int32_t> starts, int32_t numSubReaders);
109 
111  virtual bool hasNorms(const String& field);
112 
114  virtual ByteArray norms(const String& field);
115 
117  virtual void norms(const String& field, ByteArray norms, int32_t offset);
118 
120  virtual TermEnumPtr terms();
121 
123  virtual TermEnumPtr terms(const TermPtr& t);
124 
126  virtual int32_t docFreq(const TermPtr& t);
127 
130 
133 
136  virtual void acquireWriteLock();
137 
138  void startCommit();
140 
142  virtual MapStringString getCommitUserData();
143 
145  virtual bool isCurrent();
146 
150 
152 
155 
158 
159  virtual int32_t getTermInfosIndexDivisor();
160 
163 
166 
167 protected:
168  IndexReaderPtr doReopenFromWriter(bool openReadOnly, const IndexCommitPtr& commit);
169  IndexReaderPtr doReopen(bool openReadOnly, const IndexCommitPtr& commit);
170  IndexReaderPtr doReopenNoWriter(bool openReadOnly, const IndexCommitPtr& commit);
171  DirectoryReaderPtr doReopen(const SegmentInfosPtr& infos, bool doClone, bool openReadOnly);
172 
174  virtual void doDelete(int32_t docNum);
175 
177  virtual void doUndeleteAll();
178 
179  int32_t readerIndex(int32_t n);
180 
182  virtual void doSetNorm(int32_t doc, const String& field, uint8_t value);
183 
187  virtual void doCommit(MapStringString commitUserData);
188 
190  virtual void doClose();
191 
192  friend class FindSegmentsReopen;
193 };
194 
195 class MultiTermEnum : public TermEnum {
196 public:
198  virtual ~MultiTermEnum();
199 
201 
202 protected:
205  int32_t _docFreq;
206 
207 public:
209  Collection<SegmentMergeInfoPtr> matchingSegments; // null terminated array of matching segments
210 
211 public:
213  virtual bool next();
214 
216  virtual TermPtr term();
217 
219  virtual int32_t docFreq();
220 
222  virtual void close();
223 };
224 
225 class MultiTermDocs : public TermPositions, public LuceneObject {
226 public:
228  virtual ~MultiTermDocs();
229 
231 
232 protected:
233  IndexReaderWeakPtr _topReader; // used for matching TermEnum to TermDocs
237 
238  int32_t base;
239  int32_t pointer;
240 
243  MultiTermEnumPtr tenum; // the term enum used for seeking
244  int32_t matchingSegmentPos; // position into the matching segments from tenum
245  SegmentMergeInfoPtr smi; // current segment merge info
246 
247 public:
249  virtual int32_t doc();
250 
252  virtual int32_t freq();
253 
255  virtual void seek(const TermPtr& term);
256 
258  virtual void seek(const TermEnumPtr& termEnum);
259 
261  virtual bool next();
262 
265  virtual int32_t read(Collection<int32_t> docs, Collection<int32_t> freqs);
266 
268  virtual bool skipTo(int32_t target);
269 
271  virtual void close();
272 
273 protected:
274  virtual TermDocsPtr termDocs(int32_t i);
275  virtual TermDocsPtr termDocs(const IndexReaderPtr& reader);
276 };
277 
279 public:
282 
284 
285 public:
287  virtual int32_t nextPosition();
288 
290  virtual int32_t getPayloadLength();
291 
293  virtual ByteArray getPayload(ByteArray data, int32_t offset);
294 
296  virtual bool isPayloadAvailable();
297 
298 protected:
299  virtual TermDocsPtr termDocs(const IndexReaderPtr& reader);
300 };
301 
302 class ReaderCommit : public IndexCommit {
303 public:
305  virtual ~ReaderCommit();
306 
308 
309 protected:
313  int64_t generation;
314  int64_t version;
316  MapStringString userData;
317 
318 public:
319  virtual String toString();
320 
322  virtual bool isOptimized();
323 
325  virtual String getSegmentsFileName();
326 
329 
332 
334  virtual int64_t getVersion();
335 
337  virtual int64_t getGeneration();
338 
339  virtual bool isDeleted();
340 
342  virtual MapStringString getUserData();
343 
344  virtual void deleteCommit();
345 };
346 
347 }
348 
349 #endif
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
An IndexReader which reads indexes with multiple segments.
Definition: DirectoryReader.h:19
virtual int64_t getVersion()
Version number when this IndexReader was opened.
virtual IndexReaderPtr reopen(const IndexCommitPtr &commit)
Reopen this reader on a specific commit point. This always returns a readOnly reader....
virtual int32_t getTermInfosIndexDivisor()
For IndexReader implementations that use TermInfosReader to read terms, this returns the current inde...
virtual void doClose()
Implements close.
virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr &mapper)
Map all the term vectors for all fields in a Document.
virtual bool hasDeletions()
Returns true if any documents have been deleted.
virtual MapStringString getCommitUserData()
Retrieve the String userData optionally passed to IndexWriter::commit.
Collection< int32_t > starts
Definition: DirectoryReader.h:51
virtual TermPositionsPtr termPositions()
Returns an unpositioned TermPositions enumerator.
DirectoryReader(const DirectoryPtr &directory, const SegmentInfosPtr &sis, const IndexDeletionPolicyPtr &deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor)
Construct reading the named set of readers.
bool rollbackHasChanges
Definition: DirectoryReader.h:48
IndexReaderPtr doReopenNoWriter(bool openReadOnly, const IndexCommitPtr &commit)
virtual HashSet< String > getFieldNames(FieldOption fieldOption)
Get a list of unique field names that exist in this index and have the specified field option informa...
IndexWriterWeakPtr _writer
Definition: DirectoryReader.h:39
void _initialize(Collection< SegmentReaderPtr > subReaders)
virtual TermDocsPtr termDocs()
Returns an unpositioned TermDocs enumerator.
virtual Collection< TermFreqVectorPtr > getTermFreqVectors(int32_t docNumber)
Return an array of term frequency vectors for the specified document.
int32_t _numDocs
Definition: DirectoryReader.h:54
virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr &other=LuceneObjectPtr())
Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable read...
DirectoryReader(const DirectoryPtr &directory, const SegmentInfosPtr &infos, Collection< SegmentReaderPtr > oldReaders, Collection< int32_t > oldStarts, MapStringByteArray oldNormsCache, bool readOnly, bool doClone, int32_t termInfosIndexDivisor)
This constructor is only used for reopen().
virtual ByteArray norms(const String &field)
Returns the byte-encoded normalization factor for the named field of every document.
virtual void norms(const String &field, ByteArray norms, int32_t offset)
Reads the byte-encoded normalization factor for the named field of every document.
virtual void doDelete(int32_t docNum)
Implements deletion of the document numbered docNum.
virtual DirectoryPtr directory()
Returns the directory this index resides in.
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Efficiently clones the IndexReader (sharing most internal state).
SegmentInfosPtr segmentInfosStart
Definition: DirectoryReader.h:44
IndexDeletionPolicyPtr deletionPolicy
Definition: DirectoryReader.h:40
HashSet< String > synced
Definition: DirectoryReader.h:41
virtual void getTermFreqVector(int32_t docNumber, const String &field, const TermVectorMapperPtr &mapper)
Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of ...
virtual DocumentPtr document(int32_t n, const FieldSelectorPtr &fieldSelector)
Get the Document at the n'th position. The FieldSelector may be used to determine what Fields to load...
virtual bool isCurrent()
Check whether any new changes have occurred to the index since this reader was opened.
virtual bool isDeleted(int32_t n)
Returns true if document n has been deleted.
virtual IndexCommitPtr getIndexCommit()
Return the IndexCommit that this reader has opened.
bool stale
Definition: DirectoryReader.h:45
IndexReaderPtr doReopenFromWriter(bool openReadOnly, const IndexCommitPtr &commit)
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String &field)
Return a term frequency vector for the specified document and field.
static Collection< IndexCommitPtr > listCommits(const DirectoryPtr &dir)
Returns all commit points that exist in the Directory.
static HashSet< String > getFieldNames(FieldOption fieldOption, Collection< IndexReaderPtr > subReaders)
int64_t maxIndexVersion
Definition: DirectoryReader.h:59
virtual void doSetNorm(int32_t doc, const String &field, uint8_t value)
Implements setNorm in subclass.
static int32_t readerIndex(int32_t n, Collection< int32_t > starts, int32_t numSubReaders)
Find reader for doc n.
virtual TermEnumPtr terms()
Returns an enumeration of all the terms in the index.
bool readOnly
Definition: DirectoryReader.h:38
virtual void doUndeleteAll()
Implements actual undeleteAll() in subclass.
int32_t _maxDoc
Definition: DirectoryReader.h:53
MapStringByteArray normsCache
Definition: DirectoryReader.h:52
virtual bool isOptimized()
Checks if the index is optimized (if it has a single segment and no deletions). Not implemented in th...
virtual Collection< IndexReaderPtr > getSequentialSubReaders()
Returns the sequential sub readers that this reader is logically composed of.
int32_t readerIndex(int32_t n)
virtual int32_t numDocs()
Returns the number of documents in this index.
SegmentInfosPtr segmentInfos
Definition: DirectoryReader.h:43
DirectoryPtr _directory
Definition: DirectoryReader.h:34
virtual IndexReaderPtr reopen(bool openReadOnly)
Just like reopen(), except you can change the readOnly of the original reader. If the index is unchan...
virtual int32_t docFreq(const TermPtr &t)
Returns the number of documents containing the term t.
virtual int32_t maxDoc()
Returns one greater than the largest possible document number.
LockPtr writeLock
Definition: DirectoryReader.h:42
IndexReaderPtr doReopen(bool openReadOnly, const IndexCommitPtr &commit)
static IndexReaderPtr open(const DirectoryPtr &directory, const IndexDeletionPolicyPtr &deletionPolicy, const IndexCommitPtr &commit, bool readOnly, int32_t termInfosIndexDivisor)
virtual TermEnumPtr terms(const TermPtr &t)
Returns an enumeration of all terms starting at a given term.
DirectoryReaderPtr doReopen(const SegmentInfosPtr &infos, bool doClone, bool openReadOnly)
DirectoryReader(const IndexWriterPtr &writer, const SegmentInfosPtr &infos, int32_t termInfosIndexDivisor)
Used by near real-time search.
int32_t termInfosIndexDivisor
Definition: DirectoryReader.h:46
virtual IndexReaderPtr reopen()
Refreshes an IndexReader if the index has changed since this instance was (re)opened.
virtual void acquireWriteLock()
Tries to acquire the WriteLock on this directory. This method is only valid if this IndexReader is di...
virtual void doCommit(MapStringString commitUserData)
Commit changes resulting from delete, undeleteAll, or setNorm operations.
virtual bool hasNorms(const String &field)
Returns true if there are norms stored for this field.
bool _hasDeletions
Definition: DirectoryReader.h:55
Collection< SegmentReaderPtr > subReaders
Definition: DirectoryReader.h:50
Represents a single commit into an index as seen by the IndexDeletionPolicy or IndexReader.
Definition: IndexCommit.h:22
IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...
Definition: IndexReader.h:39
FieldOption
Constants describing field properties, for example used for IndexReader#getFieldNames(FieldOption).
Definition: IndexReader.h:48
Base class for all Lucene classes.
Definition: LuceneObject.h:31
Definition: DirectoryReader.h:225
virtual TermDocsPtr termDocs(int32_t i)
TermPtr term
Definition: DirectoryReader.h:236
virtual bool skipTo(int32_t target)
Skips entries to the first beyond the current whose document number is greater than or equal to targe...
virtual bool next()
Moves to the next pair in the enumeration.
Collection< IndexReaderPtr > readers
Definition: DirectoryReader.h:234
Collection< TermDocsPtr > readerTermDocs
Definition: DirectoryReader.h:241
virtual void close()
Frees associated resources.
virtual void seek(const TermEnumPtr &termEnum)
Sets this to the data for the current term in a TermEnum.
virtual TermDocsPtr termDocs(const IndexReaderPtr &reader)
int32_t pointer
Definition: DirectoryReader.h:239
virtual int32_t doc()
Returns the current document number.
int32_t matchingSegmentPos
Definition: DirectoryReader.h:244
int32_t base
Definition: DirectoryReader.h:238
virtual void seek(const TermPtr &term)
Sets this to the data for a term.
MultiTermDocs(const IndexReaderPtr &topReader, Collection< IndexReaderPtr > r, Collection< int32_t > s)
Collection< int32_t > starts
Definition: DirectoryReader.h:235
virtual int32_t freq()
Returns the frequency of the term within the current document.
IndexReaderWeakPtr _topReader
Definition: DirectoryReader.h:230
SegmentMergeInfoPtr smi
Definition: DirectoryReader.h:245
MultiTermEnumPtr tenum
Definition: DirectoryReader.h:243
TermDocsPtr current
Definition: DirectoryReader.h:242
virtual int32_t read(Collection< int32_t > docs, Collection< int32_t > freqs)
Attempts to read multiple entries from the enumeration, up to length of docs. Optimized implementatio...
Definition: DirectoryReader.h:195
virtual bool next()
Increments the enumeration to the next element. True if one exists.
Collection< SegmentMergeInfoPtr > matchingSegments
Definition: DirectoryReader.h:209
virtual int32_t docFreq()
Returns the docFreq of the current Term in the enumeration.
TermPtr _term
Definition: DirectoryReader.h:204
SegmentMergeQueuePtr queue
Definition: DirectoryReader.h:200
MultiTermEnum(const IndexReaderPtr &topReader, Collection< IndexReaderPtr > readers, Collection< int32_t > starts, const TermPtr &t)
IndexReaderWeakPtr _topReader
Definition: DirectoryReader.h:208
int32_t _docFreq
Definition: DirectoryReader.h:205
virtual void close()
Closes the enumeration to further activity, freeing resources.
virtual TermPtr term()
Returns the current Term in the enumeration.
Definition: DirectoryReader.h:278
virtual int32_t nextPosition()
Returns next position in the current document.
virtual int32_t getPayloadLength()
Returns the length of the payload at the current term position.
virtual TermDocsPtr termDocs(const IndexReaderPtr &reader)
virtual bool isPayloadAvailable()
Checks if a payload can be loaded at this position.
MultiTermPositions(const IndexReaderPtr &topReader, Collection< IndexReaderPtr > r, Collection< int32_t > s)
virtual ByteArray getPayload(ByteArray data, int32_t offset)
Returns the payload data at the current term position.
Definition: DirectoryReader.h:302
virtual HashSet< String > getFileNames()
Returns all index files referenced by this commit point.
HashSet< String > files
Definition: DirectoryReader.h:311
int64_t generation
Definition: DirectoryReader.h:313
virtual bool isDeleted()
ReaderCommit(const SegmentInfosPtr &infos, const DirectoryPtr &dir)
virtual String toString()
Returns a string representation of the object.
String segmentsFileName
Definition: DirectoryReader.h:307
virtual String getSegmentsFileName()
Returns the segments file (segments_N) associated with this commit.
int64_t version
Definition: DirectoryReader.h:314
bool _isOptimized
Definition: DirectoryReader.h:315
virtual bool isOptimized()
Returns true if this commit is an optimized index.
virtual MapStringString getUserData()
Returns userData, previously passed to IndexWriter#commit(Map) for this commit.
MapStringString userData
Definition: DirectoryReader.h:316
virtual int64_t getVersion()
Returns the version for this IndexCommit.
DirectoryPtr dir
Definition: DirectoryReader.h:312
virtual int64_t getGeneration()
Returns the generation (the _N in segments_N) for this IndexCommit.
virtual DirectoryPtr getDirectory()
Returns the Directory for the index.
virtual void deleteCommit()
Delete this commit point. This only applies when using the commit point in the context of IndexWriter...
Abstract class for enumerating terms.
Definition: TermEnum.h:18
TermPositions provides an interface for enumerating the <document, frequency, <position>*> tuples for...
Definition: TermPositions.h:18
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< IndexCommit > IndexCommitPtr
Definition: LuceneTypes.h:152
boost::shared_ptr< SegmentMergeInfo > SegmentMergeInfoPtr
Definition: LuceneTypes.h:212
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition: LuceneTypes.h:539
boost::shared_ptr< Lock > LockPtr
Definition: LuceneTypes.h:496
boost::shared_ptr< TermPositions > TermPositionsPtr
Definition: LuceneTypes.h:243
boost::shared_ptr< TermDocs > TermDocsPtr
Definition: LuceneTypes.h:236
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition: LuceneTypes.h:160
boost::shared_ptr< DirectoryReader > DirectoryReaderPtr
Definition: LuceneTypes.h:105
boost::shared_ptr< FieldSelector > FieldSelectorPtr
Definition: LuceneTypes.h:77
boost::weak_ptr< IndexReader > IndexReaderWeakPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
boost::shared_ptr< MultiTermEnum > MultiTermEnumPtr
Definition: LuceneTypes.h:183
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition: LuceneTypes.h:254
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
boost::shared_ptr< IndexDeletionPolicy > IndexDeletionPolicyPtr
Definition: LuceneTypes.h:153
boost::shared_ptr< TermEnum > TermEnumPtr
Definition: LuceneTypes.h:235
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition: LuceneTypes.h:237
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
boost::shared_ptr< SegmentMergeQueue > SegmentMergeQueuePtr
Definition: LuceneTypes.h:213
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160

clucene.sourceforge.net