#ifndef CHARTOKENIZER_H
#define CHARTOKENIZER_H
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
An abstract base class for simple, character-oriented tokenizers.
Definition: CharTokenizer.h:15
virtual void reset(const ReaderPtr &input)
Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will ...
CharTokenizer(const AttributeSourcePtr &source, const ReaderPtr &input)
CharArray ioBuffer
Definition: CharTokenizer.h:32
OffsetAttributePtr offsetAtt
Definition: CharTokenizer.h:34
CharTokenizer(const AttributeFactoryPtr &factory, const ReaderPtr &input)
virtual void end()
This method is called by the consumer after the last token has been consumed, after incrementToken() ...
static const int32_t IO_BUFFER_SIZE
Definition: CharTokenizer.h:30
virtual bool isTokenChar(wchar_t c)=0
Returns true if a character should be included in a token. This tokenizer generates as tokens adjacen...
TermAttributePtr termAtt
Definition: CharTokenizer.h:33
static const int32_t MAX_WORD_LEN
Definition: CharTokenizer.h:29
int32_t offset
Definition: CharTokenizer.h:22
int32_t dataLen
Definition: CharTokenizer.h:27
virtual wchar_t normalize(wchar_t c)
Called on each token character to normalize it before it is added to the token. The default implement...
virtual bool incrementToken()
Consumers (i.e., IndexWriter) use this method to advance the stream to the next token....
CharTokenizer(const ReaderPtr &input)
int32_t bufferIndex
Definition: CharTokenizer.h:26
A Tokenizer is a TokenStream whose input is a Reader.
Definition: Tokenizer.h:20
Definition: AbstractAllTermDocs.h:12
boost::shared_ptr< AttributeSource > AttributeSourcePtr
Definition: LuceneTypes.h:520
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition: LuceneTypes.h:58
boost::shared_ptr< OffsetAttribute > OffsetAttributePtr
Definition: LuceneTypes.h:40
boost::shared_ptr< Reader > ReaderPtr
Definition: LuceneTypes.h:547
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition: LuceneTypes.h:519