|
|||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||
java.lang.Object
java.util.Observable
org.exist.storage.TextSearchEngine
org.exist.storage.NativeTextEngine
public class NativeTextEngine
This class is responsible for fulltext-indexing. Text-nodes are handed over
to this class to be fulltext-indexed. Method storeText() is called by
RelationalBroker whenever it finds a TextNode. Method getNodeIDsContaining()
is used by the XPath-engine to process queries where a fulltext-operator is
involved. The class keeps two database tables: table dbTokens stores the words
found with their unique id. Table invertedIndex contains the word occurrences for
every word-id per document.
TODO: store node type (attribute or text) with each entry
| Nested Class Summary | |
|---|---|
class |
NativeTextEngine.FtMatch
|
| Field Summary | |
|---|---|
static int |
ATTRIBUTE_BY_QNAME
|
static int |
ATTRIBUTE_NOT_BY_QNAME
|
static byte |
ATTRIBUTE_SECTION
|
static double |
DEFAULT_WORD_CACHE_GROWTH
|
static double |
DEFAULT_WORD_KEY_THRESHOLD
|
static double |
DEFAULT_WORD_VALUE_THRESHOLD
|
static int |
DO_NOT_TOKENIZE
|
static String |
FILE_KEY_IN_CONFIG
|
static String |
FILE_NAME
|
static int |
FOURTH_OPTION
|
static int |
LENGTH_NODE_IDS_FREQ_OFFSETS
|
static int |
LENGTH_NODE_TYPE
|
static int |
MAX_TOKEN_LENGTH
Length limit for the tokens |
static int |
OFFSET_ATTRIBUTE_DLN_LENGTH
|
static int |
OFFSET_DLN
|
static int |
OFFSET_ELEMENT_CHILDREN_COUNT
|
static int |
OFFSET_NODE_TYPE
|
static int |
OFFSET_TEXT_DLN_LENGTH
|
static byte |
QNAME_SECTION
|
static int |
TEXT_BY_QNAME
|
static byte |
TEXT_SECTION
|
static int |
TOKENIZE
|
| Fields inherited from class org.exist.storage.TextSearchEngine |
|---|
PROPERTY_INDEX_NUMBERS, PROPERTY_STEM, PROPERTY_STORE_TERM_FREQUENCY, PROPERTY_TOKENIZER |
| Constructor Summary | |
|---|---|
NativeTextEngine(DBBroker broker,
byte id,
String dataDir,
Configuration config)
|
|
| Method Summary | |
|---|---|
boolean |
close()
|
void |
closeAndRemove()
|
static boolean |
containsWildcards(String str)
Checks if the given string could be a regular expression. |
void |
dropIndex(Collection collection)
Remove index entries for an entire collection. |
void |
dropIndex(DocumentImpl document)
Remove all index entries for the given document. |
void |
endElement(int xpathType,
ElementImpl node,
String content)
store and index given element (called storeElement before) |
void |
flush()
|
String |
getConfigKeyForFile()
|
String |
getFileName()
|
String[] |
getIndexTerms(DocumentSet docs,
TermMatcher matcher)
|
NativeTextEngine |
getInstance()
|
NodeSet |
getNodes(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
TermMatcher matcher,
CharSequence startTerm)
|
NodeSet |
getNodesContaining(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
String expr,
int type,
boolean matchAll)
For each of the given search terms and each of the documents in the document set, return a node-set of matching nodes. |
NodeSet |
getNodesExact(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
String expr)
Get all nodes whose content exactly matches the given expression. |
int |
getTrackMatches()
|
void |
printStatistics()
|
void |
remove()
Remove all pending modifications for the current document. |
void |
removeElement(ElementImpl node,
NodePath currentPath,
String content)
Mark given Element for removal; added entries are written to the list of pending entries. |
Occurrences[] |
scanIndexTerms(DocumentSet docs,
NodeSet contextSet,
String start,
String end)
Queries the fulltext index to retrieve information on indexed words contained in the index for the current collection. |
void |
setDocument(DocumentImpl document)
set the current document; generally called before calling an operation |
void |
setTrackMatches(int flags)
|
void |
startElement(ElementImpl impl,
NodePath currentPath,
boolean index)
corresponds to SAX function of the same name |
static boolean |
startsWithWildcard(String str)
|
void |
storeAttribute(AttrImpl node,
NodePath currentPath,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
Indexes the tokens contained in an attribute. |
void |
storeAttribute(AttrImpl node,
NodePath currentPath,
int indexingHint,
RangeIndexSpec idx,
boolean remove)
store and index given attribute |
void |
storeText(StoredNode parent,
String text,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
|
void |
storeText(TextImpl node,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
Indexes the tokens contained in a text node. |
void |
storeText(TextImpl node,
NodePath currentPath,
int indexingHint)
store and index given text node |
void |
sync()
Triggers a cache sync. |
String |
toString()
|
| Methods inherited from class org.exist.storage.TextSearchEngine |
|---|
getNodesContaining, getTokenizer |
| Methods inherited from class java.util.Observable |
|---|
addObserver, countObservers, deleteObserver, deleteObservers, hasChanged, notifyObservers, notifyObservers |
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait |
| Field Detail |
|---|
public static final String FILE_NAME
public static final String FILE_KEY_IN_CONFIG
public static final double DEFAULT_WORD_CACHE_GROWTH
public static final double DEFAULT_WORD_KEY_THRESHOLD
public static final double DEFAULT_WORD_VALUE_THRESHOLD
public static final byte TEXT_SECTION
public static final byte ATTRIBUTE_SECTION
public static final byte QNAME_SECTION
public static int ATTRIBUTE_BY_QNAME
public static int ATTRIBUTE_NOT_BY_QNAME
public static int TOKENIZE
public static int DO_NOT_TOKENIZE
public static int TEXT_BY_QNAME
public static int FOURTH_OPTION
public static final int LENGTH_NODE_TYPE
public static final int LENGTH_NODE_IDS_FREQ_OFFSETS
public static final int OFFSET_NODE_TYPE
public static final int OFFSET_ELEMENT_CHILDREN_COUNT
public static final int OFFSET_ATTRIBUTE_DLN_LENGTH
public static final int OFFSET_TEXT_DLN_LENGTH
public static final int OFFSET_DLN
public static final int MAX_TOKEN_LENGTH
| Constructor Detail |
|---|
public NativeTextEngine(DBBroker broker,
byte id,
String dataDir,
Configuration config)
throws DBException
DBException
| Method Detail |
|---|
public String getFileName()
getFileName in interface ContentLoadingObserver
public String getConfigKeyForFile()
getConfigKeyForFile in interface ContentLoadingObserver
public NativeTextEngine getInstance()
public static final boolean containsWildcards(String str)
str - The string
public static final boolean startsWithWildcard(String str)
public int getTrackMatches()
getTrackMatches in class TextSearchEngine
public void setTrackMatches(int flags)
setTrackMatches in class TextSearchEngine
public void setDocument(DocumentImpl document)
ContentLoadingObserver
setDocument in interface ContentLoadingObserver
public void storeAttribute(AttrImpl node,
NodePath currentPath,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
node - The attribute to be indexed
public void storeAttribute(AttrImpl node,
NodePath currentPath,
int indexingHint,
RangeIndexSpec idx,
boolean remove)
ContentLoadingObserver
storeAttribute in interface ContentLoadingObserver
public void storeText(TextImpl node,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
storeText in class TextSearchEngine
indexSpec - The index configuration
node - The text node to be indexed
indexingHint - if true, given text is indexed as a single token;
if false, it is tokenized before being indexed
public void storeText(StoredNode parent,
String text,
int indexingHint,
FulltextIndexSpec indexSpec,
boolean remove)
storeText in class TextSearchEngine
public void storeText(TextImpl node,
NodePath currentPath,
int indexingHint)
ContentLoadingObserver
storeText in interface ContentLoadingObserver
public void startElement(ElementImpl impl,
NodePath currentPath,
boolean index)
ContentLoadingObserver
startElement in interface ContentLoadingObserver
public void endElement(int xpathType,
ElementImpl node,
String content)
ContentLoadingObserver
endElement in interface ContentLoadingObserver
public void removeElement(ElementImpl node,
NodePath currentPath,
String content)
ContentLoadingObserver
ContentLoadingObserver.flush() is called later to flush all pending entries.
removeElement in interface ContentLoadingObserver
public void sync()
ContentLoadingObserver
sync in interface ContentLoadingObserver
public void flush()
flush in interface ContentLoadingObserver
flush in class TextSearchEngine
public void remove()
ContentLoadingObserver
remove in interface ContentLoadingObserver
public void dropIndex(Collection collection)
TextSearchEngine
dropIndex in interface ContentLoadingObserver
dropIndex in class TextSearchEngine
public void dropIndex(DocumentImpl document)
TextSearchEngine
dropIndex in interface ContentLoadingObserver
dropIndex in class TextSearchEngine
public NodeSet getNodesContaining(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
String expr,
int type,
boolean matchAll)
throws TerminatedException
TextSearchEngine
getNodesContaining in class TextSearchEngine
TerminatedException
public NodeSet getNodesExact(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
String expr)
throws TerminatedException
TerminatedException
public NodeSet getNodes(XQueryContext context,
DocumentSet docs,
NodeSet contextSet,
int axis,
QName qname,
TermMatcher matcher,
CharSequence startTerm)
throws TerminatedException
getNodes in class TextSearchEngine
TerminatedException
public String[] getIndexTerms(DocumentSet docs,
TermMatcher matcher)
getIndexTerms in class TextSearchEngine
public Occurrences[] scanIndexTerms(DocumentSet docs,
NodeSet contextSet,
String start,
String end)
throws PermissionDeniedException
TextSearchEngine
Returns Occurrences for all
words contained in the index. If param end is null, all words starting with
the string sequence param start are returned. Otherwise, the method
returns all words that come after start and before end in lexical order.
scanIndexTerms in class TextSearchEngine
PermissionDeniedException
public void closeAndRemove()
closeAndRemove in interface ContentLoadingObserver
public boolean close()
throws DBException
close in interface ContentLoadingObserver
close in class TextSearchEngine
DBException
public void printStatistics()
printStatistics in interface ContentLoadingObserver
public String toString()
toString in class Object
|
|||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||