diff --git a/CMakeLists.txt b/CMakeLists.txt
index 838b25ad..62589f5f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,11 +101,11 @@ configure_file(
   IMMEDIATE @ONLY
 )
 
-add_custom_target(
-  uninstall
-  "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
-  VERBATIM
-)
+#add_custom_target(
+#  uninstall
+#  "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
+#  VERBATIM
+#)
 
 if(ENABLE_PACKAGING)
   include(CreateLucene++Packages)
diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h
new file mode 100644
index 00000000..47f161e7
--- /dev/null
+++ b/include/lucene++/Lucene_c.h
@@ -0,0 +1,77 @@
+#ifndef _LUCENE_C_H
+#define _LUCENE_C_H
+
+/*
+ * Minimal C binding for Lucene++: open an index, add documents, run term and
+ * prefix searches. NOTE(review): the three #include names below were reconstructed.
+ */
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Marks the functions exported from the shared library; may be predefined. */
+#ifndef LUCENE_C_API
+#if defined(__GNUC__)
+#define LUCENE_C_API __attribute__((visibility("default")))
+#else
+#define LUCENE_C_API
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque handle to an open index. */
+typedef struct index_t index_t;
+/* Opaque handle to a document under construction. */
+typedef struct index_document_t index_document_t;
+
+/* Opens an index writer on the directory `path`; returns NULL on failure. */
+LUCENE_C_API index_t* index_open(const char *path);
+
+/* Adds `idoc` to `index`; returns 1 on success. */
+LUCENE_C_API int index_put(index_t *index, index_document_t *idoc);
+
+/*
+ * Searches `field` for `key`. `type` selects the query: 0 = exact term,
+ * 1 = prefix. The ids of the matching documents are written to *result,
+ * which is grown with realloc() (and *nResult updated) when it is too small.
+ * Returns 1 on success.
+ */
+LUCENE_C_API int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult);
+
+/*
+ * Combines `nQuery` (field[i], key[i]) queries into one boolean query.
+ * qSet[i] selects the kind of the i-th query and `opera` how the clauses
+ * are combined (0 = MUST, 1 = SHOULD, 2 = MUST_NOT). Results are delivered
+ * through result/nResult as for index_search().
+ * Returns 0 on success and -1 on failure.
+ */
+LUCENE_C_API int index_multi_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult);
+
+/* Closes the index and releases the handle. */
+LUCENE_C_API void index_close(index_t *index);
+
+/* Optimizes the index; returns 1 on success. */
+LUCENE_C_API int index_optimize(index_t *index);
+
+/* Creates an empty document; release it with index_document_destroy(). */
+LUCENE_C_API index_document_t* index_document_create(void);
+
+/*
+ * Adds a stored field to `idoc`. When `index` is non-zero, `val` is stored
+ * and indexed under `field`; when it is zero, `val` (a decimal integer) is
+ * stored as the document id that searches report and `field` is ignored.
+ */
+LUCENE_C_API void index_document_add(index_document_t *idoc, const char *field, int nFields, const char *val, int nVals, int index);
+
+/* Releases a document handle; NULL is accepted. */
+LUCENE_C_API void index_document_destroy(index_document_t *doc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/core/CMakeLists.txt
b/src/core/CMakeLists.txt
index 690f3826..844e81e4 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -23,6 +23,7 @@ file(GLOB_RECURSE lucene_sources
   "queryparser/*.cpp"
   "store/*.cpp"
   "util/*.c*"
+  "c/*.cc"
 )
 
 file(GLOB_RECURSE lucene_internal_headers
diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc
new file mode 100644
index 00000000..f9817c69
--- /dev/null
+++ b/src/core/c/Lucene_c.cc
@@ -0,0 +1,224 @@
+#include "Lucene_c.h"
+
+#include <cstdlib>
+
+#include "targetver.h"
+#include <iostream>
+#include "LuceneHeaders.h"
+#include "FileUtils.h"
+#include "MiscUtils.h"
+#include "ConstantScoreQuery.h"
+
+using namespace Lucene;
+
+// NOTE(review): template arguments and <...> includes were reconstructed from the
+// declared pointer types; confirm them against the original patch.
+
+// Stored, non-indexed field holding the caller-supplied numeric document id.
+static const String UID = L"U$DID";
+// Upper bound on the number of hits requested from a single search.
+static const int MAX_NUM_OF_OUTPUT = 1000*10000;
+
+namespace {
+
+// Closes a reader obtained from IndexWriter::getReader() when the scope ends,
+// on the normal path as well as when an exception unwinds the stack.
+struct ReaderCloser {
+    explicit ReaderCloser(const IndexReaderPtr& r) : reader(r) {}
+    ~ReaderCloser() {
+        try {
+            if (reader) { reader->close(); }
+        } catch (...) {
+            // A destructor must not throw; there is nothing left to undo.
+        }
+    }
+    IndexReaderPtr reader;
+};
+
+// Translates the C-level combination code into a clause occurrence.
+// Returns false (leaving occur untouched) for codes other than 0, 1 and 2.
+bool to_occur(int opera, BooleanClause::Occur& occur) {
+    switch (opera) {
+    case 0: occur = BooleanClause::MUST; return true;
+    case 1: occur = BooleanClause::SHOULD; return true;
+    case 2: occur = BooleanClause::MUST_NOT; return true;
+    default: return false;
+    }
+}
+
+// Builds the query for one (field, key) pair: kind 0 is an exact term match,
+// kind 1 a prefix match. Any other kind is not implemented and yields a null
+// pointer. field and key must be NUL-terminated strings.
+QueryPtr make_query(int kind, const char* field, const char* key) {
+    if (kind != 0 && kind != 1) {
+        return QueryPtr();
+    }
+    TermPtr term = newLucene<Term>(StringUtils::toString(field), StringUtils::toString(key));
+    if (kind == 0) {
+        return newLucene<TermQuery>(term);
+    }
+    return newLucene<PrefixQuery>(term);
+}
+
+// Runs query on a fresh reader of writer and writes the UID of every hit to
+// *result. The buffer is grown with realloc() when it is missing or smaller
+// than the hit count; *nResult always receives the number of hits.
+// Returns 0 on success and -1 when the buffer cannot be grown.
+int collect_uids(const IndexWriterPtr& writer, const QueryPtr& query, int** result, int32_t* nResult) {
+    IndexReaderPtr reader = writer->getReader();
+    ReaderCloser closer(reader);
+    IndexSearcherPtr searcher = newLucene<IndexSearcher>(reader);
+    Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), MAX_NUM_OF_OUTPUT)->scoreDocs;
+    const int32_t nHits = hits.size();
+    if (nHits > 0 && (*result == NULL || *nResult < nHits)) {
+        // Assign only after success so the old buffer is not leaked on failure.
+        int* grown = static_cast<int*>(realloc(*result, nHits * sizeof(int)));
+        if (grown == NULL) { return -1; }
+        *result = grown;
+    }
+    for (int32_t i = 0; i < nHits; i++) {
+        (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID));
+    }
+    *nResult = nHits;
+    return 0;
+}
+
+} // namespace
+
+extern "C" {
+
+struct index_t { IndexWriterPtr rep; };
+struct index_document_t { DocumentPtr rep; };
+
+// Opens an index writer on the directory path. The true argument asks
+// IndexWriter to create the index, replacing one that already exists there.
+// Returns NULL when path is NULL or Lucene reports an error.
+index_t* index_open(const char *path) {
+    if (path == NULL) { return NULL; }
+    try {
+        IndexWriterPtr writer = newLucene<IndexWriter>(FSDirectory::open(StringUtils::toString(path)), newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
+        index_t *index = new index_t;
+        index->rep = writer;
+        return index;
+    } catch (...) {
+        // Exceptions must not propagate into C callers.
+        return NULL;
+    }
+}
+
+// Adds idoc to the index. Returns 1 on success and -1 on failure.
+int index_put(index_t *index, index_document_t *idoc) {
+    if (index == NULL || idoc == NULL || !index->rep || !idoc->rep) { return -1; }
+    try {
+        index->rep->addDocument(idoc->rep);
+        return 1;
+    } catch (...) {
+        return -1;
+    }
+}
+
+// Combines nQuery (field[i], key[i]) queries of kind qSet[i] into one boolean
+// query whose clauses all use the occurrence selected by opera (0 = MUST,
+// 1 = SHOULD, 2 = MUST_NOT) and reports the matching ids through result/nResult.
+// Entries with an unimplemented kind are skipped. Returns 0 on success, -1 on failure.
+int index_multi_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult) {
+    if (index == NULL || !index->rep || result == NULL || nResult == NULL) { return -1; }
+    if (nQuery > 0 && (field == NULL || key == NULL || qSet == NULL)) { return -1; }
+    // An unknown code used to leave the occurrence uninitialized.
+    BooleanClause::Occur occur = BooleanClause::MUST;
+    if (!to_occur(opera, occur)) { return -1; }
+    try {
+        BooleanQueryPtr bQuery = newLucene<BooleanQuery>();
+        for (int i = 0; i < nQuery; i++) {
+            if (field[i] == NULL || key[i] == NULL) { return -1; }
+            QueryPtr clause = make_query(qSet[i], field[i], key[i]);
+            if (clause) { bQuery->add(clause, occur); }
+        }
+        return collect_uids(index->rep, bQuery, result, nResult);
+    } catch (...) {
+        return -1;
+    }
+}
+
+// Searches field for key; type 0 is an exact term match, type 1 a prefix match.
+// nField and nKey are unused: both strings are read up to their NUL terminator.
+// Matching ids are reported through result/nResult.
+// Returns 1 on success and -1 on failure or for an unimplemented type.
+int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult) {
+    (void)nField;
+    (void)nKey;
+    if (index == NULL || !index->rep || result == NULL || nResult == NULL) { return -1; }
+    if (field == NULL || key == NULL) { return -1; }
+    try {
+        // Pass the whole strings; dereferencing them matched on the first character only.
+        QueryPtr query = make_query(type, field, key);
+        if (!query) { return -1; }
+        return collect_uids(index->rep, query, result, nResult) == 0 ? 1 : -1;
+    } catch (...) {
+        return -1;
+    }
+}
+
+// Closes the writer and frees the handle; NULL is accepted.
+void index_close(index_t *index) {
+    if (index == NULL) { return; }
+    try {
+        if (index->rep) { index->rep->close(); }
+    } catch (...) {
+        // The handle is released below even when closing fails.
+    }
+    index->rep.reset();
+    delete index;
+}
+
+// Optimizes the index. Returns 1 on success and -1 on failure.
+int index_optimize(index_t *index) {
+    if (index == NULL || !index->rep) { return -1; }
+    try {
+        index->rep->optimize();
+        return 1;
+    } catch (...) {
+        return -1;
+    }
+}
+
+// Creates an empty document. Returns NULL on failure.
+index_document_t* index_document_create() {
+    try {
+        DocumentPtr doc = newLucene<Document>();
+        index_document_t *idoc = new index_document_t;
+        idoc->rep = doc;
+        return idoc;
+    } catch (...) {
+        return NULL;
+    }
+}
+
+// Frees a document handle; NULL is accepted.
+void index_document_destroy(index_document_t *idoc) {
+    if (idoc == NULL) { return; }
+    idoc->rep.reset();
+    delete idoc;
+}
+
+// Adds a stored field to the document. A non-zero index stores val under field,
+// indexed as a single un-analyzed term; zero stores val in the UID field, which is
+// not searchable, and ignores field. nFields and nVals are unused: both strings
+// are read up to their NUL terminator. Errors are dropped because there is no return value.
+void index_document_add(index_document_t *idoc, const char *field, int nFields, const char *val, int nVals, int index) {
+    (void)nFields;
+    (void)nVals;
+    if (idoc == NULL || !idoc->rep || val == NULL) { return; }
+    try {
+        if (index) {
+            if (field == NULL) { return; }
+            idoc->rep->add(newLucene<Field>(StringUtils::toString(field), StringUtils::toString(val), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
+        } else {
+            idoc->rep->add(newLucene<Field>(UID, StringUtils::toString(val), Field::STORE_YES, Field::INDEX_NO));
+        }
+    } catch (...) {
+        // Nothing can be reported through a void function.
+    }
+}
+
+} // extern "C"
diff --git a/src/demo/indexfiles/main.cpp b/src/demo/indexfiles/main.cpp
index e6911f48..7b247938 100644
--- a/src/demo/indexfiles/main.cpp
+++ b/src/demo/indexfiles/main.cpp
@@ -17,7 +17,8 @@
 #include "LuceneHeaders.h"
 #include "FileUtils.h"
 #include "MiscUtils.h"
-
+#include "ConstantScoreQuery.h"
+#include "BooleanQuery.h"
 using namespace Lucene;
 
 int32_t docNumber = 0;
@@ -42,6 +43,15 @@ DocumentPtr fileDocument(const String& docFile) {
     return doc;
 }
 
+int addDoc(IndexWriterPtr& writer) {
+    DocumentPtr doc = newLucene<Document>();
+    doc->add(newLucene<Field>(L"tag1", L"cpu1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
+    doc->add(newLucene<Field>(L"tag2", L"cpu2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
+    doc->add(newLucene<Field>(L"uid", StringUtils::toString(10), Field::STORE_YES, Field::INDEX_NO));
+    writer->addDocument(doc);
+    return 0;
+
+}
 void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) {
     HashSet<String> dirList(HashSet<String>::newInstance());
     if
(!FileUtils::listDirectory(sourceDir, false, dirList)) { @@ -65,47 +75,83 @@ void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) { /// Index all text files under a directory. int main(int argc, char* argv[]) { - if (argc != 3) { + if (argc != 2) { std::wcout << L"Usage: indexfiles.exe \n"; return 1; } - String sourceDir(StringUtils::toUnicode(argv[1])); - String indexDir(StringUtils::toUnicode(argv[2])); + //String sourceDir(StringUtils::toUnicode(argv[1])); + String indexDir(StringUtils::toUnicode(argv[1])); - if (!FileUtils::isDirectory(sourceDir)) { - std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; - return 1; - } + //if (!FileUtils::isDirectory(sourceDir)) { + // std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; + // return 1; + //} - if (!FileUtils::isDirectory(indexDir)) { - if (!FileUtils::createDirectory(indexDir)) { - std::wcout << L"Unable to create directory: " << indexDir << L"\n"; - return 1; - } - } + //if (!FileUtils::isDirectory(indexDir)) { + // if (!FileUtils::createDirectory(indexDir)) { + // std::wcout << L"Unable to create directory: " << indexDir << L"\n"; + // return 1; + // } + //} uint64_t beginIndex = MiscUtils::currentTimeMillis(); try { IndexWriterPtr writer = newLucene(FSDirectory::open(indexDir), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); std::wcout << L"Indexing to directory: " << indexDir << L"...\n"; + for (int i = 0; i < 10000; i++) { + addDoc(writer); + } + IndexReaderPtr reader = writer->getReader(); + // PrefixFilter combined with ConstantScoreQuery + PrefixFilterPtr filter = newLucene(newLucene(L"tag1", L"cp")); + QueryPtr query = newLucene(filter); + IndexSearcherPtr searcher = newLucene(reader); + Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + + + BooleanQueryPtr q = newLucene(); + q->add(newLucene(newLucene(L"tag1", L"cpu1")), 
BooleanClause::SHOULD); + q->add(newLucene(newLucene(L"tag2", L"cpu2")), BooleanClause::SHOULD); + hits = searcher->search(q, FilterPtr(), 100000000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::SHOULD); + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::MUST); + hits = searcher->search(q, 100000000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::MUST); + q->add(newLucene(newLucene(L"tag2", L"cpu1")), BooleanClause::MUST); + hits = searcher->search(q, 10000000)->scoreDocs; + + - indexDocs(writer, sourceDir); + BooleanQueryPtr bquery = newLucene(); + bquery->add(newLucene(newLucene(L"tag1", L"xxx")), BooleanClause::SHOULD); + bquery->add(newLucene(newLucene(L"tag2", L"cpuxxx")), BooleanClause::SHOULD); + hits = searcher->search(bquery, FilterPtr(), 10000000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + + //EXPECT_EQ(4, hits.size()); - uint64_t endIndex = MiscUtils::currentTimeMillis(); - uint64_t indexDuration = endIndex - beginIndex; - std::wcout << L"Index time: " << indexDuration << L" milliseconds\n"; - std::wcout << L"Optimizing...\n"; + ///indexDocs(writer, sourceDir); - writer->optimize(); + //uint64_t endIndex = MiscUtils::currentTimeMillis(); + //uint64_t indexDuration = endIndex - beginIndex; + //std::wcout << L"Index time: " << indexDuration << L" milliseconds\n"; + //std::wcout << L"Optimizing...\n"; - uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex; - std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n"; + //writer->optimize(); + //uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex; + //std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n"; + writer->close(); - std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; + //std::wcout << 
L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1;