// Copyright (C) 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/query/suggestion-processor.h"
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/index/embed/embedding-index.h"
#include "icing/index/index.h"
#include "icing/index/numeric/dummy-numeric-index.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/term-metadata.h"
#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
using ::testing::IsEmpty;
using ::testing::Test;
using ::testing::UnorderedElementsAre;
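// Extracts the suggestion text from each TermMetadata result.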
std::vector<std::string> RetrieveSuggestionsText(
const std::vector<TermMetadata>& terms) {
std::vector<std::string> suggestions;
suggestions.reserve(terms.size());
for (const TermMetadata& term : terms) {
suggestions.push_back(term.content);
}
return suggestions;
}
class SuggestionProcessorTest : public Test {
protected:
SuggestionProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
store_dir_(test_dir_ + "/store"),
schema_store_dir_(test_dir_ + "/schema_store"),
index_dir_(test_dir_ + "/index"),
numeric_index_dir_(test_dir_ + "/numeric_index"),
embedding_index_dir_(test_dir_ + "/embedding_index") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
// ICU-based segmentation is in use here (neither CFString nor reverse-JNI),
// so the ICU data file needs to be set up. If we'd specified reverse-JNI for
// segmentation instead, the ICU data file wouldn't be included. Technically,
// we could use reverse-JNI for segmentation AND include an ICU data file, but
// that seems unlikely and our current BUILD setup doesn't do this.
ICING_ASSERT_OK(
// File generated via icu_data_file rule in //icing/BUILD.
icu_data_file_helper::SetUpICUDataFile(
GetTestFilePath("icing/icu.dat")));
}
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(
&filesystem_, store_dir_, &fake_clock_, schema_store_.get(),
/*force_recovery_and_revalidate_documents=*/false,
/*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
/*index_merge_size=*/1024 * 1024,
/*lite_index_sort_at_indexing=*/true,
/*lite_index_sort_size=*/1024 * 8);
ICING_ASSERT_OK_AND_ASSIGN(
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// TODO(b/249829533): switch to use persistent numeric index.
ICING_ASSERT_OK_AND_ASSIGN(
numeric_index_,
DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
ICING_ASSERT_OK_AND_ASSIGN(
embedding_index_,
EmbeddingIndex::Create(&filesystem_, embedding_index_dir_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
ICING_ASSERT_OK_AND_ASSIGN(
language_segmenter_,
language_segmenter_factory::Create(segmenter_options));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/1000));
ICING_ASSERT_OK_AND_ASSIGN(
suggestion_processor_,
SuggestionProcessor::Create(
index_.get(), numeric_index_.get(), embedding_index_.get(),
language_segmenter_.get(), normalizer_.get(), document_store_.get(),
schema_store_.get(), &fake_clock_));
}
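// Adds a single token to the index for the given document and section,
// returning the first error hit while buffering or committing the term.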
libtextclassifier3::Status AddTokenToIndex(
DocumentId document_id, SectionId section_id,
TermMatchType::Code term_match_type, const std::string& token) {
Index::Editor editor = index_->Edit(document_id, section_id,
term_match_type, /*namespace_id=*/0);
auto status = editor.BufferTerm(token.c_str());
return status.ok() ? editor.IndexAllBufferedTerms() : status;
}
void TearDown() override {
document_store_.reset();
schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
Filesystem filesystem_;
const std::string test_dir_;
const std::string store_dir_;
const std::string schema_store_dir_;
private:
IcingFilesystem icing_filesystem_;
const std::string index_dir_;
const std::string numeric_index_dir_;
const std::string embedding_index_dir_;
protected:
std::unique_ptr<Index> index_;
std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
std::unique_ptr<EmbeddingIndex> embedding_index_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
FakeClock fake_clock_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<DocumentStore> document_store_;
std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
std::unique_ptr<SuggestionProcessor> suggestion_processor_;
};
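// Arbitrary section id used when adding tokens directly to the index; the
// "email" type in these tests declares no properties, so the tokens don't
// correspond to a real schema section.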
constexpr SectionId kSectionId2 = 2;
TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "2")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "bar"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "fool"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("bar f");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
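// Only documentId0 contains "bar", so only its "foo" can complete the "f"
// prefix; "fool" from documentId1 is not suggested.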
EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("bar foo"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "2")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "bar"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "cat"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "fool"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("bar cat f");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(RetrieveSuggestionsText(terms),
UnorderedElementsAre("bar cat foo"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "2")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "fo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "bar"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "cat"),
IsOk());
// Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat
// foo" could match.
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("bar OR cat f");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(RetrieveSuggestionsText(terms),
UnorderedElementsAre("bar OR cat fo", "bar OR cat foo"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "2")
.SetSchema("email")
.Build()));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId2,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "3")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "fo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "bar"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "cat"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
TermMatchType::EXACT_ONLY, "fool"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2,
TermMatchType::EXACT_ONLY, "lot"),
IsOk());
SuggestionSpecProto suggestion_spec;
// Search for "((bar OR cat) OR lot) AND f"
suggestion_spec.set_prefix("bar OR cat OR lot f");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
// "fo" in document1, "foo" in document2 and "fool" in document3 could match.
EXPECT_THAT(
RetrieveSuggestionsText(terms),
UnorderedElementsAre("bar OR cat OR lot fo", "bar OR cat OR lot foo",
"bar OR cat OR lot fool"));
}
TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "2")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "bar"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "fool"),
IsOk());
ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2,
TermMatchType::EXACT_ONLY, "bar"),
IsOk());
SuggestionSpecProto suggestion_spec;
// Search for "bar AND FO"
suggestion_spec.set_prefix("bar FO");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
// The term is normalized.
EXPECT_THAT(RetrieveSuggestionsText(terms),
UnorderedElementsAre("bar foo", "bar fool"));
// Search for "bar AND ḞÖ"
suggestion_spec.set_prefix("bar ḞÖ");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
// The term is normalized.
EXPECT_THAT(RetrieveSuggestionsText(terms),
UnorderedElementsAre("bar foo", "bar fool"));
}
TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("nonExistTerm");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("f ");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
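// The trailing space makes "f" a completed term rather than a prefix, so
// there is nothing to expand.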
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("F");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
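// Each casing of the prefix normalizes to lowercase, so "foo" is suggested
// every time.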
EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("fO");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("Fo");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
suggestion_spec.set_prefix("FO");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo"));
}
TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("{f}");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermMetadata> terms,
suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
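// Prefixes wrapped in grouping operators are not expanded into suggestions.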
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("[f]");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("(f)");
ICING_ASSERT_OK_AND_ASSIGN(
terms, suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()));
EXPECT_THAT(terms, IsEmpty());
}
TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "foo"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("f:");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
auto terms_or = suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
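// The raw query parser tolerates the dangling property restrict and returns
// no suggestions, while the advanced query parser rejects it.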
if (SearchSpecProto::default_instance().search_type() ==
SearchSpecProto::SearchType::ICING_RAW_QUERY) {
ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
EXPECT_THAT(terms, IsEmpty());
} else {
EXPECT_THAT(terms_or,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
// TODO(b/208654892): Update handling for hyphens to only consider it a hyphen
// within a TEXT token (rather than a MINUS token) when surrounded on both
// sides by TEXT rather than just preceded by TEXT.
suggestion_spec.set_prefix("f-");
terms_or = suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
EXPECT_THAT(terms, IsEmpty());
suggestion_spec.set_prefix("f OR");
terms_or = suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
if (SearchSpecProto::default_instance().search_type() ==
SearchSpecProto::SearchType::ICING_RAW_QUERY) {
ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
EXPECT_THAT(terms, IsEmpty());
} else {
EXPECT_THAT(terms_or,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
if (SearchSpecProto::default_instance().search_type() ==
SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
suggestion_spec.set_prefix(
"bar OR semanticSearch(getSearchSpecEmbedding(0), 0.5, 1)");
terms_or = suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(terms_or,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
}
TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
ASSERT_THAT(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false),
IsOk());
// These documents don't actually match the tokens in the index. We're
// inserting them only to get the appropriate number of documents and
// namespaces populated.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
document_store_->Put(DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
.Build()));
ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
TermMatchType::EXACT_ONLY, "original"),
IsOk());
SuggestionSpecProto suggestion_spec;
suggestion_spec.set_prefix("OR OR - :");
suggestion_spec.set_num_to_return(10);
suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
TermMatchType::PREFIX);
auto terms_or = suggestion_processor_->QuerySuggestions(
suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
if (SearchSpecProto::default_instance().search_type() ==
SearchSpecProto::SearchType::ICING_RAW_QUERY) {
ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
EXPECT_THAT(terms, IsEmpty());
} else {
EXPECT_THAT(terms_or,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
}
} // namespace
} // namespace lib
} // namespace icing