| // Copyright (C) 2021 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "icing/query/suggestion-processor.h" |
| |
| #include <cstdint> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "icing/text_classifier/lib3/utils/base/status.h" |
| #include "gmock/gmock.h" |
| #include "gtest/gtest.h" |
| #include "icing/document-builder.h" |
| #include "icing/file/filesystem.h" |
| #include "icing/file/portable-file-backed-proto-log.h" |
| #include "icing/index/embed/embedding-index.h" |
| #include "icing/index/index.h" |
| #include "icing/index/numeric/dummy-numeric-index.h" |
| #include "icing/index/numeric/numeric-index.h" |
| #include "icing/index/term-metadata.h" |
| #include "icing/jni/jni-cache.h" |
| #include "icing/legacy/index/icing-filesystem.h" |
| #include "icing/portable/platform.h" |
| #include "icing/schema-builder.h" |
| #include "icing/schema/schema-store.h" |
| #include "icing/schema/section.h" |
| #include "icing/store/document-id.h" |
| #include "icing/store/document-store.h" |
| #include "icing/testing/common-matchers.h" |
| #include "icing/testing/fake-clock.h" |
| #include "icing/testing/icu-data-file-helper.h" |
| #include "icing/testing/jni-test-helpers.h" |
| #include "icing/testing/test-data.h" |
| #include "icing/testing/tmp-directory.h" |
| #include "icing/tokenization/language-segmenter-factory.h" |
| #include "icing/tokenization/language-segmenter.h" |
| #include "icing/transform/normalizer-factory.h" |
| #include "icing/transform/normalizer.h" |
| #include "unicode/uloc.h" |
| |
| namespace icing { |
| namespace lib { |
| |
| namespace { |
| |
| using ::testing::IsEmpty; |
| using ::testing::Test; |
| using ::testing::UnorderedElementsAre; |
| |
| std::vector<std::string> RetrieveSuggestionsText( |
| const std::vector<TermMetadata>& terms) { |
| std::vector<std::string> suggestions; |
| suggestions.reserve(terms.size()); |
| for (const TermMetadata& term : terms) { |
| suggestions.push_back(term.content); |
| } |
| return suggestions; |
| } |
| |
// Test fixture that wires together every component a SuggestionProcessor
// depends on (schema store, document store, term/numeric/embedding indices,
// language segmenter, normalizer), all backed by a scratch directory that is
// wiped before and after each test.
class SuggestionProcessorTest : public Test {
 protected:
  SuggestionProcessorTest()
      : test_dir_(GetTestTempDir() + "/icing"),
        store_dir_(test_dir_ + "/store"),
        schema_store_dir_(test_dir_ + "/schema_store"),
        index_dir_(test_dir_ + "/index"),
        numeric_index_dir_(test_dir_ + "/numeric_index"),
        embedding_index_dir_(test_dir_ + "/embedding_index") {}

  void SetUp() override {
    // Start from a clean slate: remove leftovers from any previous run, then
    // create the directories each component needs.
    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
    filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
    filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
    filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());

    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
      // If we've specified using the reverse-JNI method for segmentation (i.e.
      // not ICU), then we won't have the ICU data file included to set up.
      // Technically, we could choose to use reverse-JNI for segmentation AND
      // include an ICU data file, but that seems unlikely and our current BUILD
      // setup doesn't do this.
      ICING_ASSERT_OK(
          // File generated via icu_data_file rule in //icing/BUILD.
          icu_data_file_helper::SetUpICUDataFile(
              GetTestFilePath("icing/icu.dat")));
    }

    ICING_ASSERT_OK_AND_ASSIGN(
        schema_store_,
        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));

    // The document store takes a raw pointer to the schema store, so the
    // schema store must be created first (and destroyed last — see TearDown).
    ICING_ASSERT_OK_AND_ASSIGN(
        DocumentStore::CreateResult create_result,
        DocumentStore::Create(
            &filesystem_, store_dir_, &fake_clock_, schema_store_.get(),
            /*force_recovery_and_revalidate_documents=*/false,
            /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
            /*use_persistent_hash_map=*/false,
            PortableFileBackedProtoLog<
                DocumentWrapper>::kDeflateCompressionLevel,
            /*initialize_stats=*/nullptr));
    document_store_ = std::move(create_result.document_store);

    Index::Options options(index_dir_,
                           /*index_merge_size=*/1024 * 1024,
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/1024 * 8);
    ICING_ASSERT_OK_AND_ASSIGN(
        index_, Index::Create(options, &filesystem_, &icing_filesystem_));
    // TODO(b/249829533): switch to use persistent numeric index.
    ICING_ASSERT_OK_AND_ASSIGN(
        numeric_index_,
        DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));
    ICING_ASSERT_OK_AND_ASSIGN(
        embedding_index_,
        EmbeddingIndex::Create(&filesystem_, embedding_index_dir_));

    language_segmenter_factory::SegmenterOptions segmenter_options(
        ULOC_US, jni_cache_.get());
    ICING_ASSERT_OK_AND_ASSIGN(
        language_segmenter_,
        language_segmenter_factory::Create(segmenter_options));

    ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
                                                /*max_term_byte_size=*/1000));

    // The object under test. It borrows raw pointers to all of the components
    // above, which therefore must outlive it.
    ICING_ASSERT_OK_AND_ASSIGN(
        suggestion_processor_,
        SuggestionProcessor::Create(
            index_.get(), numeric_index_.get(), embedding_index_.get(),
            language_segmenter_.get(), normalizer_.get(), document_store_.get(),
            schema_store_.get(), &fake_clock_));
  }

  // Indexes a single `token` under (`document_id`, `section_id`). Returns the
  // BufferTerm status if buffering fails; otherwise the status of committing
  // all buffered terms.
  libtextclassifier3::Status AddTokenToIndex(
      DocumentId document_id, SectionId section_id,
      TermMatchType::Code term_match_type, const std::string& token) {
    Index::Editor editor = index_->Edit(document_id, section_id,
                                        term_match_type, /*namespace_id=*/0);
    auto status = editor.BufferTerm(token.c_str());
    return status.ok() ? editor.IndexAllBufferedTerms() : status;
  }

  void TearDown() override {
    // Release the stores (document store first, since it references the
    // schema store) before deleting their backing files.
    document_store_.reset();
    schema_store_.reset();
    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
  }

  Filesystem filesystem_;
  const std::string test_dir_;
  const std::string store_dir_;
  const std::string schema_store_dir_;

 private:
  IcingFilesystem icing_filesystem_;
  const std::string index_dir_;
  const std::string numeric_index_dir_;
  const std::string embedding_index_dir_;

 protected:
  std::unique_ptr<Index> index_;
  std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
  std::unique_ptr<EmbeddingIndex> embedding_index_;
  std::unique_ptr<LanguageSegmenter> language_segmenter_;
  std::unique_ptr<Normalizer> normalizer_;
  FakeClock fake_clock_;
  std::unique_ptr<SchemaStore> schema_store_;
  std::unique_ptr<DocumentStore> document_store_;
  // May be null on platforms that don't need JNI-backed segmentation.
  std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
  std::unique_ptr<SuggestionProcessor> suggestion_processor_;
};
| |
// Arbitrary section id used when indexing tokens; any valid section id works
// for these tests.
constexpr SectionId kSectionId2 = 2;
| |
| TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "2") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "bar"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "fool"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("bar f"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("bar foo")); |
| } |
| |
| TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "2") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "bar"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "cat"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "fool"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("bar cat f"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), |
| UnorderedElementsAre("bar cat foo")); |
| } |
| |
| TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "2") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "fo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "bar"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "cat"), |
| IsOk()); |
| |
| // Search for "(bar OR cat) AND f" both document1 "bar fo" and document2 "cat |
| // foo" could match. |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("bar OR cat f"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), |
| UnorderedElementsAre("bar OR cat fo", "bar OR cat foo")); |
| } |
| |
| TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "2") |
| .SetSchema("email") |
| .Build())); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId2, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "3") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "fo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "bar"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "cat"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2, |
| TermMatchType::EXACT_ONLY, "fool"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId2, kSectionId2, |
| TermMatchType::EXACT_ONLY, "lot"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| // Search for "((bar OR cat) OR lot) AND f" |
| suggestion_spec.set_prefix("bar OR cat OR lot f"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| // "fo" in document1, "foo" in document2 and "fool" in document3 could match. |
| EXPECT_THAT( |
| RetrieveSuggestionsText(terms), |
| UnorderedElementsAre("bar OR cat OR lot fo", "bar OR cat OR lot foo", |
| "bar OR cat OR lot fool")); |
| } |
| |
| TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId1, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "2") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "bar"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "fool"), |
| IsOk()); |
| ASSERT_THAT(AddTokenToIndex(documentId1, kSectionId2, |
| TermMatchType::EXACT_ONLY, "bar"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| // Search for "bar AND FO" |
| suggestion_spec.set_prefix("bar FO"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| // The term is normalized. |
| EXPECT_THAT(RetrieveSuggestionsText(terms), |
| UnorderedElementsAre("bar foo", "bar fool")); |
| |
| // Search for "bar AND ḞÖ" |
| suggestion_spec.set_prefix("bar ḞÖ"); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| terms, suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| // The term is normalized. |
| EXPECT_THAT(RetrieveSuggestionsText(terms), |
| UnorderedElementsAre("bar foo", "bar fool")); |
| } |
| |
| TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("nonExistTerm"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(terms, IsEmpty()); |
| } |
| |
| TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("f "); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(terms, IsEmpty()); |
| } |
| |
| TEST_F(SuggestionProcessorTest, NormalizePrefixTest) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("F"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo")); |
| |
| suggestion_spec.set_prefix("fO"); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| terms, suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo")); |
| |
| suggestion_spec.set_prefix("Fo"); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| terms, suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo")); |
| |
| suggestion_spec.set_prefix("FO"); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| terms, suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(RetrieveSuggestionsText(terms), UnorderedElementsAre("foo")); |
| } |
| |
| TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "foo"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("{f}"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::vector<TermMetadata> terms, |
| suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(terms, IsEmpty()); |
| |
| suggestion_spec.set_prefix("[f]"); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| terms, suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(terms, IsEmpty()); |
| |
| suggestion_spec.set_prefix("(f)"); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| terms, suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds())); |
| EXPECT_THAT(terms, IsEmpty()); |
| } |
| |
// Verifies handling of prefixes that end in special characters. The expected
// outcome depends on which query parser is active: ICING_RAW_QUERY returns OK
// with no suggestions, while other parsers reject the input with
// INVALID_ARGUMENT.
TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // namespaces populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace1", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2,
                              TermMatchType::EXACT_ONLY, "foo"),
              IsOk());

  // Case 1: prefix ending in ":".
  SuggestionSpecProto suggestion_spec;
  suggestion_spec.set_prefix("f:");
  suggestion_spec.set_num_to_return(10);
  suggestion_spec.mutable_scoring_spec()->set_scoring_match_type(
      TermMatchType::PREFIX);

  auto terms_or = suggestion_processor_->QuerySuggestions(
      suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
  if (SearchSpecProto::default_instance().search_type() ==
      SearchSpecProto::SearchType::ICING_RAW_QUERY) {
    ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
    EXPECT_THAT(terms, IsEmpty());
  } else {
    EXPECT_THAT(terms_or,
                StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  }

  // Case 2: prefix ending in "-". Both parsers accept this but return no
  // suggestions.
  // TODO(b/208654892): Update handling for hyphens to only consider it a hyphen
  // within a TEXT token (rather than a MINUS token) when surrounded on both
  // sides by TEXT rather than just preceded by TEXT.
  suggestion_spec.set_prefix("f-");
  terms_or = suggestion_processor_->QuerySuggestions(
      suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
  ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
  EXPECT_THAT(terms, IsEmpty());

  // Case 3: trailing "OR" with no right-hand operand.
  suggestion_spec.set_prefix("f OR");
  terms_or = suggestion_processor_->QuerySuggestions(
      suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
  if (SearchSpecProto::default_instance().search_type() ==
      SearchSpecProto::SearchType::ICING_RAW_QUERY) {
    ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or);
    EXPECT_THAT(terms, IsEmpty());
  } else {
    EXPECT_THAT(terms_or,
                StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  }

  // Case 4 (advanced query only): the semanticSearch function is rejected in
  // suggestion queries.
  if (SearchSpecProto::default_instance().search_type() ==
      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
    suggestion_spec.set_prefix(
        "bar OR semanticSearch(getSearchSpecEmbedding(0), 0.5, 1)");
    terms_or = suggestion_processor_->QuerySuggestions(
        suggestion_spec, fake_clock_.GetSystemTimeMilliseconds());
    EXPECT_THAT(terms_or,
                StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  }
}
| |
| TEST_F(SuggestionProcessorTest, InvalidPrefixTest) { |
| // Create the schema and document store |
| SchemaProto schema = SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("email")) |
| .Build(); |
| ASSERT_THAT(schema_store_->SetSchema( |
| schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false), |
| IsOk()); |
| |
| // These documents don't actually match to the tokens in the index. We're |
| // inserting the documents to get the appropriate number of documents and |
| // namespaces populated. |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId documentId0, |
| document_store_->Put(DocumentBuilder() |
| .SetKey("namespace1", "1") |
| .SetSchema("email") |
| .Build())); |
| |
| ASSERT_THAT(AddTokenToIndex(documentId0, kSectionId2, |
| TermMatchType::EXACT_ONLY, "original"), |
| IsOk()); |
| |
| SuggestionSpecProto suggestion_spec; |
| suggestion_spec.set_prefix("OR OR - :"); |
| suggestion_spec.set_num_to_return(10); |
| suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( |
| TermMatchType::PREFIX); |
| |
| auto terms_or = suggestion_processor_->QuerySuggestions( |
| suggestion_spec, fake_clock_.GetSystemTimeMilliseconds()); |
| if (SearchSpecProto::default_instance().search_type() == |
| SearchSpecProto::SearchType::ICING_RAW_QUERY) { |
| ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, terms_or); |
| EXPECT_THAT(terms, IsEmpty()); |
| } else { |
| EXPECT_THAT(terms_or, |
| StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); |
| } |
| } |
| |
| } // namespace |
| |
| } // namespace lib |
| } // namespace icing |