blob: e22d5f423419f801898c9579603424f51a05d2ef [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/scoring/scorer.h"
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/index/embed/embedding-query-results.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/usage.pb.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/scorer-factory.h"
#include "icing/scoring/scorer-test-utils.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
using ::testing::Eq;
class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
protected:
ScorerTest()
: test_dir_(GetTestTempDir() + "/icing"),
doc_store_dir_(test_dir_ + "/doc_store"),
schema_store_dir_(test_dir_ + "/schema_store") {}
void SetUp() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
fake_clock1_.SetSystemTimeMilliseconds(1571100000000);
fake_clock2_.SetSystemTimeMilliseconds(1572200000000);
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(
&filesystem_, doc_store_dir_, &fake_clock1_, schema_store_.get(),
/*force_recovery_and_revalidate_documents=*/false,
/*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
/*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
SchemaProto test_email_schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
PropertyConfigBuilder()
.SetName("subject")
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
test_email_schema, /*ignore_errors_and_delete_documents=*/false,
/*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
document_store_.reset();
schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
DocumentStore* document_store() { return document_store_.get(); }
SchemaStore* schema_store() { return schema_store_.get(); }
const FakeClock& fake_clock1() { return fake_clock1_; }
const FakeClock& fake_clock2() { return fake_clock2_; }
void SetFakeClock1Time(int64_t new_time) {
fake_clock1_.SetSystemTimeMilliseconds(new_time);
}
SearchSpecProto::EmbeddingQueryMetricType::Code default_semantic_metric_type =
SearchSpecProto::EmbeddingQueryMetricType::DOT_PRODUCT;
EmbeddingQueryResults empty_embedding_query_results;
private:
const std::string test_dir_;
const std::string doc_store_dir_;
const std::string schema_store_dir_;
Filesystem filesystem_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<DocumentStore> document_store_;
FakeClock fake_clock1_;
FakeClock fake_clock2_;
};
UsageReport CreateUsageReport(std::string name_space, std::string uri,
int64_t timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
usage_report.set_document_uri(uri);
usage_report.set_usage_timestamp_ms(timestamp_ms);
usage_report.set_usage_type(usage_type);
return usage_report;
}
TEST_P(ScorerTest, CreationWithNullDocumentStoreShouldFail) {
EXPECT_THAT(
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
/*default_score=*/0, default_semantic_metric_type,
/*document_store=*/nullptr, schema_store(),
fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_P(ScorerTest, CreationWithNullSchemaStoreShouldFail) {
EXPECT_THAT(
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
/*schema_store=*/nullptr, fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_P(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
/*default_score=*/10, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
// Non existent document id
DocHitInfo docHitInfo = DocHitInfo(/*document_id_in=*/1);
// The caller-provided default score is returned
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
}
TEST_P(ScorerTest, ShouldGetDefaultDocumentScore) {
// Creates a test document with the default document score 0
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
/*default_score=*/10, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0));
}
TEST_P(ScorerTest, ShouldGetCorrectDocumentScore) {
// Creates a test document with document score 5
DocumentProto test_document =
DocumentBuilder()
.SetScore(5)
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock2().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
}
// See scoring-processor_test.cc and icing-search-engine_test.cc for better
// Bm25F scoring tests.
TEST_P(ScorerTest, QueryIteratorNullRelevanceScoreShouldReturnDefaultScore) {
// Creates a test document with document score 5
DocumentProto test_document =
DocumentBuilder()
.SetScore(5)
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock2().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, GetParam()),
/*default_score=*/10, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
}
TEST_P(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
// Creates test_document1 with fake timestamp1
DocumentProto test_document1 =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
// Creates test_document2 with fake timestamp2
DocumentProto test_document2 =
DocumentBuilder()
.SetKey("icing", "email/2")
.SetSchema("email")
.AddStringProperty("subject", "subject foo 2")
.SetCreationTimestampMs(fake_clock2().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store()->Put(test_document1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
document_store()->Put(test_document2));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo1 = DocHitInfo(document_id1);
DocHitInfo docHitInfo2 = DocHitInfo(document_id2);
EXPECT_THAT(scorer->GetScore(docHitInfo1),
Eq(fake_clock1().GetSystemTimeMilliseconds()));
EXPECT_THAT(scorer->GetScore(docHitInfo2),
Eq(fake_clock2().GetSystemTimeMilliseconds()));
}
TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType1) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
// Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report a type1 usage.
UsageReport usage_report_type1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType2) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
// Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report a type2 usage.
UsageReport usage_report_type2 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
TEST_P(ScorerTest, ShouldGetCorrectUsageCountScoreForType3) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
// Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT, GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report a type1 usage.
UsageReport usage_report_type3 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
UsageReport::USAGE_TYPE3);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1));
}
TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
// Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE2_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE3_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
UsageReport usage_report_type1_time1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 5000ms, score should be updated.
UsageReport usage_report_type1_time5 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 3000ms, score should not be updated.
UsageReport usage_report_type1_time3 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
// Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE2_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE3_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
UsageReport usage_report_type2_time1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 5000ms, score should be updated.
UsageReport usage_report_type2_time5 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 3000ms, score should not be updated.
UsageReport usage_report_type2_time3 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
TEST_P(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
// Create 3 scorers for 3 different usage types.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer2,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE2_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer3,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE3_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
UsageReport usage_report_type3_time1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
UsageReport::USAGE_TYPE3);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1000));
// Report usage with timestamp = 5000ms, score should be updated.
UsageReport usage_report_type3_time5 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
UsageReport::USAGE_TYPE3);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
// Report usage with timestamp = 3000ms, score should not be updated.
UsageReport usage_report_type3_time3 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
UsageReport::USAGE_TYPE3);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
}
TEST_P(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::NONE, GetParam()),
/*default_score=*/3, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo1 = DocHitInfo(/*document_id_in=*/0);
DocHitInfo docHitInfo2 = DocHitInfo(/*document_id_in=*/1);
DocHitInfo docHitInfo3 = DocHitInfo(/*document_id_in=*/2);
EXPECT_THAT(scorer->GetScore(docHitInfo1), Eq(3));
EXPECT_THAT(scorer->GetScore(docHitInfo2), Eq(3));
EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(3));
ICING_ASSERT_OK_AND_ASSIGN(
scorer,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::NONE, GetParam()),
/*default_score=*/111, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
docHitInfo1 = DocHitInfo(/*document_id_in=*/4);
docHitInfo2 = DocHitInfo(/*document_id_in=*/5);
docHitInfo3 = DocHitInfo(/*document_id_in=*/6);
EXPECT_THAT(scorer->GetScore(docHitInfo1), Eq(111));
EXPECT_THAT(scorer->GetScore(docHitInfo2), Eq(111));
EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(111));
}
TEST_P(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) {
DocumentProto test_document =
DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store()->Put(test_document));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer1,
scorer_factory::Create(
CreateScoringSpecForRankingStrategy(
ScoringSpecProto::RankingStrategy::
USAGE_TYPE1_LAST_USED_TIMESTAMP,
GetParam()),
/*default_score=*/0, default_semantic_metric_type, document_store(),
schema_store(), fake_clock1().GetSystemTimeMilliseconds(),
/*join_children_fetcher=*/nullptr, &empty_embedding_query_results));
DocHitInfo docHitInfo = DocHitInfo(document_id);
// Create usage report for the maximum allowable timestamp.
UsageReport usage_report_type1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1",
/*timestamp_ms=*/std::numeric_limits<uint32_t>::max() * 1000.0,
UsageReport::USAGE_TYPE1);
double max_int_usage_timestamp_score =
std::numeric_limits<uint32_t>::max() * 1000.0;
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(max_int_usage_timestamp_score));
}
INSTANTIATE_TEST_SUITE_P(ScorerTest, ScorerTest,
testing::Values(ScorerTestingMode::kNormal,
ScorerTestingMode::kAdvanced));
} // namespace
} // namespace lib
} // namespace icing