Revision control
Copy as Markdown
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
*/
use rusqlite::types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef};
use rusqlite::{named_params, Result as RusqliteResult, ToSql};
use sql_support::ConnExt;
use url::form_urlencoded;
use crate::{
db::SuggestDao,
provider::SuggestionProvider,
rs::{DownloadedYelpSuggestion, SuggestRecordId},
suggestion::Suggestion,
suggestion::YelpSubjectType,
Result, SuggestionQuery,
};
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
enum Modifier {
Pre = 0,
Post = 1,
Yelp = 2,
LocationSign = 3,
}
impl ToSql for Modifier {
fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
Ok(ToSqlOutput::from(*self as u8))
}
}
impl ToSql for YelpSubjectType {
fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
Ok(ToSqlOutput::from(*self as u8))
}
}
impl FromSql for YelpSubjectType {
fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
if value.as_i64().unwrap_or_default() == 0 {
Ok(YelpSubjectType::Service)
} else {
Ok(YelpSubjectType::Business)
}
}
}
#[derive(Clone, Copy, Eq, PartialEq)]
enum FindFrom {
First,
Last,
}
/// This module assumes like following query.
/// "Yelp-modifier? Pre-modifier? Subject Post-modifier? (Location-modifier | Location-sign Location?)? Yelp-modifier?"
/// For example, the query below is valid.
/// "Yelp (Yelp-modifier) Best(Pre-modifier) Ramen(Subject) Delivery(Post-modifier) In(Location-sign) Tokyo(Location)"
/// Also, as everything except Subject is optional, "Ramen" will be also valid query.
/// However, "Best Best Ramen" and "Ramen Best" is out of the above appearance order rule,
/// parsing will be failed. Also, every words except Location needs to be registered in DB.
/// Please refer to the query test in store.rs for all of combination.
/// Currently, the maximum query length is determined while referring to having word lengths in DB
/// and location names.
/// max subject: 50 + pre-modifier: 10 + post-modifier: 10 + location-sign: 7 + location: 50 = 127 = 150.
const MAX_QUERY_LENGTH: usize = 150;
/// The max number of words consisting the modifier. To improve the SQL performance by matching with
/// "keyword=:modifier" (please see is_modifier()), define this how many words we should check.
const MAX_MODIFIER_WORDS_NUMBER: usize = 2;
/// At least this many characters must be typed for a subject to be matched.
const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
#[derive(Debug, PartialEq)]
struct FindSubjectData<'a> {
// The keyword in DB (but the case is inherited by query).
subject: String,
// Whether or not the keyword is exact match.
exact_match: bool,
// The subject type.
subject_type: YelpSubjectType,
// Words after removed matching subject.
rest: &'a [&'a str],
}
impl SuggestDao<'_> {
/// Inserts the suggestions for Yelp attachment into the database.
pub(crate) fn insert_yelp_suggestions(
&mut self,
record_id: &SuggestRecordId,
suggestion: &DownloadedYelpSuggestion,
) -> Result<()> {
for keyword in &suggestion.subjects {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_subjects(record_id, keyword, subject_type) VALUES(:record_id, :keyword, :subject_type)",
named_params! {
":record_id": record_id.as_str(),
":keyword": keyword,
":subject_type": YelpSubjectType::Service,
},
)?;
}
for keyword in suggestion.business_subjects.as_ref().unwrap_or(&vec![]) {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_subjects(record_id, keyword, subject_type) VALUES(:record_id, :keyword, :subject_type)",
named_params! {
":record_id": record_id.as_str(),
":keyword": keyword,
":subject_type": YelpSubjectType::Business,
},
)?;
}
for keyword in &suggestion.pre_modifiers {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::Pre,
":keyword": keyword,
},
)?;
}
for keyword in &suggestion.post_modifiers {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::Post,
":keyword": keyword,
},
)?;
}
for keyword in &suggestion.yelp_modifiers {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::Yelp,
":keyword": keyword,
},
)?;
}
for keyword in &suggestion.location_signs {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::LocationSign,
":keyword": keyword,
},
)?;
}
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)",
named_params! {
":record_id": record_id.as_str(),
":icon_id": suggestion.icon_id,
":score": suggestion.score,
},
)?;
Ok(())
}
/// Fetch Yelp suggestion from given user's query.
pub(crate) fn fetch_yelp_suggestions(
&self,
query: &SuggestionQuery,
) -> Result<Vec<Suggestion>> {
if !query.providers.contains(&SuggestionProvider::Yelp) {
return Ok(vec![]);
}
if query.keyword.len() > MAX_QUERY_LENGTH {
return Ok(vec![]);
}
let query_vec: Vec<_> = query.keyword.split_whitespace().collect();
let mut query_words: &[&str] = &query_vec;
let pre_yelp_modifier_tuple =
self.find_modifier(query_words, Modifier::Yelp, FindFrom::First)?;
if let Some((_, rest)) = pre_yelp_modifier_tuple {
query_words = rest;
}
let pre_modifier_tuple = self.find_modifier(query_words, Modifier::Pre, FindFrom::First)?;
if let Some((_, rest)) = pre_modifier_tuple {
query_words = rest;
}
let Some(subject_data) = self.find_subject(query_words)? else {
return Ok(vec![]);
};
query_words = subject_data.rest;
let post_modifier_tuple =
self.find_modifier(query_words, Modifier::Post, FindFrom::First)?;
if let Some((_, rest)) = post_modifier_tuple {
query_words = rest;
}
let location_sign_tuple =
self.find_modifier(query_words, Modifier::LocationSign, FindFrom::First)?;
if let Some((_, rest)) = location_sign_tuple {
query_words = rest;
}
let post_yelp_modifier_tuple =
self.find_modifier(query_words, Modifier::Yelp, FindFrom::Last)?;
if let Some((_, rest)) = post_yelp_modifier_tuple {
query_words = rest;
}
let location = if query_words.is_empty() {
None
} else {
Some(query_words.join(" "))
};
let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
let builder = SuggestionBuilder {
subject: &subject_data.subject,
subject_exact_match: subject_data.exact_match,
subject_type: subject_data.subject_type,
pre_modifier: pre_modifier_tuple.map(|(words, _)| words.to_string()),
post_modifier: post_modifier_tuple.map(|(words, _)| words.to_string()),
location_sign: location_sign_tuple.map(|(words, _)| words.to_string()),
location,
icon,
icon_mimetype,
score,
};
Ok(vec![builder.into()])
}
/// Find the modifier for given query and modifier type.
/// Find from last word, if set FindFrom::Last to find_from.
/// It returns Option<tuple> as follows:
/// (
/// String: The keyword in DB (but the case is inherited by query).
/// &[&str]: Words after removed matching modifier.
/// )
fn find_modifier<'a>(
&self,
query_words: &'a [&'a str],
modifier_type: Modifier,
find_from: FindFrom,
) -> Result<Option<(String, &'a [&'a str])>> {
if query_words.is_empty() {
return Ok(None);
}
for n in (1..=std::cmp::min(MAX_MODIFIER_WORDS_NUMBER, query_words.len())).rev() {
let Some((candidate_chunk, rest)) = (match find_from {
FindFrom::First => query_words.split_at_checked(n),
FindFrom::Last => query_words
.split_at_checked(query_words.len() - n)
.map(|(front, back)| (back, front)),
}) else {
continue;
};
let mut candidate = candidate_chunk.join(" ");
if let Some(keyword_lowercase) = self.conn.try_query_one::<String, _>(
if n == query_words.len() {
"
SELECT keyword FROM yelp_modifiers
WHERE type = :type AND keyword BETWEEN :word AND :word || x'FFFF'
LIMIT 1
"
} else {
"
SELECT keyword FROM yelp_modifiers
WHERE type = :type AND keyword = :word
LIMIT 1
"
},
named_params! {
":type": modifier_type,
":word": candidate.to_lowercase(),
},
true,
)? {
// Preserve the query as the user typed it including its case.
candidate.push_str(keyword_lowercase.get(candidate.len()..).unwrap_or_default());
return Ok(Some((candidate, rest)));
}
}
Ok(None)
}
/// Find the subject for given query.
/// It returns Option<FindSubjectData>.
fn find_subject<'a>(&self, query_words: &'a [&'a str]) -> Result<Option<FindSubjectData<'a>>> {
if query_words.is_empty() {
return Ok(None);
}
let mut query_string = query_words.join(" ");
// This checks if keyword is a substring of the query.
if let Ok((keyword_lowercase, subject_type)) = self.conn.query_row_and_then_cachable(
"SELECT keyword, subject_type
FROM yelp_subjects
WHERE :query BETWEEN keyword AND keyword || ' ' || x'FFFF'
ORDER BY LENGTH(keyword) ASC, keyword ASC
LIMIT 1",
named_params! {
":query": query_string.to_lowercase(),
},
|row| -> Result<_> {
Ok((row.get::<_, String>(0)?, row.get::<_, YelpSubjectType>(1)?))
},
true,
) {
// Preserve the query as the user typed it including its case.
return Ok(query_string.get(0..keyword_lowercase.len()).map(|keyword| {
let count = keyword.split_whitespace().count();
FindSubjectData {
subject: keyword.to_string(),
exact_match: true,
subject_type,
rest: query_words.get(count..).unwrap_or_default(),
}
}));
};
if query_string.len() < SUBJECT_PREFIX_MATCH_THRESHOLD {
return Ok(None);
}
// Oppositely, this checks if the query is a substring of keyword.
if let Ok((keyword_lowercase, subject_type)) = self.conn.query_row_and_then_cachable(
"SELECT keyword, subject_type
FROM yelp_subjects
WHERE keyword BETWEEN :query AND :query || x'FFFF'
ORDER BY LENGTH(keyword) ASC, keyword ASC
LIMIT 1",
named_params! {
":query": query_string.to_lowercase(),
},
|row| -> Result<_> {
Ok((row.get::<_, String>(0)?, row.get::<_, YelpSubjectType>(1)?))
},
true,
) {
// Preserve the query as the user typed it including its case.
return Ok(keyword_lowercase
.get(query_string.len()..)
.map(|keyword_rest| {
query_string.push_str(keyword_rest);
let count =
std::cmp::min(query_words.len(), query_string.split_whitespace().count());
FindSubjectData {
subject: query_string,
exact_match: false,
subject_type,
rest: query_words.get(count..).unwrap_or_default(),
}
}));
};
Ok(None)
}
/// Fetch the custom details for Yelp suggestions.
/// It returns the location tuple as follows:
/// (
/// Option<Vec<u8>>: Icon data. If not found, returns None.
/// Option<String>: Mimetype of the icon data. If not found, returns None.
/// f64: Reflects score field in the yelp_custom_details table.
/// )
///
/// Note that there should be only one record in `yelp_custom_details`
/// as all the Yelp assets are stored in the attachment of a single record
/// on Remote Settings. The following query will perform a table scan against
/// `yelp_custom_details` followed by an index search against `icons`,
/// which should be fine since there is only one record in the first table.
fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, Option<String>, f64)> {
let result = self.conn.query_row_and_then_cachable(
r#"
SELECT
i.data, i.mimetype, y.score
FROM
yelp_custom_details y
LEFT JOIN
icons i
ON y.icon_id = i.id
LIMIT
1
"#,
(),
|row| -> Result<_> {
Ok((
row.get::<_, Option<Vec<u8>>>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, f64>(2)?,
))
},
true,
)?;
Ok(result)
}
}
struct SuggestionBuilder<'a> {
subject: &'a str,
subject_exact_match: bool,
subject_type: YelpSubjectType,
pre_modifier: Option<String>,
post_modifier: Option<String>,
location_sign: Option<String>,
location: Option<String>,
icon: Option<Vec<u8>>,
icon_mimetype: Option<String>,
score: f64,
}
impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
fn from(builder: SuggestionBuilder<'a>) -> Suggestion {
let description = [
builder.pre_modifier.as_deref(),
Some(builder.subject),
builder.post_modifier.as_deref(),
]
.iter()
.flatten()
.copied()
.collect::<Vec<_>>()
.join(" ");
let mut parameters = form_urlencoded::Serializer::new(String::new());
parameters.append_pair("find_desc", &description);
if let Some(location) = &builder.location {
parameters.append_pair("find_loc", location);
}
url.push_str(¶meters.finish());
let title = [
builder.pre_modifier.as_deref(),
Some(builder.subject),
builder.post_modifier.as_deref(),
builder.location_sign.as_deref(),
builder.location.as_deref(),
]
.iter()
.flatten()
.copied()
.collect::<Vec<_>>()
.join(" ");
Suggestion::Yelp {
url,
title,
icon: builder.icon,
icon_mimetype: builder.icon_mimetype,
score: builder.score,
has_location_sign: builder.location_sign.is_some(),
subject_exact_match: builder.subject_exact_match,
subject_type: builder.subject_type,
location_param: "find_loc".to_string(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{store::tests::TestStore, testing::*, SuggestIngestionConstraints};
#[test]
fn yelp_functions() -> anyhow::Result<()> {
before_each();
let store = TestStore::new(
MockRemoteSettingsClient::default()
.with_record(SuggestionProvider::Yelp.record("data-4", json!([ramen_yelp()])))
.with_record(SuggestionProvider::Yelp.icon(yelp_favicon())),
);
store.ingest(SuggestIngestionConstraints {
providers: Some(vec![SuggestionProvider::Yelp]),
..SuggestIngestionConstraints::all_providers()
});
store.read(|dao| {
type FindModifierTestCase<'a> =
(&'a str, Modifier, FindFrom, Option<(String, &'a [&'a str])>);
let find_modifer_tests: &[FindModifierTestCase] = &[
// Query, Modifier, FindFrom, Expected result.
("", Modifier::Pre, FindFrom::First, None),
("", Modifier::Post, FindFrom::First, None),
("", Modifier::Yelp, FindFrom::First, None),
// Single word modifier.
(
"b",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"be",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"bes",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"best",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"best ",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"best r",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &["r"])),
),
(
"best ramen",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &["ramen"])),
),
(
"best spicy ramen",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &["spicy", "ramen"])),
),
(
"delivery",
Modifier::Post,
FindFrom::First,
Some(("delivery".to_string(), &[])),
),
(
"yelp",
Modifier::Yelp,
FindFrom::First,
Some(("yelp".to_string(), &[])),
),
(
"same_modifier",
Modifier::Pre,
FindFrom::First,
Some(("same_modifier".to_string(), &[])),
),
(
"same_modifier",
Modifier::Post,
FindFrom::First,
Some(("same_modifier".to_string(), &[])),
),
("same_modifier", Modifier::Yelp, FindFrom::First, None),
// Multiple word modifier.
(
"s",
Modifier::Pre,
FindFrom::First,
Some(("same_modifier".to_string(), &[])),
),
(
"su",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super b",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super be",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super bes",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super best",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super best ramen",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &["ramen"])),
),
(
"super delivery",
Modifier::Post,
FindFrom::First,
Some(("super delivery".to_string(), &[])),
),
(
"yelp keyword",
Modifier::Yelp,
FindFrom::First,
Some(("yelp keyword".to_string(), &[])),
),
// Different modifier or findfrom.
("best ramen", Modifier::Post, FindFrom::First, None),
("best ramen", Modifier::Yelp, FindFrom::First, None),
("best ramen", Modifier::Pre, FindFrom::Last, None),
(
"ramen best",
Modifier::Pre,
FindFrom::Last,
Some(("best".to_string(), &["ramen"])),
),
// Keywords similar to modifire.
("bestabc", Modifier::Post, FindFrom::First, None),
("bestabc ramen", Modifier::Post, FindFrom::First, None),
// Keep chars case.
(
"BeSt SpIcY rAmEn",
Modifier::Pre,
FindFrom::First,
Some(("BeSt".to_string(), &["SpIcY", "rAmEn"])),
),
(
"SpIcY rAmEn DeLiVeRy",
Modifier::Post,
FindFrom::Last,
Some(("DeLiVeRy".to_string(), &["SpIcY", "rAmEn"])),
),
// Prefix match is available only for last words.
("be ramen", Modifier::Pre, FindFrom::First, None),
("bes ramen", Modifier::Pre, FindFrom::First, None),
];
for (query, modifier, findfrom, expected) in find_modifer_tests {
assert_eq!(
dao.find_modifier(
&query.split_whitespace().collect::<Vec<_>>(),
*modifier,
*findfrom
)?,
*expected
);
}
type FindSubjectTestCase<'a> = (&'a str, Option<FindSubjectData<'a>>);
let find_subject_tests: &[FindSubjectTestCase] = &[
// Query, Expected result.
("", None),
("r", None),
(
"ra",
Some(FindSubjectData {
subject: "rats".to_string(),
exact_match: false,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"ram",
Some(FindSubjectData {
subject: "ramen".to_string(),
exact_match: false,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"rame",
Some(FindSubjectData {
subject: "ramen".to_string(),
exact_match: false,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"ramen",
Some(FindSubjectData {
subject: "ramen".to_string(),
exact_match: true,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"spi",
Some(FindSubjectData {
subject: "spicy ramen".to_string(),
exact_match: false,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"spicy ra ",
Some(FindSubjectData {
subject: "spicy ramen".to_string(),
exact_match: false,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"spicy ramen",
Some(FindSubjectData {
subject: "spicy ramen".to_string(),
exact_match: true,
subject_type: YelpSubjectType::Service,
rest: &[],
}),
),
(
"spicy ramen gogo",
Some(FindSubjectData {
subject: "spicy ramen".to_string(),
exact_match: true,
subject_type: YelpSubjectType::Service,
rest: &["gogo"],
}),
),
(
"SpIcY rAmEn GoGo",
Some(FindSubjectData {
subject: "SpIcY rAmEn".to_string(),
exact_match: true,
subject_type: YelpSubjectType::Service,
rest: &["GoGo"],
}),
),
("ramenabc", None),
("ramenabc xyz", None),
("spicy ramenabc", None),
("spicy ramenabc xyz", None),
(
"ramen abc",
Some(FindSubjectData {
subject: "ramen".to_string(),
exact_match: true,
subject_type: YelpSubjectType::Service,
rest: &["abc"],
}),
),
(
"the shop",
Some(FindSubjectData {
subject: "the shop".to_string(),
exact_match: true,
subject_type: YelpSubjectType::Business,
rest: &[],
}),
),
];
for (query, expected) in find_subject_tests {
assert_eq!(
dao.find_subject(&query.split_whitespace().collect::<Vec<_>>())?,
*expected
);
}
type FindLocationSignTestCase<'a> = (&'a str, Option<(String, &'a [&'a str])>);
let find_location_sign_tests: &[FindLocationSignTestCase] = &[
// Query, Expected result.
("", None),
("n", Some(("near".to_string(), &[]))),
("ne", Some(("near".to_string(), &[]))),
("nea", Some(("near".to_string(), &[]))),
("near", Some(("near".to_string(), &[]))),
("near ", Some(("near".to_string(), &[]))),
("near b", Some(("near by".to_string(), &[]))),
("near by", Some(("near by".to_string(), &[]))),
("near by a", Some(("near by".to_string(), &["a"]))),
// Prefix match is available only for last words.
("nea r", None),
];
for (query, expected) in find_location_sign_tests {
assert_eq!(
dao.find_modifier(
&query.split_whitespace().collect::<Vec<_>>(),
Modifier::LocationSign,
FindFrom::First
)?,
*expected
);
}
Ok(())
})?;
Ok(())
}
}