Revision control

Copy as Markdown

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
use crate::{db::Sqlite3Extension, geoname::geonames_collate};
use rusqlite::{Connection, Transaction};
use sql_support::{
open_database::{self, ConnectionInitializer},
ConnExt,
};
/// The current database schema version.
///
/// For any changes to the schema [`SQL`], please make sure to:
///
/// 1. Bump this version.
/// 2. Add a migration from the old version to the new version in
/// [`SuggestConnectionInitializer::upgrade_from`].
/// a. If suggestions should be re-ingested after the migration, call `clear_database()` inside
/// the migration.
/// 3. If the migration adds any tables, delete their their rows in
/// `clear_database()` by adding their names to `conditional_tables`, unless
/// they are cleared via a deletion trigger or there's some other good
/// reason not to do so.
pub const VERSION: u32 = 41;
/// The current Suggest database schema.
pub const SQL: &str = "
CREATE TABLE meta(
key TEXT PRIMARY KEY,
value NOT NULL
) WITHOUT ROWID;
CREATE TABLE ingested_records(
id TEXT,
collection TEXT,
type TEXT NOT NULL,
last_modified INTEGER NOT NULL,
PRIMARY KEY (id, collection)
) WITHOUT ROWID;
CREATE TABLE keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL,
full_keyword_id INTEGER NULL,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
-- Keywords metrics per record ID and type. Currently we only record metrics for
-- a small number of record types.
CREATE TABLE keywords_metrics(
record_id TEXT NOT NULL PRIMARY KEY,
record_type TEXT NOT NULL,
max_len INTEGER NOT NULL,
max_word_count INTEGER NOT NULL
) WITHOUT ROWID;
CREATE INDEX keywords_metrics_record_type ON keywords_metrics(record_type);
-- full keywords are what we display to the user when a (partial) keyword matches
CREATE TABLE full_keywords(
id INTEGER PRIMARY KEY,
suggestion_id INTEGER NOT NULL,
full_keyword TEXT NOT NULL
);
CREATE TABLE prefix_keywords(
keyword_prefix TEXT NOT NULL,
keyword_suffix TEXT NOT NULL DEFAULT '',
confidence INTEGER NOT NULL DEFAULT 0,
rank INTEGER NOT NULL,
suggestion_id INTEGER NOT NULL,
PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
) WITHOUT ROWID;
CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
CREATE TABLE suggestions(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
provider INTEGER NOT NULL,
title TEXT NOT NULL,
url TEXT NOT NULL,
score REAL NOT NULL
);
CREATE TABLE amp_custom_details(
suggestion_id INTEGER PRIMARY KEY,
advertiser TEXT NOT NULL,
block_id INTEGER NOT NULL,
iab_category TEXT NOT NULL,
impression_url TEXT NOT NULL,
click_url TEXT NOT NULL,
icon_id TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE TABLE wikipedia_custom_details(
suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
icon_id TEXT NOT NULL
);
CREATE TABLE amo_custom_details(
suggestion_id INTEGER PRIMARY KEY,
description TEXT NOT NULL,
guid TEXT NOT NULL,
icon_url TEXT NOT NULL,
rating TEXT,
number_of_ratings INTEGER NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE TABLE fakespot_custom_details(
suggestion_id INTEGER PRIMARY KEY,
fakespot_grade TEXT NOT NULL,
product_id TEXT NOT NULL,
keywords TEXT NOT NULL,
product_type TEXT NOT NULL,
rating REAL NOT NULL,
total_reviews INTEGER NOT NULL,
icon_id TEXT,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5(
title,
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
INSERT INTO fakespot_fts(rowid, title)
SELECT id, title
FROM suggestions
WHERE id = new.suggestion_id;
END;
CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
full_keywords,
title,
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
-- DELETE/UPDATE triggers are difficult to implement, since the FTS contents are split between the fakespot_custom_details and suggestions tables.
-- If you use an AFTER trigger, then the data from the other table has already been deleted.
-- BEFORE triggers are discouraged by the SQLite docs.
-- Instead, the drop_suggestions function handles updating the FTS data.
CREATE INDEX suggestions_record_id ON suggestions(record_id);
CREATE TABLE icons(
id TEXT PRIMARY KEY,
data BLOB NOT NULL,
mimetype TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_subjects(
keyword TEXT PRIMARY KEY,
subject_type INTEGER NOT NULL DEFAULT 0,
record_id TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_modifiers(
type INTEGER NOT NULL,
keyword TEXT NOT NULL,
record_id TEXT NOT NULL,
PRIMARY KEY (type, keyword)
) WITHOUT ROWID;
CREATE TABLE yelp_custom_details(
icon_id TEXT PRIMARY KEY,
score REAL NOT NULL,
record_id TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE mdn_custom_details(
suggestion_id INTEGER PRIMARY KEY,
description TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE TABLE dynamic_custom_details(
suggestion_id INTEGER PRIMARY KEY,
suggestion_type TEXT NOT NULL,
json_data TEXT,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE INDEX dynamic_custom_details_suggestion_type ON dynamic_custom_details(suggestion_type);
CREATE TABLE geonames(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
name TEXT NOT NULL,
feature_class TEXT NOT NULL,
feature_code TEXT NOT NULL,
country_code TEXT NOT NULL,
admin1_code TEXT,
admin2_code TEXT,
admin3_code TEXT,
admin4_code TEXT,
population INTEGER,
latitude TEXT,
longitude TEXT
);
-- `language` is a lowercase ISO 639 code: 'en', 'de', 'fr', etc. It can also be
-- a geonames pseudo-language like 'abbr' for abbreviations and 'iata' for
-- airport codes. It will be null for names derived from a geoname's primary
-- name (see `Geoname::name` and `Geoname::ascii_name`).
-- `geoname_id` is not defined as a foreign key because the main geonames
-- records are not guaranteed to be ingested before alternates records.
CREATE TABLE geonames_alternates(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
language TEXT,
name TEXT NOT NULL COLLATE geonames_collate,
is_preferred INTEGER,
is_short INTEGER
);
CREATE INDEX geonames_alternates_geoname_id_language
ON geonames_alternates(geoname_id, language);
CREATE INDEX geonames_alternates_name
ON geonames_alternates(name);
CREATE TRIGGER geonames_alternates_delete AFTER DELETE ON geonames BEGIN
DELETE FROM geonames_alternates
WHERE geoname_id = old.id;
END;
CREATE TABLE geonames_metrics(
record_id TEXT NOT NULL PRIMARY KEY,
max_name_length INTEGER NOT NULL,
max_name_word_count INTEGER NOT NULL
) WITHOUT ROWID;
-- `url` may be an opaque dismissal key rather than a URL depending on the
-- suggestion type.
CREATE TABLE dismissed_suggestions (
url TEXT PRIMARY KEY
) WITHOUT ROWID;
";
/// Initializes an SQLite connection to the Suggest database, performing
/// migrations as needed.
#[derive(Default)]
pub struct SuggestConnectionInitializer<'a> {
extensions_to_load: &'a [Sqlite3Extension],
}
impl<'a> SuggestConnectionInitializer<'a> {
pub fn new(extensions_to_load: &'a [Sqlite3Extension]) -> Self {
Self { extensions_to_load }
}
pub fn load_extensions(&self, conn: &Connection) -> open_database::Result<()> {
// Safety: this relies on the extensions we're loading to operate correctly, for the
// entry point to be correct, etc.
unsafe {
let _guard = rusqlite::LoadExtensionGuard::new(conn)?;
for ext in self.extensions_to_load {
conn.load_extension(&ext.library, ext.entry_point.as_deref())?;
}
}
Ok(())
}
fn create_custom_functions(&self, conn: &Connection) -> open_database::Result<()> {
conn.create_collation("geonames_collate", geonames_collate)?;
Ok(())
}
}
impl ConnectionInitializer for SuggestConnectionInitializer<'_> {
const NAME: &'static str = "suggest db";
const END_VERSION: u32 = VERSION;
fn prepare(&self, conn: &Connection, _db_empty: bool) -> open_database::Result<()> {
self.load_extensions(conn)?;
sql_support::setup_sqlite_defaults(conn)?;
conn.execute("PRAGMA foreign_keys = ON", ())?;
sql_support::debug_tools::define_debug_functions(conn)?;
self.create_custom_functions(conn)?;
Ok(())
}
fn init(&self, db: &Transaction<'_>) -> open_database::Result<()> {
db.execute_batch(SQL)?;
Ok(())
}
fn upgrade_from(&self, tx: &Transaction<'_>, version: u32) -> open_database::Result<()> {
// Custom functions are per connection. `prepare` usually handles
// creating them but on upgrade it's not called before this method is.
self.create_custom_functions(tx)?;
match version {
1..=15 => {
// Treat databases with these older schema versions as corrupt,
// so that they'll be replaced by a fresh, empty database with
// the current schema.
Err(open_database::Error::Corrupt)
}
16 => {
tx.execute(
"
CREATE TABLE dismissed_suggestions (
url_hash INTEGER PRIMARY KEY
) WITHOUT ROWID;",
(),
)?;
Ok(())
}
17 => {
tx.execute(
"
DROP TABLE dismissed_suggestions;
CREATE TABLE dismissed_suggestions (
url TEXT PRIMARY KEY
) WITHOUT ROWID;",
(),
)?;
Ok(())
}
18 => {
tx.execute_batch(
"
CREATE TABLE IF NOT EXISTS dismissed_suggestions (
url TEXT PRIMARY KEY
) WITHOUT ROWID;",
)?;
Ok(())
}
19 => {
// Clear the database since we're going to be dropping the keywords table and
// re-creating it
clear_database(tx)?;
tx.execute_batch(
"
-- Recreate the various keywords table to drop the foreign keys.
DROP TABLE keywords;
DROP TABLE full_keywords;
DROP TABLE prefix_keywords;
CREATE TABLE keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL,
full_keyword_id INTEGER NULL,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
CREATE TABLE full_keywords(
id INTEGER PRIMARY KEY,
suggestion_id INTEGER NOT NULL,
full_keyword TEXT NOT NULL
);
CREATE TABLE prefix_keywords(
keyword_prefix TEXT NOT NULL,
keyword_suffix TEXT NOT NULL DEFAULT '',
confidence INTEGER NOT NULL DEFAULT 0,
rank INTEGER NOT NULL,
suggestion_id INTEGER NOT NULL,
PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
) WITHOUT ROWID;
CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
",
)?;
Ok(())
}
// Migration for the fakespot data. This is not currently active for any users, it's
// only used for the tests. It's safe to alter the fakespot_custom_detail schema and
// update this migration as the project moves forward.
//
// Note: if we want to add a regular migration while the fakespot code is still behind
// a feature flag, insert it before this one and make fakespot the last migration.
20 => {
tx.execute_batch(
"
CREATE TABLE fakespot_custom_details(
suggestion_id INTEGER PRIMARY KEY,
fakespot_grade TEXT NOT NULL,
product_id TEXT NOT NULL,
rating REAL NOT NULL,
total_reviews INTEGER NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
-- Create the Fakespot FTS table.
-- The `tokenize` param is hard to read. The effect is that dashes and apostrophes are
-- considered valid tokens in a word, rather than separators.
CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5(
title,
prefix='4 5 6 7 8 9 10 11',
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
INSERT INTO fakespot_fts(rowid, title)
SELECT id, title
FROM suggestions
WHERE id = new.suggestion_id;
END;
",
)?;
Ok(())
}
21 => {
// Drop and re-create the fakespot_custom_details to add the icon_id column.
tx.execute_batch(
"
DROP TABLE fakespot_custom_details;
CREATE TABLE fakespot_custom_details(
suggestion_id INTEGER PRIMARY KEY,
fakespot_grade TEXT NOT NULL,
product_id TEXT NOT NULL,
rating REAL NOT NULL,
total_reviews INTEGER NOT NULL,
icon_id TEXT,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
INSERT INTO fakespot_fts(rowid, title)
SELECT id, title
FROM suggestions
WHERE id = new.suggestion_id;
END;
",
)?;
Ok(())
}
22 => {
// Drop and re-create the fakespot_fts table to remove the prefix index param
tx.execute_batch(
"
DROP TABLE fakespot_fts;
CREATE VIRTUAL TABLE fakespot_fts USING FTS5(
title,
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
",
)?;
Ok(())
}
23 => {
// Drop all suggestions, then recreate the fakespot_custom_details table to add the
// `keywords` and `product_type` fields.
clear_database(tx)?;
tx.execute_batch(
"
DROP TABLE fakespot_custom_details;
CREATE TABLE fakespot_custom_details(
suggestion_id INTEGER PRIMARY KEY,
fakespot_grade TEXT NOT NULL,
product_id TEXT NOT NULL,
keywords TEXT NOT NULL,
product_type TEXT NOT NULL,
rating REAL NOT NULL,
total_reviews INTEGER NOT NULL,
icon_id TEXT,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
INSERT INTO fakespot_fts(rowid, title)
SELECT id, title
FROM suggestions
WHERE id = new.suggestion_id;
END;
",
)?;
Ok(())
}
24 => {
// Clear the database so that we re-ingest and populate the ingested_records table.
clear_database(tx)?;
tx.execute_batch(
"
CREATE TABLE rs_cache(
collection TEXT PRIMARY KEY,
data TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE ingested_records(
id TEXT,
collection TEXT,
type TEXT NOT NULL,
last_modified INTEGER NOT NULL,
PRIMARY KEY (id, collection)
) WITHOUT ROWID;
",
)?;
Ok(())
}
25 => {
// Create the exposure suggestions table and index.
tx.execute_batch(
"
CREATE TABLE exposure_custom_details(
suggestion_id INTEGER PRIMARY KEY,
type TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE INDEX exposure_custom_details_type ON exposure_custom_details(type);
",
)?;
Ok(())
}
26 => {
// Create tables related to city-based weather.
tx.execute_batch(
"
CREATE TABLE keywords_metrics(
record_id TEXT NOT NULL PRIMARY KEY,
provider INTEGER NOT NULL,
max_length INTEGER NOT NULL,
max_word_count INTEGER NOT NULL
) WITHOUT ROWID;
CREATE TABLE geonames(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
name TEXT NOT NULL,
feature_class TEXT NOT NULL,
feature_code TEXT NOT NULL,
country_code TEXT NOT NULL,
admin1_code TEXT NOT NULL,
population INTEGER
);
CREATE INDEX geonames_feature_class ON geonames(feature_class);
CREATE INDEX geonames_feature_code ON geonames(feature_code);
CREATE TABLE geonames_alternates(
name TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
PRIMARY KEY (name, geoname_id),
FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
) WITHOUT ROWID;
CREATE TABLE geonames_metrics(
record_id TEXT NOT NULL PRIMARY KEY,
max_name_length INTEGER NOT NULL,
max_name_word_count INTEGER NOT NULL
) WITHOUT ROWID;
",
)?;
Ok(())
}
27 => {
// Add latitude and longitude to the geonames table. Clear the
// database so geonames are reingested.
clear_database(tx)?;
tx.execute_batch(
"
DROP INDEX geonames_feature_class;
DROP INDEX geonames_feature_code;
DROP TABLE geonames;
CREATE TABLE geonames(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
name TEXT NOT NULL,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
feature_class TEXT NOT NULL,
feature_code TEXT NOT NULL,
country_code TEXT NOT NULL,
admin1_code TEXT NOT NULL,
population INTEGER NOT NULL
);
CREATE INDEX geonames_feature_class ON geonames(feature_class);
CREATE INDEX geonames_feature_code ON geonames(feature_code);
",
)?;
Ok(())
}
28 => {
// Add `iso_language` column to `geonames_alternates`. Clear the
// database so geonames are reingested.
clear_database(tx)?;
tx.execute_batch(
"
DROP TABLE geonames_alternates;
CREATE TABLE geonames_alternates(
name TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
-- The value of the `iso_language` field for the alternate. This will be
-- null for the alternate we artificially create for the `name` in the
-- corresponding geoname record.
iso_language TEXT,
PRIMARY KEY (name, geoname_id),
FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
) WITHOUT ROWID;
",
)?;
Ok(())
}
29 => {
// This migration only clears the database because some tables
// that should have been cleared in previous migrations were
// not.
clear_database(tx)?;
Ok(())
}
30 => {
// Add the `geonames_alternates_geoname_id` index.
clear_database(tx)?;
tx.execute_batch(
"
CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
",
)?;
Ok(())
}
31 => {
// Need to clear the database so that the FTS index will get filled.
clear_database(tx)?;
tx.execute_batch(
"
CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
full_keywords,
title,
content='',
contentless_delete=1,
tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
);
",
)?;
Ok(())
}
32 => {
// Drop rs_cache since it's no longer needed.
tx.execute_batch("DROP TABLE rs_cache;")?;
Ok(())
}
33 => {
// This migration is due to the replacement of the
// `quicksuggest` collection with `quicksuggest-amp` and
// `quicksuggest-other`. Clear the DB so that records from the
// old collection don't stick around. See bug 1953945.
clear_database(tx)?;
Ok(())
}
34 => {
// Replace the exposure suggestions table and index with the
// dynamic suggestions table and index.
tx.execute_batch(
r#"
DROP INDEX exposure_custom_details_type;
DROP TABLE exposure_custom_details;
CREATE TABLE dynamic_custom_details(
suggestion_id INTEGER PRIMARY KEY,
suggestion_type TEXT NOT NULL,
json_data TEXT,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE INDEX dynamic_custom_details_suggestion_type ON dynamic_custom_details(suggestion_type);
"#,
)?;
Ok(())
}
35 => {
// The commit that added this migration was reverted.
Ok(())
}
36 => {
tx.execute_batch("DROP TABLE IF EXISTS yelp_location_signs;")?;
Ok(())
}
37 => {
clear_database(tx)?;
tx.execute_batch(
"
DROP TABLE yelp_subjects;
CREATE TABLE yelp_subjects(
keyword TEXT PRIMARY KEY,
subject_type INTEGER NOT NULL DEFAULT 0,
record_id TEXT NOT NULL
) WITHOUT ROWID;
",
)?;
Ok(())
}
38 => {
// This migration makes changes to geonames.
tx.execute_batch(
r#"
DROP INDEX geonames_alternates_geoname_id;
DROP TABLE geonames_alternates;
DROP INDEX geonames_feature_class;
DROP INDEX geonames_feature_code;
DROP TABLE geonames;
CREATE TABLE geonames(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
name TEXT NOT NULL,
feature_class TEXT NOT NULL,
feature_code TEXT NOT NULL,
country_code TEXT NOT NULL,
admin1_code TEXT,
admin2_code TEXT,
admin3_code TEXT,
admin4_code TEXT,
population INTEGER,
latitude TEXT,
longitude TEXT
);
CREATE TABLE geonames_alternates(
record_id TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
language TEXT,
name TEXT NOT NULL COLLATE geonames_collate,
PRIMARY KEY(geoname_id, language, name)
);
CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
CREATE INDEX geonames_alternates_name ON geonames_alternates(name);
CREATE TRIGGER geonames_alternates_delete AFTER DELETE ON geonames BEGIN
DELETE FROM geonames_alternates
WHERE geoname_id = old.id;
END;
"#,
)?;
Ok(())
}
39 => {
// This migration makes changes to keywords metrics.
clear_database(tx)?;
tx.execute_batch(
r#"
DROP TABLE keywords_metrics;
CREATE TABLE keywords_metrics(
record_id TEXT NOT NULL PRIMARY KEY,
record_type TEXT NOT NULL,
max_len INTEGER NOT NULL,
max_word_count INTEGER NOT NULL
) WITHOUT ROWID;
CREATE INDEX keywords_metrics_record_type ON keywords_metrics(record_type);
"#,
)?;
Ok(())
}
40 => {
// This migration makes changes to geonames.
clear_database(tx)?;
tx.execute_batch(
r#"
DROP INDEX geonames_alternates_geoname_id;
DROP INDEX geonames_alternates_name;
DROP TABLE geonames_alternates;
CREATE TABLE geonames_alternates(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
geoname_id INTEGER NOT NULL,
language TEXT,
name TEXT NOT NULL COLLATE geonames_collate,
is_preferred INTEGER,
is_short INTEGER
);
CREATE INDEX geonames_alternates_geoname_id_language
ON geonames_alternates(geoname_id, language);
CREATE INDEX geonames_alternates_name
ON geonames_alternates(name);
"#,
)?;
Ok(())
}
_ => Err(open_database::Error::IncompatibleVersion(version)),
}
}
}
/// Clears the database, removing all suggestions, icons, and metadata.
pub fn clear_database(db: &Connection) -> rusqlite::Result<()> {
db.execute_batch(
"
DELETE FROM meta;
DELETE FROM keywords;
DELETE FROM full_keywords;
DELETE FROM prefix_keywords;
DELETE FROM suggestions;
DELETE FROM icons;
DELETE FROM yelp_subjects;
DELETE FROM yelp_modifiers;
DELETE FROM yelp_custom_details;
",
)?;
let conditional_tables = [
"fakespot_fts",
"geonames",
"geonames_metrics",
"ingested_records",
"keywords_metrics",
];
for t in conditional_tables {
let table_exists = db.exists("SELECT 1 FROM sqlite_master WHERE name = ?", [t])?;
if table_exists {
db.execute(&format!("DELETE FROM {t}"), ())?;
}
}
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use sql_support::open_database::test_utils::MigratedDatabaseFile;
// Snapshot of the v16 schema. We use this to test that we can migrate from there to the
// current schema.
const V16_SCHEMA: &str = r#"
CREATE TABLE meta(
key TEXT PRIMARY KEY,
value NOT NULL
) WITHOUT ROWID;
CREATE TABLE keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
-- full keywords are what we display to the user when a (partial) keyword matches
-- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
CREATE TABLE full_keywords(
id INTEGER PRIMARY KEY,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
full_keyword TEXT NOT NULL
);
CREATE TABLE prefix_keywords(
keyword_prefix TEXT NOT NULL,
keyword_suffix TEXT NOT NULL DEFAULT '',
confidence INTEGER NOT NULL DEFAULT 0,
rank INTEGER NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
) WITHOUT ROWID;
CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
CREATE TABLE suggestions(
id INTEGER PRIMARY KEY,
record_id TEXT NOT NULL,
provider INTEGER NOT NULL,
title TEXT NOT NULL,
url TEXT NOT NULL,
score REAL NOT NULL
);
CREATE TABLE amp_custom_details(
suggestion_id INTEGER PRIMARY KEY,
advertiser TEXT NOT NULL,
block_id INTEGER NOT NULL,
iab_category TEXT NOT NULL,
impression_url TEXT NOT NULL,
click_url TEXT NOT NULL,
icon_id TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE TABLE wikipedia_custom_details(
suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
icon_id TEXT NOT NULL
);
CREATE TABLE amo_custom_details(
suggestion_id INTEGER PRIMARY KEY,
description TEXT NOT NULL,
guid TEXT NOT NULL,
icon_url TEXT NOT NULL,
rating TEXT,
number_of_ratings INTEGER NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
CREATE INDEX suggestions_record_id ON suggestions(record_id);
CREATE TABLE icons(
id TEXT PRIMARY KEY,
data BLOB NOT NULL,
mimetype TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_subjects(
keyword TEXT PRIMARY KEY,
record_id TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_modifiers(
type INTEGER NOT NULL,
keyword TEXT NOT NULL,
record_id TEXT NOT NULL,
PRIMARY KEY (type, keyword)
) WITHOUT ROWID;
CREATE TABLE yelp_location_signs(
keyword TEXT PRIMARY KEY,
need_location INTEGER NOT NULL,
record_id TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_custom_details(
icon_id TEXT PRIMARY KEY,
score REAL NOT NULL,
record_id TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE mdn_custom_details(
suggestion_id INTEGER PRIMARY KEY,
description TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
PRAGMA user_version=16;
"#;
/// Test running all schema upgrades from V16, which was the first schema with a "real"
/// migration.
///
/// If an upgrade fails, then this test will fail with a panic.
#[test]
fn test_all_upgrades() {
let db_file =
MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
db_file.run_all_upgrades();
db_file.assert_schema_matches_new_database();
}
/// Test that `clear_database()` works correctly during migrations.
///
/// TODO: This only checks `ingested_records` and `rs_cache` for now since
/// they're very important, but ideally this would test all tables.
#[test]
fn test_clear_database() -> anyhow::Result<()> {
// Start with the v16 schema.
let db_file =
MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
// Upgrade to v25, the first version with with `ingested_records` tables.
db_file.upgrade_to(25);
// Insert some ingested records.
let conn = db_file.open();
conn.execute(
"INSERT INTO ingested_records(id, collection, type, last_modified) VALUES(?, ?, ?, ?)",
("record-id", "quicksuggest", "record-type", 1),
)?;
conn.close().expect("Connection should be closed");
// Finish upgrading to the current version.
db_file.upgrade_to(VERSION);
db_file.assert_schema_matches_new_database();
// `ingested_records` should be empty.
let conn = db_file.open();
assert_eq!(
conn.query_one::<i32>("SELECT count(*) FROM ingested_records")?,
0,
"ingested_records should be empty"
);
conn.close().expect("Connection should be closed");
Ok(())
}
/// Test that yelp_location_signs table could be removed correctly.
#[test]
fn test_remove_yelp_location_signs_table() -> anyhow::Result<()> {
// Start with the v16 schema.
let db_file =
MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
// Upgrade to v36.
db_file.upgrade_to(36);
// Drop the table to simulate old 35 > 36 migration.
let conn = db_file.open();
conn.execute("DROP table yelp_location_signs", ())?;
conn.close().expect("Connection should be closed");
// Finish upgrading to the current version.
db_file.upgrade_to(VERSION);
db_file.assert_schema_matches_new_database();
Ok(())
}
}