quickwit-oss · PSeitz · Sep 4, 2024 · Sep 4, 2024
diff --git a/src/compat_tests.rs b/src/compat_tests.rs
@@ -0,0 +1,80 @@
+use std::path::PathBuf;
+
+use schema::*;
+
+use crate::*;
+
+/// Creates an index at `path` with a stored text field `label` and a stored date field
+/// `date`, then writes and commits a single document.
+fn create_index(path: &str) {
+    let mut builder = Schema::builder();
+    let label = builder.add_text_field("label", TEXT | STORED);
+    let date = builder.add_date_field("date", INDEXED | STORED);
+    std::fs::create_dir_all(path).unwrap();
+    let index = Index::create_in_dir(path, builder.build()).unwrap();
+    let mut writer = index.writer_with_num_threads(1, 20_000_000).unwrap();
+    let document = doc!(label => "dateformat", date => DateTime::from_timestamp_nanos(123456));
+    writer.add_document(document).unwrap();
+    writer.commit().unwrap();
+}
+
+/// Generates the on-disk index fixture for the current `INDEX_FORMAT_VERSION`.
+/// An already existing fixture directory is left untouched.
+#[test]
+fn create_format() {
+    let target_dir = path_for_version(&INDEX_FORMAT_VERSION.to_string());
+    let already_present = PathBuf::from(&target_dir).exists();
+    if !already_present {
+        create_index(&target_dir);
+    }
+}
+
+fn path_for_version(version: &str) -> String {
+    format!("./tests/compat_tests_data/index_v{version}/")
+}
+
+/// Opens the checked-in v6 fixture and checks the precision of its stored date.
+/// Skipped under the `quickwit` feature, which uses a different dictionary type.
+#[test]
+#[cfg(not(feature = "quickwit"))]
+fn test_format_6() {
+    let v6_index = Index::open_in_dir(path_for_version("6")).expect("Failed to open index");
+    // In format v6 dates are truncated to microseconds.
+    let v6_precision = DateTimePrecision::Microseconds;
+    assert_date_time_precision(&v6_index, v6_precision);
+}
+
+/// Searches `index` for the single `dateformat` document and asserts that its stored `date`
+/// value equals `DateTime::from_timestamp_nanos(123456)` truncated to `precision`.
+#[cfg(not(feature = "quickwit"))]
+fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) {
+    use collector::TopDocs;
+    let reader = index.reader().expect("Failed to create reader");
+    let searcher = reader.searcher();
+    let schema = index.schema();
+
+    let label_field = schema.get_field("label").expect("Field 'label' not found");
+    let parser = query::QueryParser::for_index(index, vec![label_field]);
+    let label_query = parser
+        .parse_query("dateformat")
+        .expect("Failed to parse query");
+    let hits = searcher
+        .search(&label_query, &TopDocs::with_limit(1))
+        .expect("Search failed");
+    assert_eq!(hits.len(), 1, "Expected 1 search result");
+
+    // Load the stored document behind the only hit.
+    let (_score, doc_address) = hits[0];
+    let stored_doc: TantivyDocument = searcher
+        .doc(doc_address)
+        .expect("Failed to retrieve document");
+
+    let date_field = schema.get_field("date").expect("Field 'date' not found");
+    let first_date = stored_doc
+        .get_first(date_field)
+        .expect("Date field not found in document");
+    let actual = first_date.as_datetime().unwrap();
+
+    let expected = DateTime::from_timestamp_nanos(123456).truncate(precision);
+    assert_eq!(actual, expected);
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -125,8 +125,8 @@
 //!
 //! - **Searching**: [Searcher] searches the segments with anything that implements
 //!   [Query](query::Query) and merges the results. The list of [supported
 //! queries](query::Query#implementors). Custom Queries are supported by implementing the
 //! [Query](query::Query) trait.
 //!
 //! - **[Directory](directory)**: Abstraction over the storage where the index data is stored.
 //!
@@ -202,12 +202,15 @@
 pub mod store;
 pub mod termdict;
 
+mod docset;
 mod reader;
 
+#[cfg(test)]
+mod compat_tests;
+
 pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
 pub mod snippet;
 
-mod docset;
 use std::fmt;
 
 pub use census::{Inventory, TrackedObject};
@@ -229,9 +232,9 @@
 pub use crate::schema::{Document, TantivyDocument, Term};
 
 /// Index format version.
-const INDEX_FORMAT_VERSION: u32 = 6;
+pub const INDEX_FORMAT_VERSION: u32 = 6;
 /// Oldest index format version this tantivy version can read.
-const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
+pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
 
 /// Structure version for the index.
 #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]

diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs
@@ -471,7 +471,7 @@ fn bound_to_value_range<T: MonotonicallyMappableToU64>(
 }
 
 #[cfg(test)]
-pub mod tests {
+mod tests {
     use std::ops::{Bound, RangeInclusive};
 
     use common::bounds::BoundsRange;

diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs
@@ -47,13 +47,26 @@ use self::termdict::{
 pub use self::termdict::{TermMerger, TermStreamer};
 use crate::postings::TermInfo;
 
+#[derive(Debug, Eq, PartialEq)]
 #[repr(u32)]
 #[allow(dead_code)]
 enum DictionaryType {
     Fst = 1,
     SSTable = 2,
 }
 
+/// Maps a raw `u32` tag onto its variant: `1` => `Fst`, `2` => `SSTable`, else an error.
+impl TryFrom<u32> for DictionaryType {
+    type Error = &'static str;
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        Ok(match value {
+            1 => Self::Fst,
+            2 => Self::SSTable,
+            _ => return Err("Invalid value for DictionaryType"),
+        })
+    }
+}
+
 #[cfg(not(feature = "quickwit"))]
 const CURRENT_TYPE: DictionaryType = DictionaryType::Fst;
 
@@ -70,13 +83,19 @@ impl TermDictionary {
         let (main_slice, dict_type) = file.split_from_end(4);
         let mut dict_type = dict_type.read_bytes()?;
         let dict_type = u32::deserialize(&mut dict_type)?;
+        let dict_type = DictionaryType::try_from(dict_type).map_err(|_| {
+            io::Error::new(
+                io::ErrorKind::Other,
+                format!("Unsuported dictionary type, found {dict_type}"),
+            )
+        })?;
 
-        if dict_type != CURRENT_TYPE as u32 {
+        if dict_type != CURRENT_TYPE {
             return Err(io::Error::new(
                 io::ErrorKind::Other,
                 format!(
-                    "Unsuported dictionary type, expected {}, found {dict_type}",
-                    CURRENT_TYPE as u32,
+                    "Unsuported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \
+                     {dict_type:?}",
                 ),
             ));
         }

diff --git a/tests/compat_tests_data/index_v6/.managed.json b/tests/compat_tests_data/index_v6/.managed.json
@@ -0,0 +1 @@
+["00000000000000000000000000000000.store","00000000000000000000000000000000.fast","00000000000000000000000000000000.fieldnorm","00000000000000000000000000000000.term","00000000000000000000000000000000.idx","meta.json","00000000000000000000000000000000.pos"]
diff --git a/tests/compat_tests_data/index_v6/.tantivy-meta.lock b/tests/compat_tests_data/index_v6/.tantivy-meta.lock
diff --git a/tests/compat_tests_data/index_v6/.tantivy-writer.lock b/tests/compat_tests_data/index_v6/.tantivy-writer.lock
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fast
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.fieldnorm
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.idx
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.pos b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.pos
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.store
diff --git a/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term b/tests/compat_tests_data/index_v6/00000000000000000000000000000000.term
diff --git a/tests/compat_tests_data/index_v6/meta.json b/tests/compat_tests_data/index_v6/meta.json
@@ -0,0 +1,40 @@
+{
+  "index_settings": {
+    "docstore_compression": "lz4",
+    "docstore_blocksize": 16384
+  },
+  "segments": [
+    {
+      "segment_id": "00000000-0000-0000-0000-000000000000",
+      "max_doc": 1,
+      "deletes": null
+    }
+  ],
+  "schema": [
+    {
+      "name": "label",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "fieldnorms": true,
+          "tokenizer": "default"
+        },
+        "stored": true,
+        "fast": false
+      }
+    },
+    {
+      "name": "date",
+      "type": "date",
+      "options": {
+        "indexed": true,
+        "fieldnorms": true,
+        "fast": false,
+        "stored": true,
+        "precision": "seconds"
+      }
+    }
+  ],
+  "opstamp": 2
+}