diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 7d9ac914da6..3e9f2194b69 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9234,7 +9234,6 @@ dependencies = [ "proptest", "quickwit-common", "quickwit-datetime", - "quickwit-proto", "regex", "rustc-hash", "serde", diff --git a/quickwit/quickwit-cli/src/index.rs b/quickwit/quickwit-cli/src/index.rs index 9d696065ced..9b66007416c 100644 --- a/quickwit/quickwit-cli/src/index.rs +++ b/quickwit/quickwit-cli/src/index.rs @@ -1186,7 +1186,7 @@ mod test { time_range: RangeInclusive, size: u64, ) -> SplitMetadata { - let mut split_metadata = SplitMetadata::for_test(split_id.to_string()); + let mut split_metadata = SplitMetadata::for_test(split_id.into()); split_metadata.num_docs = num_docs; split_metadata.time_range = Some(time_range); split_metadata.footer_offsets = (size - 10)..size; diff --git a/quickwit/quickwit-cli/src/split.rs b/quickwit/quickwit-cli/src/split.rs index ebdebbb7091..f3d13d6c6ab 100644 --- a/quickwit/quickwit-cli/src/split.rs +++ b/quickwit/quickwit-cli/src/split.rs @@ -261,9 +261,10 @@ impl SplitCliCommand { let index_id = matches .remove_one::("index") .expect("`index` should be a required arg."); - let split_id = matches + let split_id: SplitId = matches .remove_one::("split") - .expect("`split` should be a required arg."); + .expect("`split` should be a required arg.") + .into(); let client_args = ClientArgs::parse(&mut matches)?; let verbose = matches.get_flag("verbose"); @@ -345,7 +346,7 @@ async fn describe_split_cli(args: DescribeSplitArgs) -> anyhow::Result<()> { .await .expect("Failed to fetch splits.") .into_iter() - .find(|split| split.split_id() == args.split_id) + .find(|split| split.split_id() == args.split_id.as_str()) .with_context(|| { format!( "could not find split metadata in metastore {}", diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 8c212080ec4..d3eba3eba2a 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -366,9 +366,10 @@ impl ToolCliCommand { let index_id = matches .remove_one::("index") .expect("`index` should be a required arg."); - let split_id = matches + let split_id: SplitId = matches .remove_one::("split") - .expect("`split` should be a required arg."); + .expect("`split` should be a required arg.") + .into(); let config_uri = matches .remove_one::("config") .map(|uri_str| Uri::from_str(&uri_str)) diff --git a/quickwit/quickwit-cli/tests/cli.rs b/quickwit/quickwit-cli/tests/cli.rs index 5f695ec78d9..2fc61548a2f 100644 --- a/quickwit/quickwit-cli/tests/cli.rs +++ b/quickwit/quickwit-cli/tests/cli.rs @@ -865,7 +865,7 @@ async fn test_garbage_collect_index_cli() { index_uid: Some(index_uid.clone()), split_ids: splits_metadata .into_iter() - .map(|split_metadata| split_metadata.split_id) + .map(|split_metadata| split_metadata.split_id.to_string()) .collect(), }) .await diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index 8dee1f6335a..27b4a0cffb6 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -17,7 +17,6 @@ use std::convert::Infallible; use std::ops::Bound; use std::sync::Arc; -use quickwit_proto::types::SplitId; use quickwit_query::query_ast::{ BuildTantivyAstContext, FieldPresenceQuery, FullTextQuery, PhrasePrefixQuery, QueryAst, QueryAstTransformer, QueryAstVisitor, RangeQuery, RegexQuery, TermSetQuery, WildcardQuery, @@ -158,7 +157,7 @@ impl<'a, 'f> QueryAstVisitor<'a> for ExistsQueryFastFields<'f> { pub(crate) fn build_query( query_ast: QueryAst, context: &BuildTantivyAstContext, - cache_context: Option<(Arc, SplitId)>, + cache_context: Option<(Arc, String)>, ) -> Result<(Box, WarmupInfo), QueryParserError> { let mut fast_fields: HashSet = HashSet::new(); diff --git a/quickwit/quickwit-index-management/src/garbage_collection.rs b/quickwit/quickwit-index-management/src/garbage_collection.rs index 5d85e4c70bb..8739220f866 100644 --- a/quickwit/quickwit-index-management/src/garbage_collection.rs +++ b/quickwit/quickwit-index-management/src/garbage_collection.rs @@ -474,7 +474,7 @@ pub async fn delete_splits_from_storage_and_metastore( let splits_to_delete: Vec = split_infos .values() .map(|info| SplitToDelete { - split_id: info.split_id.clone(), + split_id: info.split_id.to_string(), path: info.file_name.clone(), size_bytes: info.file_size_bytes.as_u64(), }) @@ -508,7 +508,7 @@ pub async fn delete_splits_from_storage_and_metastore( } if !successes.is_empty() { - let split_ids: Vec = successes + let split_ids: Vec = successes .iter() .map(|split_info| split_info.split_id.to_string()) .collect(); @@ -597,7 +597,7 @@ mod tests { let split_id = "test-run-gc--split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -697,7 +697,7 @@ mod tests { let split_id = "test-run-gc--split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -827,7 +827,7 @@ mod tests { let split_id = "test-delete-splits-happy--split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: IndexUid::new_with_random_ulid(index_id), ..Default::default() }; @@ -932,13 +932,13 @@ mod tests { let split_id_0 = "test-delete-splits-storage-error--split-0"; let split_metadata_0 = SplitMetadata { - split_id: split_id_0.to_string(), + split_id: split_id_0.into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_1 = "test-delete-splits-storage-error--split-1"; let split_metadata_1 = SplitMetadata { - split_id: split_id_1.to_string(), + split_id: split_id_1.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -1020,13 +1020,13 @@ mod tests { let split_id_0 = "test-delete-splits-storage-error--split-0"; let split_metadata_0 = SplitMetadata { - split_id: split_id_0.to_string(), + split_id: split_id_0.into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_1 = "test-delete-splits-storage-error--split-1"; let split_metadata_1 = SplitMetadata { - split_id: split_id_1.to_string(), + split_id: split_id_1.into(), index_uid: index_uid.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-index-management/src/index.rs b/quickwit/quickwit-index-management/src/index.rs index 1787ff1e276..af2d3bc34d5 100644 --- a/quickwit/quickwit-index-management/src/index.rs +++ b/quickwit/quickwit-index-management/src/index.rs @@ -230,7 +230,7 @@ impl IndexService { let query = ListSplitsQuery::for_index(index_uid.clone()) .with_split_states([SplitState::Staged, SplitState::Published]); let list_splits_request = ListSplitsRequest::try_from_list_splits_query(&query)?; - let split_ids: Vec = self + let split_ids = self .metastore .list_splits(list_splits_request) .await? @@ -477,7 +477,7 @@ impl IndexService { .await?; let split_ids: Vec = splits_metadata .iter() - .map(|split| split.split_id.to_string()) + .map(|split| split.split_id.clone()) .collect(); let mark_splits_for_deletion_request = MarkSplitsForDeletionRequest::new(index_uid.clone(), split_ids.clone()); @@ -771,7 +771,7 @@ mod tests { let split_id = "test-split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index da36d6b7994..52941167016 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -665,7 +665,7 @@ impl Indexer { return Ok(()); } let num_splits = splits.len() as u64; - let split_ids = splits.iter().map(|split| split.split_id()).join(","); + let split_ids: String = splits.iter().map(|split| split.split_id()).join(","); info!( index=%self.indexer_state.pipeline_id.index_uid, source=self.indexer_state.pipeline_id.source_id.as_str(), diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 69b606f93e3..6d0cbe69e19 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -2019,7 +2019,7 @@ mod tests { }) .return_once(|_request| { let splits = vec![Split { - split_metadata: SplitMetadata::for_test("test-split".to_string()), + split_metadata: SplitMetadata::for_test("test-split".into()), split_state: SplitState::Published, update_timestamp: 0, publish_timestamp: Some(0), diff --git a/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs b/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs index 02a4bca1661..f4e6e656f9f 100644 --- a/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs +++ b/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs @@ -78,13 +78,13 @@ impl Handler for Publisher { let index_checkpoint_delta_json_opt = serialize_checkpoint_delta(&checkpoint_delta_opt)?; let split_ids: Vec = new_splits .iter() - .map(|split| split.split_id.clone()) + .map(|split| split.split_id.to_string()) .collect(); if let Some(_guard) = publish_lock.acquire().await { let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid), staged_split_ids: split_ids.clone(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), index_checkpoint_delta_json_opt, publish_token_opt: publish_token_opt.clone(), }; @@ -129,7 +129,7 @@ mod tests { }; use quickwit_metastore::{PublishSplitsRequestExt, SplitMetadata}; use quickwit_proto::metastore::{EmptyResponse, MetastoreServiceClient, MockMetastoreService}; - use quickwit_proto::types::{IndexUid, Position}; + use quickwit_proto::types::{IndexUid, Position, SplitId}; use tracing::Span; use super::PUBLISHER_NAME; @@ -176,7 +176,7 @@ mod tests { .send_message(SplitsUpdate { index_uid: ref_index_uid.clone(), new_splits: vec![SplitMetadata { - split_id: "split".to_string(), + split_id: "split".into(), ..Default::default() }], replaced_split_ids: Vec::new(), @@ -320,10 +320,10 @@ mod tests { .send_message(SplitsUpdate { index_uid: ref_index_uid.clone(), new_splits: vec![SplitMetadata { - split_id: "split3".to_string(), + split_id: "split3".into(), ..Default::default() }], - replaced_split_ids: vec!["split1".to_string(), "split2".to_string()], + replaced_split_ids: vec![SplitId::from("split1"), SplitId::from("split2")], checkpoint_delta_opt: None, publish_lock: PublishLock::default(), publish_token_opt: None, @@ -365,7 +365,7 @@ mod tests { publisher_mailbox .send_message(SplitsUpdate { index_uid: IndexUid::new_with_random_ulid("index"), - new_splits: vec![SplitMetadata::for_test("test-split".to_string())], + new_splits: vec![SplitMetadata::for_test("test-split".into())], replaced_split_ids: Vec::new(), checkpoint_delta_opt: None, publish_lock, diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index d07c6e8046d..d4f06a1d8ee 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -257,7 +257,7 @@ pub fn merge_split_attrs( let num_docs = sum_num_docs(splits); let replaced_split_ids: Vec = splits .iter() - .map(|split| split.split_id().to_string()) + .map(|split| split.split_id().clone()) .collect(); let delete_opstamp = splits .iter() @@ -434,7 +434,7 @@ impl MergeExecutor { ); let mark_splits_for_deletion_request = MarkSplitsForDeletionRequest::new( split.index_uid.clone(), - vec![split.split_id.clone()], + [split.split_id.clone()], ); self.metastore .mark_splits_for_deletion(mark_splits_for_deletion_request) @@ -594,7 +594,7 @@ mod tests { use super::*; use crate::merge_policy::{MergeOperation, MergeTask}; - use crate::{TestSandbox, get_tantivy_directory_from_split_bundle, new_split_id}; + use crate::{TestSandbox, get_tantivy_directory_from_split_bundle}; #[tokio::test] async fn test_merge_executor() -> anyhow::Result<()> { @@ -755,7 +755,7 @@ mod tests { // We want to test a delete on a split with num_merge_ops > 0. let mut new_split_metadata = splits[0].split_metadata.clone(); - new_split_metadata.split_id = new_split_id(); + new_split_metadata.split_id = SplitId::new(); new_split_metadata.num_merge_ops = 1; let stage_splits_request = StageSplitsRequest::try_from_split_metadata(index_uid.clone(), &new_split_metadata) diff --git a/quickwit/quickwit-indexing/src/actors/merge_planner.rs b/quickwit/quickwit-indexing/src/actors/merge_planner.rs index 651e38fc8b3..0d532efbf63 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_planner.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_planner.rs @@ -20,7 +20,7 @@ use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; use quickwit_metastore::{SplitMaturity, SplitMetadata}; use quickwit_proto::indexing::MergePipelineId; -use quickwit_proto::types::DocMappingUid; +use quickwit_proto::types::{DocMappingUid, SplitId}; use tantivy::Inventory; use time::OffsetDateTime; use tracing::{info, warn}; @@ -70,7 +70,7 @@ pub struct MergePlanner { /// /// We incrementally build this set, by adding new splits to it. /// When it becomes too large, we entirely rebuild it. - known_split_ids: HashSet, + known_split_ids: HashSet, known_split_ids_recompute_attempt_id: usize, merge_policy: Arc, @@ -207,19 +207,19 @@ impl MergePlanner { merge_planner } - fn rebuild_known_split_ids(&self) -> HashSet { - let mut known_split_ids: HashSet = HashSet::default(); + fn rebuild_known_split_ids(&self) -> HashSet { + let mut known_split_ids: HashSet = HashSet::default(); // Add splits that in `partitioned_young_splits`. for young_split_partition in self.partitioned_young_splits.values() { for split in young_split_partition { - known_split_ids.insert(split.split_id().to_string()); + known_split_ids.insert(split.split_id().clone()); } } let ongoing_merge_operations = self.ongoing_merge_operations_inventory.list(); // Add splits that are known as in merge. for merge_op in ongoing_merge_operations { for split in &merge_op.splits { - known_split_ids.insert(split.split_id().to_string()); + known_split_ids.insert(split.split_id().clone()); } } if known_split_ids.len() * 2 >= self.known_split_ids.len() { @@ -235,11 +235,11 @@ impl MergePlanner { /// Updates `known_split_ids` and return true if the split was not /// previously known and should be recorded. - fn acknownledge_split(&mut self, split_id: &str) -> bool { + fn acknownledge_split(&mut self, split_id: &SplitId) -> bool { if self.known_split_ids.contains(split_id) { return false; } - self.known_split_ids.insert(split_id.to_string()); + self.known_split_ids.insert(split_id.clone()); true } @@ -389,7 +389,7 @@ mod tests { num_merge_ops: usize, ) -> SplitMetadata { SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), source_id: "test-source".to_string(), node_id: "test-node".to_string(), diff --git a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs index 5d68bb59285..11613ab84d7 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs @@ -101,7 +101,7 @@ impl MergeSplitDownloader { for split in splits { if ctx.kill_switch().is_dead() { debug!( - split_id = split.split_id(), + split_id = %split.split_id(), "Kill switch was activated. Cancelling download." ); return Err(ActorExitStatus::Killed); @@ -114,7 +114,7 @@ impl MergeSplitDownloader { let _protect_guard = ctx.protect_zone(); let tantivy_dir = self .split_store - .fetch_and_open_split(split.split_id(), download_directory, &io_controls) + .fetch_and_open_split(split.split_id().clone(), download_directory, &io_controls) .await .map_err(|error| { let split_id = split.split_id(); @@ -133,17 +133,17 @@ mod tests { use quickwit_actors::Universe; use quickwit_common::split_file; + use quickwit_proto::types::SplitId; use quickwit_storage::{PutPayload, RamStorageBuilder, SplitPayloadBuilder}; use super::*; use crate::merge_policy::MergeOperation; - use crate::new_split_id; #[tokio::test] async fn test_merge_split_downloader() -> anyhow::Result<()> { let scratch_directory = TempDirectory::for_test(); let splits_to_merge: Vec = iter::repeat_with(|| { - let split_id = new_split_id(); + let split_id = SplitId::new(); SplitMetadata { split_id, ..Default::default() diff --git a/quickwit/quickwit-indexing/src/actors/packager.rs b/quickwit/quickwit-indexing/src/actors/packager.rs index 9f691d285b6..df94e76ff93 100644 --- a/quickwit/quickwit-indexing/src/actors/packager.rs +++ b/quickwit/quickwit-indexing/src/actors/packager.rs @@ -272,12 +272,12 @@ fn create_packaged_split( tag_fields: &[NamedField], ctx: &ActorContext, ) -> anyhow::Result { - debug!(split_id = split.split_id(), "create-packaged-split"); + debug!(split_id = %split.split_id(), "create-packaged-split"); let split_files = list_split_files(segment_metas, &split.split_scratch_directory)?; // Extracts tag values from inverted indexes only when a field cardinality is less // than `MAX_VALUES_PER_TAG_FIELD`. - debug!(split_id = split.split_id(), tag_fields =? tag_fields, "extract-tags-values"); + debug!(split_id = %split.split_id(), tag_fields =? tag_fields, "extract-tags-values"); let index_reader = split .index .reader_builder() @@ -307,7 +307,7 @@ fn create_packaged_split( ctx.record_progress(); - debug!(split_id = split.split_id(), "build-hotcache"); + debug!(split_id = %split.split_id(), "build-hotcache"); let mut hotcache_bytes = Vec::new(); build_hotcache(split.split_scratch_directory.path(), &mut hotcache_bytes)?; ctx.record_progress(); @@ -516,7 +516,7 @@ mod tests { index_uid, source_id, doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split".to_string(), + split_id: "test-split".into(), partition_id: 17u64, num_docs, uncompressed_docs_size_in_bytes: num_docs * 15, diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs index 2d64f54b9b7..126f444e5f1 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs @@ -33,7 +33,7 @@ use quickwit_doc_mapper::{ArrowRowContext, RoutingExpr}; use quickwit_metastore::checkpoint::{IndexCheckpointDelta, SourceCheckpointDelta}; use quickwit_parquet_engine::index::{ParquetBatchAccumulator, ParquetIndexingConfig}; use quickwit_parquet_engine::split::ParquetSplitMetadata; -use quickwit_proto::types::{IndexUid, PublishToken, SourceId}; +use quickwit_proto::types::{IndexUid, PublishToken, SourceId, SplitId}; use serde::Serialize; use tokio::runtime::Handle; use tracing::{debug, info, info_span, warn}; @@ -122,7 +122,7 @@ pub struct ParquetSplitBatch { pub publish_token_opt: Option, /// Split IDs being replaced by this batch (non-empty for merges). /// Empty for the ingest path. - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, /// Holds the temp directory alive until the uploader finishes reading. /// `None` for the ingest path (packager manages its own temp dir). /// `Some` for the merge path (executor's scratch directory). diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs index ffe27b09a0e..e022163d3a4 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs @@ -56,7 +56,7 @@ use quickwit_parquet_engine::merge::{ MergeConfig, MergeOutputFile, execute_merge_operation, merge_sorted_parquet_files, }; use quickwit_parquet_engine::storage::{ParquetWriterConfig, RemoteByteSource}; -use quickwit_proto::types::IndexUid; +use quickwit_proto::types::{IndexUid, SplitId}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt}; use tracing::{info, instrument, warn}; @@ -309,9 +309,9 @@ impl Handler for ParquetMergeExecutor { .context("invalid index_uid in merge input") .map_err(|e| ActorExitStatus::from(anyhow::anyhow!(e)))?; - let replaced_split_ids: Vec = input_splits + let replaced_split_ids: Vec = input_splits .iter() - .map(|s| s.split_id.as_str().to_string()) + .map(|s| SplitId::from(s.split_id.as_str())) .collect(); // Verify pre-merge invariants on the inputs the planner gave us. diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs index b4ac6d197d0..20dd3d1ad2b 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs @@ -19,7 +19,7 @@ use std::fmt; use itertools::Itertools; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_parquet_engine::split::ParquetSplitMetadata; -use quickwit_proto::types::{IndexUid, PublishToken}; +use quickwit_proto::types::{IndexUid, PublishToken, SplitId}; use tracing::Span; use super::parquet_merge_messages::ParquetMergeTask; @@ -35,7 +35,7 @@ pub struct ParquetSplitsUpdate { /// The staged and uploaded splits (metrics or sketches). pub new_splits: Vec, /// Split IDs being replaced (for merges, typically empty for ingest). - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, /// Checkpoint delta covering the data in these splits. pub checkpoint_delta_opt: Option, /// Publish lock for coordination. diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs index 8ae01f06c68..1b911ab18c5 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs @@ -60,7 +60,7 @@ impl Handler for Publisher { let publish_request = PublishSketchSplitsRequest { index_uid: Some(index_uid.clone()), staged_split_ids: split_ids.clone(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), index_checkpoint_delta_json_opt, publish_token_opt: publish_token_opt.clone(), }; @@ -71,7 +71,7 @@ impl Handler for Publisher { let publish_request = PublishMetricsSplitsRequest { index_uid: Some(index_uid.clone()), staged_split_ids: split_ids.clone(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), index_checkpoint_delta_json_opt, publish_token_opt: publish_token_opt.clone(), }; @@ -116,7 +116,7 @@ impl Handler for Publisher { index_uid: index_uid_str.clone(), merge_id: split.split_id.as_str().to_string(), output_split_id: split.split_id.as_str().to_string(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), output_window: window, output_merge_ops: split.num_merge_ops, }); diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 0b2901f26d5..44dbf9d2aa1 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -323,7 +323,7 @@ impl Handler for Uploader { ) { Ok(split_streamer) => split_streamer, Err(e) => { - warn!(cause=?e, split_id=packaged_split.split_id(), "could not create split streamer"); + warn!(cause=?e, split_id=packaged_split.split_id_str(), "could not create split streamer"); return; } }; @@ -337,7 +337,7 @@ impl Handler for Uploader { report_splits.push(ReportSplit { storage_uri: split_store.remote_uri().to_string(), - split_id: packaged_split.split_id().to_string(), + split_id: packaged_split.split_id_str().to_string(), }); split_metadata_list.push(split_metadata); @@ -376,7 +376,7 @@ impl Handler for Uploader { .await; if let Err(cause) = upload_result { - warn!(cause=?cause, split_id=packaged_split.split_id(), "Failed to upload split. Killing!"); + warn!(cause=?cause, split_id=packaged_split.split_id_str(), "Failed to upload split. Killing!"); kill_switch.kill(); return; } @@ -516,7 +516,7 @@ mod tests { use quickwit_common::temp_dir::TempDirectory; use quickwit_metastore::checkpoint::{IndexCheckpointDelta, SourceCheckpointDelta}; use quickwit_proto::metastore::{EmptyResponse, MockMetastoreService}; - use quickwit_proto::types::{DocMappingUid, NodeId}; + use quickwit_proto::types::{DocMappingUid, NodeId, SplitId}; use quickwit_storage::RamStorage; use tantivy::DateTime; use tokio::sync::oneshot; @@ -586,7 +586,7 @@ mod tests { uncompressed_docs_size_in_bytes: 1_000, num_docs: 10, replaced_split_ids: Vec::new(), - split_id: "test-split".to_string(), + split_id: "test-split".into(), delete_opstamp: 10, num_merge_ops: 0, }, @@ -658,7 +658,7 @@ mod tests { .withf(move |stage_splits_request| -> bool { let splits_metadata = stage_splits_request.deserialize_splits_metadata().unwrap(); let is_metadata_valid = splits_metadata.iter().all(|metadata| { - ["test-split-1", "test-split-2"].contains(&metadata.split_id()) + ["test-split-1", "test-split-2"].contains(&metadata.split_id().as_str()) && metadata.time_range == Some(1628203589..=1628203640) }); let index_uid: IndexUid = stage_splits_request.index_uid().clone(); @@ -689,7 +689,7 @@ mod tests { index_uid: index_uid.clone(), source_id: source_id.clone(), doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split-1".to_string(), + split_id: "test-split-1".into(), partition_id: 3u64, num_docs: 10, uncompressed_docs_size_in_bytes: 1_000, @@ -698,8 +698,8 @@ mod tests { ..=DateTime::from_timestamp_secs(1_628_203_640), ), replaced_split_ids: vec![ - "replaced-split-1".to_string(), - "replaced-split-2".to_string(), + SplitId::from("replaced-split-1"), + SplitId::from("replaced-split-2"), ], delete_opstamp: 0, num_merge_ops: 0, @@ -716,7 +716,7 @@ mod tests { index_uid, source_id, doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split-2".to_string(), + split_id: "test-split-2".into(), partition_id: 3u64, num_docs: 10, uncompressed_docs_size_in_bytes: 1_000, @@ -725,8 +725,8 @@ mod tests { ..=DateTime::from_timestamp_secs(1_628_203_640), ), replaced_split_ids: vec![ - "replaced-split-1".to_string(), - "replaced-split-2".to_string(), + SplitId::from("replaced-split-1"), + SplitId::from("replaced-split-2"), ], delete_opstamp: 0, num_merge_ops: 0, @@ -778,8 +778,8 @@ mod tests { assert_eq!( &replaced_split_ids, &[ - "replaced-split-1".to_string(), - "replaced-split-2".to_string() + SplitId::from("replaced-split-1"), + SplitId::from("replaced-split-2"), ] ); assert!(checkpoint_delta_opt.is_none()); @@ -842,7 +842,7 @@ mod tests { index_uid, source_id, doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split".to_string(), + split_id: "test-split".into(), partition_id: 3u64, time_range: None, uncompressed_docs_size_in_bytes: 1_000, @@ -1029,7 +1029,7 @@ mod tests { uncompressed_docs_size_in_bytes: 1_000, num_docs: 10, replaced_split_ids: Vec::new(), - split_id: SPLIT_ULID_STR.to_string(), + split_id: SPLIT_ULID_STR.into(), delete_opstamp: 10, num_merge_ops: 0, }, diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index f0e79499b8a..894d194e2ce 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -55,10 +55,6 @@ pub use self::source::check_source_connectivity; /// Schema used for the OpenAPI generation which are apart of this crate. pub struct IndexingApiSchemas; -pub fn new_split_id() -> String { - ulid::Ulid::new().to_string() -} - #[allow(clippy::too_many_arguments)] pub async fn start_indexing_service( universe: &Universe, diff --git a/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs b/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs index ce3f4654de1..2ac0fa64f24 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs @@ -319,7 +319,7 @@ mod tests { fn test_const_write_merge_policy_single_split() { let merge_policy = ConstWriteAmplificationMergePolicy::for_test(); let mut splits = vec![SplitMetadata { - split_id: "01GE1R0KBFQHJ76030RYRAS8QA".to_string(), + split_id: "01GE1R0KBFQHJ76030RYRAS8QA".into(), num_docs: 1, create_timestamp: 1665000000, maturity: merge_policy.split_maturity(1, 0), @@ -337,7 +337,7 @@ mod tests { let create_timestamp = OffsetDateTime::now_utc().unix_timestamp(); let mut splits = (0..merge_policy.config.merge_factor) .map(|i| SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1_000, num_merge_ops: 1, create_timestamp, @@ -361,7 +361,7 @@ mod tests { let mut splits = (0..merge_policy.config.max_merge_factor + merge_policy.config.merge_factor - 1) .map(|i| SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1_000, num_merge_ops: 1, create_timestamp, @@ -384,7 +384,7 @@ mod tests { let now_timestamp: i64 = OffsetDateTime::now_utc().unix_timestamp(); let mut splits: Vec = (0..merge_policy.config.max_merge_factor) .map(|i| SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1_000, num_merge_ops: 1, create_timestamp: now_timestamp + i as i64, @@ -402,7 +402,7 @@ mod tests { let split_ids: Vec<&str> = operations[0] .splits_as_slice() .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .collect(); assert_eq!( &split_ids[..], @@ -419,7 +419,7 @@ mod tests { let num_docs = merge_policy.split_num_docs_target.div_ceil(3); let time_to_maturity = merge_policy.split_maturity(num_docs, 1); SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs, num_merge_ops: 1, create_timestamp, diff --git a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs index a0090c1edd1..95bae7a281d 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs @@ -33,7 +33,6 @@ use tantivy::TrackedObject; use tracing::{Span, info_span}; use crate::actors::MergePermit; -use crate::new_split_id; #[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub enum MergeOperationType { @@ -89,7 +88,7 @@ pub struct MergeOperation { impl MergeOperation { pub fn new_merge_operation(splits: Vec) -> Self { - let merge_split_id = new_split_id(); + let merge_split_id = SplitId::new(); let split_ids = splits.iter().map(|split| split.split_id()).collect_vec(); let merge_parent_span = info_span!("merge", merge_split_id=%merge_split_id, split_ids=?split_ids, typ=%MergeOperationType::Merge); Self { @@ -108,7 +107,7 @@ impl MergeOperation { } pub fn new_delete_and_merge_operation(split: SplitMetadata) -> Self { - let merge_split_id = new_split_id(); + let merge_split_id = SplitId::new(); let merge_parent_span = info_span!("delete", merge_split_id=%merge_split_id, split_ids=?split.split_id(), typ=%MergeOperationType::DeleteAndMerge); Self { merge_parent_span, @@ -237,7 +236,7 @@ pub mod tests { use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeSet, HashMap}; - use std::hash::Hasher; + use std::hash::{Hash as _, Hasher}; use std::ops::RangeInclusive; use proptest::prelude::*; @@ -274,11 +273,10 @@ pub mod tests { prop_compose! { fn split_strategy() (num_merge_ops in 0usize..5usize, start_timestamp in 1_664_000_000i64..1_665_000_000i64, average_time_delta in 100i64..120i64, delta_creation_date in 0u64..100_000u64, num_docs in num_docs_strategy()) -> SplitMetadata { - let split_id = crate::new_split_id(); let end_timestamp = start_timestamp + average_time_delta * pow_of_10(num_merge_ops) as i64; let create_timestamp: i64 = (end_timestamp as u64 + delta_creation_date) as i64; SplitMetadata { - split_id, + split_id: SplitId::new(), time_range: Some(start_timestamp..=end_timestamp), num_docs, create_timestamp, @@ -312,7 +310,7 @@ pub mod tests { let create_timestamp = OffsetDateTime::now_utc().unix_timestamp(); let time_to_maturity = merge_policy.split_maturity(num_docs, 0); SplitMetadata { - split_id: format!("split_{split_ord:02}"), + split_id: format!("split_{split_ord:02}").into(), num_docs, time_range: Some(time_range), create_timestamp, @@ -330,7 +328,7 @@ pub mod tests { let mut checksum = 0u64; for split in op.splits_as_slice() { let mut hasher = DefaultHasher::default(); - hasher.write(split.split_id.as_bytes()); + split.split_id.hash(&mut hasher); checksum ^= hasher.finish(); } checksum @@ -404,7 +402,7 @@ pub mod tests { fn fake_merge(merge_policy: &Arc, splits: &[SplitMetadata]) -> SplitMetadata { assert!(!splits.is_empty(), "Split list should not be empty."); - let merged_split_id = new_split_id(); + let merged_split_id = SplitId::new(); let tags = merge_tags(splits); let pipeline_id = MergePipelineId { node_id: NodeId::from_str("test_node"), @@ -421,7 +419,7 @@ pub mod tests { merge_op: &MergeOperation, ) -> SplitMetadata { for split in merge_op.splits_as_slice() { - assert!(split_index.remove(split.split_id()).is_some()); + assert!(split_index.remove(split.split_id().as_str()).is_some()); } let merged_split = fake_merge(merge_policy, merge_op.splits_as_slice()); split_index.insert(merged_split.split_id().to_string(), merged_split.clone()); @@ -505,7 +503,7 @@ pub mod tests { maturity: SplitMaturity, ) -> SplitMetadata { SplitMetadata { - split_id: crate::new_split_id(), + split_id: SplitId::new(), partition_id: 3u64, num_docs: num_docs as usize, uncompressed_docs_size_in_bytes: 256u64 * num_docs, diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index 03728fe2f6a..6d96d223dcf 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -20,7 +20,7 @@ use quickwit_common::temp_dir::TempDirectory; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::IndexingPipelineId; -use quickwit_proto::types::{DocMappingUid, IndexUid, PublishToken}; +use quickwit_proto::types::{DocMappingUid, IndexUid, PublishToken, SplitId}; use tantivy::IndexBuilder; use tantivy::directory::MmapDirectory; use tracing::{Span, instrument}; @@ -28,7 +28,6 @@ use tracing::{Span, instrument}; use crate::controlled_directory::ControlledDirectory; use crate::merge_policy::MergeTask; use crate::models::{PublishLock, SplitAttrs}; -use crate::new_split_id; pub struct IndexedSplitBuilder { pub split_attrs: SplitAttrs, @@ -45,7 +44,7 @@ pub struct IndexedSplit { } impl IndexedSplit { - pub fn split_id(&self) -> &str { + pub fn split_id(&self) -> &SplitId { &self.split_attrs.split_id } } @@ -85,7 +84,7 @@ impl IndexedSplitBuilder { // We avoid intermediary merge, and instead merge all segments in the packager. // The benefit is that we don't have to wait for potentially existing merges, // and avoid possible race conditions. - let split_id = new_split_id(); + let split_id = SplitId::new(); let split_scratch_directory_prefix = format!("split-{split_id}-"); let split_scratch_directory = scratch_directory.named_temp_child(&split_scratch_directory_prefix)?; @@ -145,7 +144,7 @@ impl IndexedSplitBuilder { self.split_scratch_directory.path() } - pub fn split_id(&self) -> &str { + pub fn split_id(&self) -> &SplitId { &self.split_attrs.split_id } } diff --git a/quickwit/quickwit-indexing/src/models/packaged_split.rs b/quickwit/quickwit-indexing/src/models/packaged_split.rs index db9b4abae39..6c8894da79e 100644 --- a/quickwit/quickwit-indexing/src/models/packaged_split.rs +++ b/quickwit/quickwit-indexing/src/models/packaged_split.rs @@ -39,8 +39,8 @@ impl PackagedSplit { &self.split_attrs.index_uid } - pub fn split_id(&self) -> &str { - &self.split_attrs.split_id + pub fn split_id_str(&self) -> &str { + self.split_attrs.split_id.as_str() } } diff --git a/quickwit/quickwit-indexing/src/models/publisher_message.rs b/quickwit/quickwit-indexing/src/models/publisher_message.rs index 13182a8f76a..22199d98656 100644 --- a/quickwit/quickwit-indexing/src/models/publisher_message.rs +++ b/quickwit/quickwit-indexing/src/models/publisher_message.rs @@ -17,7 +17,7 @@ use std::fmt; use itertools::Itertools; use quickwit_metastore::SplitMetadata; use quickwit_metastore::checkpoint::IndexCheckpointDelta; -use quickwit_proto::types::{IndexUid, PublishToken}; +use quickwit_proto::types::{IndexUid, PublishToken, SplitId}; use tracing::Span; use crate::merge_policy::MergeTask; @@ -26,7 +26,7 @@ use crate::models::PublishLock; pub struct SplitsUpdate { pub index_uid: IndexUid, pub new_splits: Vec, - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, pub checkpoint_delta_opt: Option, pub publish_lock: PublishLock, pub publish_token_opt: Option, diff --git a/quickwit/quickwit-indexing/src/models/split_attrs.rs b/quickwit/quickwit-indexing/src/models/split_attrs.rs index 2e0504bdd35..f94150444d9 100644 --- a/quickwit/quickwit-indexing/src/models/split_attrs.rs +++ b/quickwit/quickwit-indexing/src/models/split_attrs.rs @@ -60,7 +60,7 @@ pub struct SplitAttrs { pub time_range: Option>, - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, /// Delete opstamp. pub delete_opstamp: u64, diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 8b4db0844ad..94ee76de711 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -762,10 +762,9 @@ mod test_setup_helper { use quickwit_metastore::checkpoint::{IndexCheckpointDelta, PartitionId}; use quickwit_metastore::{CreateIndexRequestExt, SplitMetadata, StageSplitsRequestExt}; use quickwit_proto::metastore::{CreateIndexRequest, PublishSplitsRequest, StageSplitsRequest}; - use quickwit_proto::types::Position; + use quickwit_proto::types::{Position, SplitId}; use super::*; - use crate::new_split_id; pub async fn setup_index( metastore: MetastoreServiceClient, @@ -790,7 +789,7 @@ mod test_setup_helper { if partition_deltas.is_empty() { return index_uid; } - let split_id = new_split_id(); + let split_id = SplitId::new(); let split_metadata = SplitMetadata::for_test(split_id.clone()); let stage_splits_request = StageSplitsRequest::try_from_split_metadata(index_uid.clone(), &split_metadata) @@ -811,7 +810,7 @@ mod test_setup_helper { let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid.clone()), index_checkpoint_delta_json_opt: Some(checkpoint_delta_json), - staged_split_ids: vec![split_id.clone()], + staged_split_ids: vec![split_id.to_string()], replaced_split_ids: Vec::new(), publish_token_opt: None, }; diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index 70f19410ecf..ebb92977740 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -12,17 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; -use std::collections::btree_map::Entry; +use std::collections::{BTreeMap, HashMap}; use std::io; use std::path::{Path, PathBuf}; -use std::str::FromStr; use std::time::{Duration, SystemTime}; use anyhow::Context; use bytesize::ByteSize; use quickwit_common::split_file; use quickwit_directories::BundleDirectory; +use quickwit_proto::types::SplitId; use quickwit_storage::StorageResult; use tantivy::Directory; use tantivy::directory::MmapDirectory; @@ -66,40 +65,71 @@ async fn num_bytes_in_folder(directory_path: &Path) -> io::Result { Ok(ByteSize(total_bytes)) } +/// Recovers the creation time of a split from its id. +/// +/// TEMPORARY: the indexing split store is slated for removal once the compactor service lands. +/// +/// Split IDs are either a plain 26-char ULID or the prefixed form produced when +/// `QW_RANDOM_SPLIT_PREFIX` is enabled: `<4 random chars>_<22 body chars>` (27 chars). +/// In the prefixed form the original ULID is reconstructed as `body + prefix` before parsing. +/// IDs that do not parse as a valid ULID fall back to now, treating them as freshly created. +fn split_creation_time(split_id: &SplitId) -> SystemTime { + let s = split_id.as_str(); + // Detect the prefixed format by checking for `_` at byte position 4. + let ulid_str: std::borrow::Cow = if s.len() == 27 && s.as_bytes()[4] == b'_' { + let prefix = &s[..4]; // chars 22-25 of the original ULID + let body = &s[5..]; // chars 0-21 of the original ULID + format!("{body}{prefix}").into() + } else { + s.into() + }; + if let Ok(ulid) = Ulid::from_string(&ulid_str) { + ulid.datetime() + } else { + SystemTime::now() + } +} + /// The local split store is a cache for freshly indexed splits. /// /// In order to save the cost of an extra write, we store splits in the form /// of a directory and the split bundles are built upon upload. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] struct SplitFolder { - split_id: Ulid, + created_at: SystemTime, + split_id: SplitId, num_bytes: ByteSize, } +/// Orders `SplitFolder`s for eviction: oldest `created_at` first, ties broken by +/// insertion order (`insertion_seq`) rather than by `split_id`. +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] +struct SplitFolderOrderKey { + created_at: SystemTime, + insertion_seq: u64, +} + impl SplitFolder { /// Creates a new `SplitFolder`. /// /// There are no specific constraints on `path`. - pub async fn create(split_id: &str, path: &Path) -> io::Result { - let split_id = Ulid::from_str(split_id).map_err(|_err| { - let error_msg = format!("split Id should be an ulid: got `{split_id}`"); - io::Error::new(io::ErrorKind::InvalidInput, error_msg) - })?; + pub async fn create(split_id: SplitId, path: &Path) -> io::Result { let num_bytes = num_bytes_in_folder(path).await?; + let created_at = split_creation_time(&split_id); Ok(SplitFolder { + created_at, split_id, num_bytes, }) } - - /// Returns the creation time as encoded in the split id ULID. - fn creation_time(&self) -> SystemTime { - self.split_id.datetime() - } } -fn split_id_from_split_folder(dir_path: &Path) -> Option<&str> { - dir_path.file_name()?.to_str()?.strip_suffix(".split") +fn split_id_from_split_folder(dir_path: &Path) -> Option { + dir_path + .file_name()? + .to_str()? + .strip_suffix(".split") + .map(SplitId::from) } /// The [`IndexingSplitCache`] is a local cache used to improve the performance of indexing nodes. @@ -145,13 +175,17 @@ struct InnerSplitCache { struct SplitFolderRegistry { /// Splits owned by the local split store, which reside in the split_store_folder. /// - /// Splits ids are generated using ULID, so that they are sorted - /// according to their creation date. - /// - /// We evict the oldest split first. Note this is not an LRU strategy - /// because we do not care about the last access time, but we only - /// consider the creation time. - split_folders: BTreeMap, + /// Entries are ordered by `SplitFolderOrderKey` (see its `Ord` impl), so iteration order + /// matches creation order and the oldest split is the set's first element. We evict the oldest + /// split first. Note this is not an LRU strategy because we do not care about the last access + /// time, but we only consider the creation time. + split_folders: BTreeMap, + /// Index from `split_id` to the key used in `split_folders`, so that a split can be + /// removed by id without recomputing `created_at` (which is not deterministic for split ids + /// that do not parse as a ULID). + key_by_split_id: HashMap, + /// Monotonically increasing counter used as the tiebreaker component of `SplitFolderOrderKey`. + next_insertion_seq: u64, /// The split store quota shared among all indexing split stores. split_store_quota: SplitStoreQuota, } @@ -160,52 +194,61 @@ impl SplitFolderRegistry { pub fn with_quota(split_store_quota: SplitStoreQuota) -> SplitFolderRegistry { SplitFolderRegistry { split_folders: BTreeMap::default(), + key_by_split_id: HashMap::default(), + next_insertion_seq: 0, split_store_quota, } } - /// Returns an iterator over the split folders sorted by ULID. + /// Returns an iterator over the split folders sorted by creation time. #[cfg(any(test, feature = "testsuite"))] fn iter(&self) -> impl Iterator + '_ { - self.split_folders - .iter() - .map(|(&split_id, &num_bytes)| SplitFolder { - split_id, - num_bytes, - }) + self.split_folders.values().cloned() } /// Returns whether the element was inserted or was already present fn insert(&mut self, split_folder: SplitFolder) -> bool { - if let Entry::Vacant(entry) = self.split_folders.entry(split_folder.split_id) { - entry.insert(split_folder.num_bytes); - self.split_store_quota.add_split(split_folder.num_bytes); - true - } else { - false + if self.key_by_split_id.contains_key(&split_folder.split_id) { + return false; } + let split_folder_key = SplitFolderOrderKey { + created_at: split_folder.created_at, + insertion_seq: self.next_insertion_seq, + }; + self.next_insertion_seq = self.next_insertion_seq.wrapping_add(1); + self.key_by_split_id + .insert(split_folder.split_id.clone(), split_folder_key.clone()); + self.split_store_quota.add_split(split_folder.num_bytes); + self.split_folders.insert(split_folder_key, split_folder); + true } - /// Returns true if the split was indeed present in the registry - fn remove(&mut self, split_id: Ulid) -> Option { - let num_bytes = self.split_folders.remove(&split_id)?; - self.split_store_quota.remove_split(num_bytes); - Some(SplitFolder { - num_bytes, - split_id, - }) + /// Returns the split folder if it was indeed present in the registry. + fn remove(&mut self, split_id: &SplitId) -> Option { + let split_folder_key = self.key_by_split_id.remove(split_id)?; + let split_folder = self + .split_folders + .remove(&split_folder_key) + .expect("key_by_split_id and split_folders must stay in sync"); + self.split_store_quota.remove_split(split_folder.num_bytes); + Some(split_folder) } - /// Returns the oldest split (oldest in the sense of the ULID = creation time). - fn oldest_split(&self) -> Option { - let (split_id, _) = self.split_folders.first_key_value()?; - Some(*split_id) + /// Returns the creation time of the oldest split in the registry. + fn oldest_split_folder_key(&self) -> Option { + self.split_folders.keys().next().cloned() } /// Removes the oldest split. fn pop_oldest(&mut self) -> Option { - let oldest_split_id = self.oldest_split()?; - self.remove(oldest_split_id) + let oldest_key = self.oldest_split_folder_key()?; + let split_folder = self + .split_folders + .remove(&oldest_key) + .expect("key was just read from split_folders"); + self.key_by_split_id.remove(&split_folder.split_id); + self.split_store_quota.remove_split(split_folder.num_bytes); + Some(split_folder) } fn quota(&self) -> &SplitStoreQuota { @@ -219,7 +262,7 @@ impl InnerSplitCache { /// Returns `None` if the split is not available in the cache. async fn move_out( &mut self, - split_id: Ulid, + split_id: &SplitId, to_folder: &Path, ) -> StorageResult> { let Some(split_folder) = self.split_registry.remove(split_id) else { @@ -264,7 +307,7 @@ impl InnerSplitCache { } /// Returns the directory filepath of a split in cache. - fn split_path(&self, split_id: Ulid) -> PathBuf { + fn split_path(&self, split_id: &SplitId) -> PathBuf { let split_file = split_file(split_id); self.split_store_folder.join(split_file) } @@ -274,11 +317,11 @@ impl InnerSplitCache { /// # Panics /// Panics if there are no remaining splits. async fn evict_one_split(&mut self) -> io::Result<()> { - let evicted_split = self + let evicted_split: SplitFolder = self .split_registry .pop_oldest() .expect("split cache should not be empty"); - let result = tokio::fs::remove_dir_all(&self.split_path(evicted_split.split_id)).await; + let result = tokio::fs::remove_dir_all(&self.split_path(&evicted_split.split_id)).await; if let Err(io_err) = result { if io_err.kind() == io::ErrorKind::NotFound { // This could happen if some files have been manually deleted @@ -299,17 +342,17 @@ impl InnerSplitCache { /// If the cache capacity does not allow it returns Ok(false). /// /// Ok(true) means the file was effectively accepted. - async fn move_into_cache(&mut self, split_id_str: &str, split_path: &Path) -> io::Result { - let split_folder = SplitFolder::create(split_id_str, split_path).await?; - let split_id = split_folder.split_id; + async fn move_into_cache(&mut self, split_id: SplitId, split_path: &Path) -> io::Result { + let split_folder = SplitFolder::create(split_id, split_path).await?; + let split_id = split_folder.split_id.clone(); let should_move_split = self.make_room_and_record_split(split_folder).await?; if !should_move_split { return Ok(false); } - let to_full_path = self.split_path(split_id); + let to_full_path = self.split_path(&split_id); if let Err(io_err) = tokio::fs::rename(split_path, &to_full_path).await { // keep the registry stats accurate - self.split_registry.remove(split_id); + self.split_registry.remove(&split_id); return Err(io_err); } Ok(true) @@ -317,8 +360,8 @@ impl InnerSplitCache { /// Removes all splits that have a creation date older than `limit`. async fn remove_splits_older_than_limit(&mut self, limit: SystemTime) -> io::Result<()> { - while let Some(split_id) = self.split_registry.oldest_split() { - if split_id.datetime() >= limit { + while let Some(oldest_split_folder_key) = self.split_registry.oldest_split_folder_key() { + if oldest_split_folder_key.created_at >= limit { break; } self.evict_one_split().await?; @@ -343,7 +386,7 @@ impl InnerSplitCache { self.evict_one_split().await?; } - if let Some(creation_time_limit) = split_folder.creation_time().checked_sub(SPLIT_MAX_AGE) { + if let Some(creation_time_limit) = split_folder.created_at.checked_sub(SPLIT_MAX_AGE) { self.remove_splits_older_than_limit(creation_time_limit) .await?; }; @@ -412,16 +455,16 @@ impl IndexingSplitCache { split_registry: SplitFolderRegistry::with_quota(space_quota), }; - split_folders.sort_by_key(SplitFolder::creation_time); + split_folders.sort_by_key(|split_folder| split_folder.created_at); // We record all `split_folder`, sorted by `creation_time`. for split_folder in split_folders { - let split_id = split_folder.split_id; + let split_id = split_folder.split_id.clone(); if !inner_local_split_store .make_room_and_record_split(split_folder) .await? { - let split_dir = inner_local_split_store.split_path(split_id); + let split_dir = inner_local_split_store.split_path(&split_id); tokio::fs::remove_dir_all(&split_dir).await?; } } @@ -432,13 +475,13 @@ impl IndexingSplitCache { } #[cfg(any(test, feature = "testsuite"))] - pub async fn inspect_registry(&self) -> std::collections::HashMap { + pub async fn inspect_registry(&self) -> std::collections::HashMap { self.inner .lock() .await .split_registry .iter() - .map(|split_folder| (split_folder.split_id.to_string(), split_folder.num_bytes)) + .map(|split_folder| (split_folder.split_id.clone(), split_folder.num_bytes)) .collect() } @@ -462,20 +505,14 @@ impl IndexingSplitCache { /// storage. pub(super) async fn get_cached_split( &self, - split_id: &str, + split_id: &SplitId, output_dir_path: &Path, ) -> StorageResult> { let mut split_store_lock = self.inner.lock().await; - let split_ulid = if let Ok(split_ulid) = Ulid::from_str(split_id) { - split_ulid - } else { - return Ok(None); - }; - let split_file_opt: Option = split_store_lock - .move_out(split_ulid, output_dir_path) - .await?; + let split_file_opt: Option = + split_store_lock.move_out(split_id, output_dir_path).await?; if split_file_opt.is_none() { - debug!(split_id, "split folder not in cache"); + debug!(split_id=%split_id, "split folder not in cache"); } Ok(split_file_opt) } @@ -490,7 +527,7 @@ impl IndexingSplitCache { /// Ok(true) means the file was effectively accepted. pub(super) async fn move_into_cache( &self, - split_id: &str, + split_id: SplitId, split_path: &Path, ) -> io::Result { assert!(split_path.is_dir()); @@ -505,7 +542,7 @@ mod tests { use std::io; use std::io::Write; use std::path::Path; - use std::time::Duration; + use std::time::{Duration, SystemTime}; use bytesize::ByteSize; use quickwit_directories::BundleDirectory; @@ -516,9 +553,31 @@ mod tests { use tokio::fs; use ulid::Ulid; - use super::SPLIT_MAX_AGE; + use super::{SPLIT_MAX_AGE, SplitId, split_creation_time}; use crate::split_store::{IndexingSplitCache, SplitStoreQuota}; + #[test] + fn test_split_creation_time() { + let ulid_str = "01GF5449X7DA53TK9F9W2ZJST2"; + let ulid = Ulid::from_string(ulid_str).unwrap(); + let expected = ulid.datetime(); + + // A bare ULID split id (no prefix) resolves to that ULID's creation time. + assert_eq!(split_creation_time(&SplitId::from(ulid_str)), expected); + + // A prefixed split id (QW_RANDOM_SPLIT_PREFIX format) resolves to the same creation time. + let prefixed_split_id = SplitId::from_ulid(ulid, true); + assert_eq!(split_creation_time(&prefixed_split_id), expected); + + // An id that does not parse as a valid ULID falls back to ~now, so a malformed split is + // treated as freshly created rather than as the first eviction candidate. + let now = SystemTime::now(); + let date_from_invalid = split_creation_time(&SplitId::from("this-is-not-a-valid-ulid!!")); + let date_from_short = split_creation_time(&SplitId::from("short")); + assert!(date_from_invalid >= now); + assert!(date_from_short >= now); + } + async fn create_fake_split( split_cache_path: &Path, split_id: &str, @@ -627,7 +686,10 @@ mod tests { .await .unwrap(); local_split_store - .move_into_cache("01GFCZJBMBMEPMAQSFD09VTST2", extra_split.path()) + .move_into_cache( + SplitId::from("01GFCZJBMBMEPMAQSFD09VTST2"), + extra_split.path(), + ) .await .unwrap(); assert_eq!(local_split_store.inspect_registry().await.len(), 1); @@ -672,7 +734,10 @@ mod tests { let extra_split = tempdir().unwrap(); local_split_store // 2022-10-15T4:48:49.803Z - .move_into_cache("01GFCZJBMBMEPMAQSFD09VTST2", extra_split.path()) + .move_into_cache( + SplitId::from("01GFCZJBMBMEPMAQSFD09VTST2"), + extra_split.path(), + ) .await .unwrap(); let cache_content = local_split_store.inspect_registry().await; @@ -685,7 +750,10 @@ mod tests { let extra_split = tempdir().unwrap(); let was_accepted = local_split_store // 2025-01-13T14:28:17.364Z - .move_into_cache("01JHG11FAM8F2XPWHY24R3HF6M", extra_split.path()) + .move_into_cache( + SplitId::from("01JHG11FAM8F2XPWHY24R3HF6M"), + extra_split.path(), + ) .await .unwrap(); assert!(was_accepted); @@ -722,7 +790,7 @@ mod tests { #[tokio::test] async fn test_store_and_fetch() { let temp_dir_in = tempfile::tempdir().unwrap(); - let split_id = Ulid::default().to_string(); + let split_id: SplitId = Ulid::default().to_string().into(); let cache_dir = tempfile::tempdir().unwrap(); let quota = SplitStoreQuota::default(); let local_store = IndexingSplitCache::open(cache_dir.path().to_path_buf(), quota) @@ -733,7 +801,7 @@ mod tests { tokio::fs::create_dir(&split_dir).await.unwrap(); assert!( local_store - .move_into_cache(&split_id, &split_dir) + .move_into_cache(split_id.clone(), &split_dir) .await .unwrap() ); @@ -787,7 +855,10 @@ mod tests { let target_dir = tempdir().unwrap(); let path_opt = local_split_store - .get_cached_split("01GF5215TMV48JT7GZ543BV193", target_dir.path()) + .get_cached_split( + &SplitId::from("01GF5215TMV48JT7GZ543BV193"), + target_dir.path(), + ) .await .unwrap(); assert_eq!(path_opt, None); @@ -815,7 +886,10 @@ mod tests { let extra_split = tempdir().unwrap(); let was_accepted = local_split_store // 2022-10-12T02:14:54.347Z - .move_into_cache("01GF4ZJBMBMEPMAQSFD09VTST2", extra_split.path()) + .move_into_cache( + SplitId::from("01GF4ZJBMBMEPMAQSFD09VTST2"), + extra_split.path(), + ) .await .unwrap(); assert!(was_accepted); @@ -843,11 +917,43 @@ mod tests { extra_split_file.write_all(&[0u8; 15]).unwrap(); let was_accepted = split_store - .move_into_cache(split_id, extra_split.path()) + .move_into_cache(split_id.into(), extra_split.path()) .await .unwrap(); assert!(!was_accepted); let quota = split_store.inspect_quota().await; assert_eq!(quota.used_num_bytes(), ByteSize(15)); } + + #[tokio::test] + async fn test_store_and_fetch_non_ulid_split_id() { + // `split_creation_time` falls back to `SystemTime::now()` for split ids that do not + // parse as a ULID. Before the fix, the registry recomputed the key at removal time by + // calling `split_creation_time` again, so a split whose id does not parse as a ULID could + // never be found again: the two `SystemTime::now()` calls (at insertion and at removal) + // essentially never return the same value. + let temp_dir_in = tempfile::tempdir().unwrap(); + let split_id: SplitId = SplitId::from("this-is-not-a-valid-ulid!!"); + let cache_dir = tempfile::tempdir().unwrap(); + let quota = SplitStoreQuota::default(); + let local_store = IndexingSplitCache::open(cache_dir.path().to_path_buf(), quota) + .await + .unwrap(); + + let split_dir = temp_dir_in.path().join("scratch"); + tokio::fs::create_dir(&split_dir).await.unwrap(); + assert!( + local_store + .move_into_cache(split_id.clone(), &split_dir) + .await + .unwrap() + ); + + let split_path = local_store + .get_cached_split(&split_id, temp_dir_in.path()) + .await + .unwrap() + .unwrap(); + assert!(split_path.try_exists().unwrap()); + } } diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs index 88904cbd6dd..df157ce8d29 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs @@ -24,6 +24,7 @@ use bytesize::ByteSize; use quickwit_common::io::{IoControls, IoControlsAccess}; use quickwit_common::uri::Uri; use quickwit_metastore::SplitMetadata; +use quickwit_proto::types::SplitId; use quickwit_storage::{PutPayload, Storage, StorageResult}; use tantivy::Directory; use tantivy::directory::{Advice, MmapDirectory}; @@ -93,7 +94,7 @@ impl IndexingSplitStore { self.inner.remote_storage.uri() } - fn split_path(&self, split_id: &str) -> PathBuf { + fn split_path(&self, split_id: &SplitId) -> PathBuf { PathBuf::from(quickwit_common::split_file(split_id)) } @@ -149,7 +150,7 @@ impl IndexingSplitStore { if self .inner .split_cache - .move_into_cache(split.split_id(), split_folder_path) + .move_into_cache(split.split_id().clone(), split_folder_path) .await? { return Ok(()); @@ -178,15 +179,15 @@ impl IndexingSplitStore { #[instrument(skip(self, output_dir_path, io_controls), fields(cache_hit))] pub async fn fetch_and_open_split( &self, - split_id: &str, + split_id: SplitId, output_dir_path: &Path, io_controls: &IoControls, ) -> StorageResult> { - let path = PathBuf::from(quickwit_common::split_file(split_id)); + let path = PathBuf::from(quickwit_common::split_file(&split_id)); if let Some(split_path) = self .inner .split_cache - .get_cached_split(split_id, output_dir_path) + .get_cached_split(&split_id, output_dir_path) .await? { tracing::Span::current().record("cache_hit", true); @@ -211,7 +212,7 @@ impl IndexingSplitStore { /// Takes a snapshot of the cache view (only used for testing). #[cfg(any(test, feature = "testsuite"))] - pub async fn inspect_split_cache(&self) -> HashMap { + pub async fn inspect_split_cache(&self) -> HashMap { self.inner.split_cache.inspect_registry().await } } @@ -224,18 +225,18 @@ mod tests { use bytesize::ByteSize; use quickwit_common::io::IoControls; use quickwit_metastore::{SplitMaturity, SplitMetadata}; + use quickwit_proto::types::SplitId; use quickwit_storage::{PutPayload, RamStorage, SplitPayloadBuilder}; use tempfile::tempdir; use time::OffsetDateTime; use tokio::fs; - use ulid::Ulid; use super::IndexingSplitStore; use crate::split_store::{IndexingSplitCache, SplitStoreQuota}; - fn create_test_split_metadata(split_id: &str) -> SplitMetadata { + fn create_test_split_metadata(split_id: &SplitId) -> SplitMetadata { SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.clone(), create_timestamp: OffsetDateTime::now_utc().unix_timestamp(), maturity: SplitMaturity::Immature { maturation_period: Duration::from_secs(3600), @@ -257,11 +258,11 @@ mod tests { let remote_storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::new(remote_storage, Arc::new(split_cache)); - let split_id1 = Ulid::new().to_string(); - let split_id2 = Ulid::new().to_string(); + let split_id1: SplitId = SplitId::new(); + let split_id2: SplitId = SplitId::new(); { - let split1_dir = temp_dir.path().join(&split_id1); + let split1_dir = temp_dir.path().join(split_id1.as_str()); fs::create_dir_all(&split1_dir).await?; let split_metadata1 = create_test_split_metadata(&split_id1); fs::write(split1_dir.join("splitfile"), b"1234").await?; @@ -283,7 +284,7 @@ mod tests { ); } { - let split2_dir = temp_dir.path().join(&split_id2); + let split2_dir = temp_dir.path().join(split_id2.as_str()); fs::create_dir_all(&split2_dir).await?; fs::write(split2_dir.join("splitfile"), b"567").await?; let split_metadata2 = create_test_split_metadata(&split_id2); @@ -314,7 +315,7 @@ mod tests { { let output = tempfile::tempdir()?; let split1 = split_store - .fetch_and_open_split(&split_id1, output.path(), &io_controls) + .fetch_and_open_split(split_id1.clone(), output.path(), &io_controls) .await?; let local_store_stats = split_store.inspect_split_cache().await; assert_eq!(local_store_stats.len(), 1); @@ -323,7 +324,7 @@ mod tests { { let output = tempfile::tempdir()?; let split2 = split_store - .fetch_and_open_split(&split_id2, output.path(), &io_controls) + .fetch_and_open_split(split_id2.clone(), output.path(), &io_controls) .await?; let local_store_stats = split_store.inspect_split_cache().await; assert_eq!(local_store_stats.len(), 0); @@ -347,13 +348,13 @@ mod tests { let remote_storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::new(remote_storage, Arc::new(split_cache)); - let split_id1 = Ulid::new().to_string(); + let split_id1 = SplitId::new(); let split_payload1 = SplitPayloadBuilder::get_split_payload(&[], &[], &[5, 5, 5])?; - let split_id2 = Ulid::new().to_string(); + let split_id2 = SplitId::new(); let split_payload2 = SplitPayloadBuilder::get_split_payload(&[], &[], &[5, 5, 5, 5])?; { - let split_path = temp_dir.path().join(&split_id1); + let split_path = temp_dir.path().join(split_id1.as_str()); fs::create_dir_all(&split_path).await?; fs::write(split_path.join("splitdatafile"), b"hello-world").await?; let split_metadata1 = create_test_split_metadata(&split_id1); @@ -379,7 +380,7 @@ mod tests { ); } { - let split_path = temp_dir.path().join(&split_id2); + let split_path = temp_dir.path().join(split_id2.as_str()); fs::create_dir_all(&split_path).await?; fs::write(split_path.join("splitdatafile2"), b"hello-world2").await?; let split_metadata2 = create_test_split_metadata(&split_id2); @@ -410,7 +411,7 @@ mod tests { // get from remote storage because split_id1 was evicted by split_id2 let output = tempfile::tempdir()?; let _split1 = split_store - .fetch_and_open_split(&split_id1, output.path(), &io_controls) + .fetch_and_open_split(split_id1.clone(), output.path(), &io_controls) .await?; assert_eq!(io_controls.num_bytes(), split_payload1.len()); } @@ -418,7 +419,7 @@ mod tests { // get from cache let output = tempfile::tempdir()?; let _split2 = split_store - .fetch_and_open_split(&split_id2, output.path(), &io_controls) + .fetch_and_open_split(split_id2.clone(), output.path(), &io_controls) .await?; // the number of downloaded by didn't change (still the size of split_payload1) assert_eq!(io_controls.num_bytes(), split_payload1.len()); @@ -427,7 +428,7 @@ mod tests { // get from remote because getting from cache removes the split from the cache let output = tempfile::tempdir()?; let _split2 = split_store - .fetch_and_open_split(&split_id2, output.path(), &io_controls) + .fetch_and_open_split(split_id2.clone(), output.path(), &io_controls) .await?; assert_eq!( io_controls.num_bytes(), diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 42014752a7c..7f5dfcfc56d 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -280,7 +280,7 @@ pub fn mock_split(split_id: &str) -> Split { pub fn mock_split_meta(split_id: &str, index_uid: &IndexUid) -> SplitMetadata { SplitMetadata { index_uid: index_uid.clone(), - split_id: split_id.to_string(), + split_id: split_id.into(), partition_id: 13u64, num_docs: if split_id == "split1" { 1_000_000 } else { 10 }, uncompressed_docs_size_in_bytes: 256, diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index f293d938410..6937fb3f517 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -377,7 +377,7 @@ mod tests { .iter() .map(|split_id| Split { split_metadata: SplitMetadata { - split_id: split_id.to_string(), + split_id: (*split_id).into(), index_uid: IndexUid::for_test(index_id, 0), footer_offsets: 5..20, ..Default::default() diff --git a/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs b/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs index 3b79f5e489b..da43bb7f371 100644 --- a/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs +++ b/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs @@ -343,7 +343,7 @@ mod tests { fn make_split(split_id: &str, time_range: Option>) -> Split { Split { split_metadata: SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), footer_offsets: 5..20, time_range, ..Default::default() diff --git a/quickwit/quickwit-janitor/src/retention_policy_execution.rs b/quickwit/quickwit-janitor/src/retention_policy_execution.rs index bf06dd1b032..97109cec427 100644 --- a/quickwit/quickwit-janitor/src/retention_policy_execution.rs +++ b/quickwit/quickwit-janitor/src/retention_policy_execution.rs @@ -63,9 +63,9 @@ pub async fn run_execute_retention_policy( .partition(|split_metadata| split_metadata.time_range.is_some()); if !ignored_splits.is_empty() { - let ignored_split_ids: Vec = ignored_splits - .into_iter() - .map(|split_metadata| split_metadata.split_id) + let ignored_split_ids: Vec<&str> = ignored_splits + .iter() + .map(|split_metadata| split_metadata.split_id().as_str()) .collect(); warn!( index_id=%index_uid.index_id, @@ -80,7 +80,7 @@ pub async fn run_execute_retention_policy( // Mark the expired splits for deletion. let expired_split_ids: Vec = expired_splits .iter() - .map(|split_metadata| split_metadata.split_id.to_string()) + .map(|split_metadata| split_metadata.split_id.clone()) .collect(); info!( index_id=%index_uid.index_id, diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs index 7bb73f71e52..a1d204adc63 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs @@ -232,9 +232,9 @@ impl FileBackedIndex { .map(|delete_task| delete_task.opstamp) .max() .unwrap_or(0) as usize; - let splits = splits + let splits: HashMap = splits .into_iter() - .map(|split| (split.split_id().to_string(), split)) + .map(|split| (split.split_id().clone(), split)) .collect(); let metrics_splits = metrics_splits .into_iter() @@ -330,7 +330,7 @@ impl FileBackedIndex { publish_timestamp: None, split_metadata, }; - self.splits.insert(split.split_id().to_string(), split); + self.splits.insert(split.split_id().clone(), split); Ok(()) } @@ -1331,7 +1331,7 @@ mod tests { [ Split { split_metadata: SplitMetadata { - split_id: "split-1".to_string(), + split_id: "split-1".into(), delete_opstamp: 9, time_range: Some(32..=40), tags: BTreeSet::from(["tag-1".to_string()]), @@ -1345,7 +1345,7 @@ mod tests { }, Split { split_metadata: SplitMetadata { - split_id: "split-2".to_string(), + split_id: "split-2".into(), delete_opstamp: 4, time_range: None, tags: BTreeSet::from(["tag-2".to_string(), "tag-3".to_string()]), @@ -1359,7 +1359,7 @@ mod tests { }, Split { split_metadata: SplitMetadata { - split_id: "split-3".to_string(), + split_id: "split-3".into(), delete_opstamp: 0, time_range: Some(0..=90), tags: BTreeSet::from(["tag-2".to_string(), "tag-4".to_string()]), diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs index aa5e90f466f..3939d207563 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs @@ -1834,7 +1834,7 @@ mod tests { let split_id = "split-one"; let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: split_id.to_string(), + split_id: split_id.into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(RangeInclusive::new(0, 99)), @@ -1936,7 +1936,7 @@ mod tests { let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: "split1".to_string(), + split_id: "split1".into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(0..=99), @@ -1999,7 +1999,7 @@ mod tests { let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: "split1".to_string(), + split_id: "split1".into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(0..=99), @@ -2058,7 +2058,7 @@ mod tests { async move { let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(RangeInclusive::new(0, 99)), diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs index 43bd995e1ba..de080bfcc4e 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs @@ -625,7 +625,7 @@ impl MetastoreService for PostgresqlMetastore { let tags: Vec = split_metadata.tags.into_iter().collect(); tags_list.push(sqlx::types::Json(tags)); - split_ids.push(split_metadata.split_id); + split_ids.push(split_metadata.split_id.to_string()); delete_opstamps.push(split_metadata.delete_opstamp as i64); node_ids.push(split_metadata.node_id); } @@ -3375,7 +3375,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()).after_split(&crate::SplitMetadata { index_uid: index_uid.clone(), - split_id: "my_split".to_string(), + split_id: "my_split".into(), ..Default::default() }); append_query_filters_and_order_by(sql, &query); diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/model.rs b/quickwit/quickwit-metastore/src/metastore/postgres/model.rs index 117f37047d9..8aac26a8c7e 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/model.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/model.rs @@ -19,7 +19,7 @@ use std::str::FromStr; use quickwit_proto::ingest::{Shard, ShardState}; use quickwit_proto::metastore::{DeleteQuery, DeleteTask, MetastoreError, MetastoreResult}; -use quickwit_proto::types::{DocMappingUid, IndexId, IndexUid, ShardId, SourceId, SplitId}; +use quickwit_proto::types::{DocMappingUid, IndexId, IndexUid, ShardId, SourceId}; use sea_query::{Iden, Write}; use tracing::error; @@ -101,8 +101,9 @@ impl Iden for ToTimestampFunc { /// A model structure for handling split metadata in a database. #[derive(sqlx::FromRow)] pub(super) struct PgSplit { - /// Split ID. - pub split_id: SplitId, + /// It is only used for logging here (the authoritative split id lives in + /// `split_metadata_json`). + pub split_id: String, /// The state of the split. With `update_timestamp`, this is the only mutable attribute of the /// split. pub split_state: String, diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs index 28519b8a294..153096cc0b4 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs @@ -198,7 +198,10 @@ pub(super) fn append_query_filters_and_order_by( Expr::col(Splits::IndexUid).into(), Expr::col(Splits::SplitId).into(), ]) - .gt(Expr::tuple([Expr::value(index_uid), Expr::value(split_id)])), + .gt(Expr::tuple([ + Expr::value(index_uid), + Expr::value(split_id.as_str()), + ])), ); } diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index fe88fe379d3..f3b6e868b7e 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -47,7 +47,7 @@ pub struct Split { impl Split { /// Returns the split_id. - pub fn split_id(&self) -> &str { + pub fn split_id(&self) -> &SplitId { &self.split_metadata.split_id } } @@ -201,7 +201,7 @@ impl SplitMetadata { } /// Returns the split_id. - pub fn split_id(&self) -> &str { + pub fn split_id(&self) -> &SplitId { &self.split_id } @@ -264,7 +264,7 @@ pub struct SplitInfo { impl quickwit_config::TestableForRegression for SplitMetadata { fn sample_for_regression() -> Self { SplitMetadata { - split_id: "split".to_string(), + split_id: "split".into(), index_uid: IndexUid::for_test("my-index", 1), source_id: "source".to_string(), node_id: "node".to_string(), @@ -398,7 +398,7 @@ mod tests { #[test] fn test_split_metadata_debug() { let split_metadata = SplitMetadata { - split_id: "split-1".to_string(), + split_id: "split-1".into(), index_uid: IndexUid::for_test("00000000-0000-0000-0000-000000000000", 0), partition_id: 0, source_id: "source-1".to_string(), diff --git a/quickwit/quickwit-metastore/src/tests/index.rs b/quickwit/quickwit-metastore/src/tests/index.rs index c093c7b1a6c..de991a89dca 100644 --- a/quickwit/quickwit-metastore/src/tests/index.rs +++ b/quickwit/quickwit-metastore/src/tests/index.rs @@ -800,7 +800,7 @@ pub async fn test_metastore_delete_index< let split_id = format!("{index_id}--split"); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -844,7 +844,7 @@ pub async fn test_metastore_list_index_stats< let split_id_1 = format!("{index_id_1}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), footer_offsets: 0..2048, ..Default::default() @@ -852,7 +852,7 @@ pub async fn test_metastore_list_index_stats< let split_id_2 = format!("{index_id_1}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_1.clone(), footer_offsets: 0..2048, ..Default::default() @@ -860,7 +860,7 @@ pub async fn test_metastore_list_index_stats< let split_id_3 = format!("{index_id_1}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_2.clone(), footer_offsets: 0..1000, ..Default::default() @@ -884,7 +884,7 @@ pub async fn test_metastore_list_index_stats< let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid_1.clone()), - staged_split_ids: vec![split_id_1.clone(), split_id_2.clone()], + staged_split_ids: vec![split_id_1, split_id_2], ..Default::default() }; metastore diff --git a/quickwit/quickwit-metastore/src/tests/list_splits.rs b/quickwit/quickwit-metastore/src/tests/list_splits.rs index 01145612dd0..3f95b5172fa 100644 --- a/quickwit/quickwit-metastore/src/tests/list_splits.rs +++ b/quickwit/quickwit-metastore/src/tests/list_splits.rs @@ -33,7 +33,7 @@ use crate::metastore::MetastoreServiceStreamSplitsExt; use crate::tests::{cleanup_index, collect_split_ids}; use crate::{ CreateIndexRequestExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, - MetastoreServiceExt, SplitMaturity, SplitMetadata, SplitState, StageSplitsRequestExt, + MetastoreServiceExt, Split, SplitMaturity, SplitMetadata, SplitState, StageSplitsRequestExt, }; pub async fn test_metastore_list_all_splits< @@ -48,37 +48,37 @@ pub async fn test_metastore_list_all_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_4 = format!("{index_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_5 = format!("{index_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_6 = format!("{index_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -180,16 +180,16 @@ pub async fn test_metastore_stream_splits4}"); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; split_metadatas_to_create.push(split_metadata); if split_idx > 0 && split_idx % 100 == 0 { - let staged_split_ids: Vec = split_metadatas_to_create + let staged_split_ids: Vec = split_metadatas_to_create .iter() - .map(|split_metadata| split_metadata.split_id.clone()) + .map(|split_metadata| split_metadata.split_id.to_string()) .collect(); let stage_splits_request = StageSplitsRequest::try_from_splits_metadata( index_uid.clone(), @@ -226,10 +226,13 @@ pub async fn test_metastore_stream_splits = splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .sorted() .collect(); assert_eq!(split_ids, &[&split_id_1, &split_id_5]); @@ -726,7 +729,7 @@ pub async fn test_metastore_list_splits = splits + let split_ids: Vec<&SplitId> = splits .iter() .map(|split| &split.split_metadata.split_id) .sorted() @@ -914,7 +917,7 @@ pub async fn test_metastore_list_splits_by_node_id< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 20, @@ -923,7 +926,7 @@ pub async fn test_metastore_list_splits_by_node_id< }; let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 10, @@ -968,7 +971,7 @@ pub async fn test_metastore_list_stale_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 20, @@ -976,7 +979,7 @@ pub async fn test_metastore_list_stale_splits< }; let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 10, @@ -984,7 +987,7 @@ pub async fn test_metastore_list_stale_splits< }; let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 0, @@ -992,7 +995,7 @@ pub async fn test_metastore_list_stale_splits< }; let split_id_4 = format!("{index_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 20, @@ -1001,7 +1004,7 @@ pub async fn test_metastore_list_stale_splits< // immature split let split_id_5 = format!("{index_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, maturity: SplitMaturity::Immature { @@ -1100,7 +1103,7 @@ pub async fn test_metastore_list_stale_splits< delete_opstamp: 100, num_splits: 4, }; - let splits = metastore + let splits: Vec = metastore .list_stale_splits(list_stale_splits_request) .await .unwrap() @@ -1169,42 +1172,42 @@ pub async fn test_metastore_list_sorted_splits< let split_id_1 = format!("{split_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), delete_opstamp: 5, ..Default::default() }; let split_id_2 = format!("{split_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_2.clone(), delete_opstamp: 3, ..Default::default() }; let split_id_3 = format!("{split_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_1.clone(), delete_opstamp: 1, ..Default::default() }; let split_id_4 = format!("{split_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid_2.clone(), delete_opstamp: 0, ..Default::default() }; let split_id_5 = format!("{split_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid_1.clone(), delete_opstamp: 2, ..Default::default() }; let split_id_6 = format!("{split_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid_2.clone(), delete_opstamp: 4, ..Default::default() @@ -1287,9 +1290,9 @@ pub async fn test_metastore_list_sorted_splits< .await .unwrap(); // we don't use collect_split_ids because it sorts splits internally - let split_ids = splits + let split_ids: Vec<&str> = splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .collect::>(); assert_eq!( split_ids, @@ -1315,9 +1318,9 @@ pub async fn test_metastore_list_sorted_splits< .await .unwrap(); // we don't use collect_split_ids because it sorts splits internally - let split_ids = splits + let split_ids: Vec<&str> = splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .collect::>(); assert_eq!( split_ids, @@ -1366,37 +1369,37 @@ pub async fn test_metastore_list_after_split< let split_id_1 = format!("{split_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_2 = format!("{split_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_3 = format!("{split_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_4 = format!("{split_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_5 = format!("{split_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_6 = format!("{split_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; @@ -1535,37 +1538,37 @@ pub async fn test_metastore_list_splits_from_all_indexes< let split_id_1 = format!("{split_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_2 = format!("{split_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_3 = format!("{split_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_4 = format!("{split_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_5 = format!("{split_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_6 = format!("{split_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-metastore/src/tests/mod.rs b/quickwit/quickwit-metastore/src/tests/mod.rs index 377c763a606..deda34fac5d 100644 --- a/quickwit/quickwit-metastore/src/tests/mod.rs +++ b/quickwit/quickwit-metastore/src/tests/mod.rs @@ -113,7 +113,7 @@ async fn create_channel(client: tokio::io::DuplexStream) -> anyhow::Result Vec<&str> { splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .sorted() .collect() } diff --git a/quickwit/quickwit-metastore/src/tests/source.rs b/quickwit/quickwit-metastore/src/tests/source.rs index e67f6bdbb3f..157c3360e75 100644 --- a/quickwit/quickwit-metastore/src/tests/source.rs +++ b/quickwit/quickwit-metastore/src/tests/source.rs @@ -459,7 +459,7 @@ pub async fn test_metastore_reset_checkpoint< .unwrap(); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-metastore/src/tests/split.rs b/quickwit/quickwit-metastore/src/tests/split.rs index 9e6d45265e3..a66e91a84c4 100644 --- a/quickwit/quickwit-metastore/src/tests/split.rs +++ b/quickwit/quickwit-metastore/src/tests/split.rs @@ -134,7 +134,7 @@ pub async fn test_metastore_publish_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), time_range: Some(0..=99), create_timestamp: current_timestamp, @@ -143,7 +143,7 @@ pub async fn test_metastore_publish_splits< let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), time_range: Some(30..=99), create_timestamp: current_timestamp, @@ -715,7 +715,7 @@ pub async fn test_metastore_publish_splits_concurrency< async move { let split_id = format!("{index_id}--split-{partition_id}"); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -784,7 +784,7 @@ pub async fn test_metastore_replace_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), time_range: None, create_timestamp: current_timestamp, @@ -793,7 +793,7 @@ pub async fn test_metastore_replace_splits< let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), time_range: None, create_timestamp: current_timestamp, @@ -802,7 +802,7 @@ pub async fn test_metastore_replace_splits< let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), time_range: None, create_timestamp: current_timestamp, @@ -1135,7 +1135,7 @@ pub async fn test_metastore_mark_splits_for_deletion< "index-not-found:00000000000000000000000000" .parse() .unwrap(), - Vec::new(), + Vec::::new(), ); let error = metastore .mark_splits_for_deletion(mark_splits_for_deletion_request) @@ -1155,7 +1155,7 @@ pub async fn test_metastore_mark_splits_for_deletion< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, ..Default::default() @@ -1166,7 +1166,7 @@ pub async fn test_metastore_mark_splits_for_deletion< let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, ..Default::default() @@ -1186,7 +1186,7 @@ pub async fn test_metastore_mark_splits_for_deletion< let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, ..Default::default() @@ -1228,7 +1228,7 @@ pub async fn test_metastore_mark_splits_for_deletion< .unwrap(); assert_eq!(marked_splits.len(), 1); - assert_eq!(marked_splits[0].split_id(), split_id_3); + assert_eq!(marked_splits[0].split_id().as_str(), split_id_3); let split_3_update_timestamp = marked_splits[0].update_timestamp; assert!(current_timestamp < split_3_update_timestamp); @@ -1267,13 +1267,13 @@ pub async fn test_metastore_mark_splits_for_deletion< assert_eq!(marked_splits.len(), 3); - assert_eq!(marked_splits[0].split_id(), split_id_1); + assert_eq!(marked_splits[0].split_id().as_str(), split_id_1); assert!(current_timestamp + 2 <= marked_splits[0].update_timestamp); - assert_eq!(marked_splits[1].split_id(), split_id_2); + assert_eq!(marked_splits[1].split_id().as_str(), split_id_2); assert!(current_timestamp + 2 <= marked_splits[1].update_timestamp); - assert_eq!(marked_splits[2].split_id(), split_id_3); + assert_eq!(marked_splits[2].split_id().as_str(), split_id_3); assert_eq!(marked_splits[2].update_timestamp, split_3_update_timestamp); cleanup_index(&mut metastore, index_uid).await; @@ -1335,7 +1335,7 @@ pub async fn test_metastore_delete_splits(&self, this: &T, other: &T) -> Ordering { + pub fn compare(&self, this: &T, other: &T) -> Ordering { if self == &search::SortOrder::Desc { this.cmp(other) } else { diff --git a/quickwit/quickwit-proto/src/metastore/mod.rs b/quickwit/quickwit-proto/src/metastore/mod.rs index 4f53b9abb5c..47cd4b50580 100644 --- a/quickwit/quickwit-proto/src/metastore/mod.rs +++ b/quickwit/quickwit-proto/src/metastore/mod.rs @@ -19,7 +19,7 @@ use quickwit_common::retry::Retryable; use quickwit_common::tower::{MakeLoadShedError, TimeoutExceeded}; use serde::{Deserialize, Serialize}; -use crate::types::{IndexId, IndexUid, QueueId, SourceId, SplitId}; +use crate::types::{IndexId, IndexUid, QueueId, SourceId}; use crate::{GrpcServiceError, ServiceError, ServiceErrorCode}; pub mod events; @@ -67,7 +67,7 @@ pub enum EntityKind { /// A split. Split { /// Split ID. - split_id: SplitId, + split_id: String, }, /// A set of splits. Splits { @@ -371,10 +371,13 @@ impl IndexMetadataRequest { } impl MarkSplitsForDeletionRequest { - pub fn new(index_uid: IndexUid, split_ids: Vec) -> Self { + pub fn new( + index_uid: IndexUid, + split_ids: impl IntoIterator>, + ) -> Self { Self { index_uid: index_uid.into(), - split_ids, + split_ids: split_ids.into_iter().map(Into::into).collect(), } } } diff --git a/quickwit/quickwit-proto/src/types/mod.rs b/quickwit/quickwit-proto/src/types/mod.rs index 7fea3fae4f8..a7e5f8c23a9 100644 --- a/quickwit/quickwit-proto/src/types/mod.rs +++ b/quickwit/quickwit-proto/src/types/mod.rs @@ -28,6 +28,7 @@ mod index_uid; mod pipeline_uid; mod position; mod shard_id; +mod split_id; pub use doc_mapping_uid::DocMappingUid; pub use doc_uid::{DocUid, DocUidGenerator}; @@ -35,6 +36,7 @@ pub use index_uid::IndexUid; pub use pipeline_uid::PipelineUid; pub use position::Position; pub use shard_id::ShardId; +pub use split_id::SplitId; /// The size of an ULID in bytes. Use `ULID_LEN` for the length of Base32 encoded ULID strings. pub(crate) const ULID_SIZE: usize = 16; @@ -43,8 +45,6 @@ pub type IndexId = String; pub type SourceId = String; -pub type SplitId = String; - pub type SubrequestId = u32; /// See the file `ingest.proto` for more details. diff --git a/quickwit/quickwit-proto/src/types/split_id.rs b/quickwit/quickwit-proto/src/types/split_id.rs new file mode 100644 index 00000000000..af8942d17fb --- /dev/null +++ b/quickwit/quickwit-proto/src/types/split_id.rs @@ -0,0 +1,206 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Borrow; +use std::convert::Infallible; +use std::fmt; +use std::str::FromStr; +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + +/// Identifies a split. +/// +/// Joined with the index URI (`/`), this ID is enough to +/// uniquely identify a split. +/// +/// Split IDs are currently generated as ULIDs (see +/// `quickwit_indexing::new_split_id`), but the rest of the codebase must treat +/// them as **opaque** strings: no part of the system may assume the id is a +/// ULID or extract information (such as a creation timestamp) from it. This +/// indirection is what allows the generator to prepend a random prefix for S3 +/// load distribution without rippling through the codebase. +/// +/// The inner storage is an [`Arc`] so that cloning is cheap (a split id is +/// frequently used as a cache key and copied around the search and indexing +/// pipelines). +#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct SplitId(Arc); + +impl SplitId { + /// Generates a new split id. + /// + /// When the `QW_RANDOM_SPLIT_PREFIX` environment variable is set to `true`, + /// the ID is prefixed with 4 random characters taken from the tail of the + /// ULID. This spreads S3 key prefixes across partitions and avoids hot-spots + /// when writes concentrate on the same time window. + /// + /// Layout with prefix enabled: `<4 random chars>_<22 remaining ULID chars>` + /// (27 chars total vs the usual 26). + pub fn new() -> Self { + static RANDOM_PREFIX_ENABLED: std::sync::LazyLock = std::sync::LazyLock::new(|| { + quickwit_common::get_bool_from_env("QW_RANDOM_SPLIT_PREFIX", false) + }); + Self::from_ulid(ulid::Ulid::new(), *RANDOM_PREFIX_ENABLED) + } + + /// Constructs a `SplitId` from a `Ulid`, optionally prepending a random prefix. + /// + /// The prefix is the last 4 characters of the ULID string (pure random bits), + /// separated from the remaining 22 characters by `_`. + pub fn from_ulid(ulid: ulid::Ulid, random_prefix: bool) -> Self { + let ulid_str = ulid.to_string(); // 26 Crockford base32 chars + if random_prefix { + // The last 4 characters of a ULID are random bits. + // Moving them to a prefix spreads S3 key prefixes while keeping the + // full ULID entropy in the string. + let prefix = &ulid_str[22..]; // chars 22-25: random section + let body = &ulid_str[..22]; // chars 0-21: timestamp + most of random + SplitId::from(format!("{prefix}_{body}")) + } else { + SplitId::from(ulid_str) + } + } + + /// Returns the id as a string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +// `Debug` delegates to the inner string so a split id renders identically to the +// `String` it replaced (e.g. `"01HAV29D4XY3D462FS3D8K5Q2H"`, not +// `SplitId("01HAV29D4XY3D462FS3D8K5Q2H")`). +impl fmt::Debug for SplitId { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.0, formatter) + } +} + +impl fmt::Display for SplitId { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(&self.0) + } +} + +impl From for SplitId { + fn from(split_id: String) -> Self { + SplitId(split_id.into()) + } +} + +impl From for String { + fn from(split_id: SplitId) -> Self { + split_id.0.as_ref().to_owned() + } +} + +impl From<&SplitId> for String { + fn from(split_id: &SplitId) -> Self { + split_id.0.as_ref().to_owned() + } +} + +impl From<&str> for SplitId { + fn from(split_id: &str) -> Self { + SplitId(Arc::from(split_id)) + } +} + +impl FromStr for SplitId { + // A split id imposes no constraint on its content, so parsing never fails. + type Err = Infallible; + + fn from_str(split_id_str: &str) -> Result { + Ok(SplitId::from(split_id_str)) + } +} + +impl AsRef for SplitId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +// Enables looking up a `HashMap` or `BTreeMap` with a `&str` key. +impl Borrow for SplitId { + fn borrow(&self) -> &str { + &self.0 + } +} + +impl PartialEq for SplitId { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq for SplitId { + fn eq(&self, other: &String) -> bool { + self.as_str() == other.as_str() + } +} + +impl PartialEq<&str> for SplitId { + fn eq(&self, other: &&str) -> bool { + self.as_str() == *other + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_id_serde_is_transparent() { + let split_id = SplitId::from("01HAV29D4XY3D462FS3D8K5Q2H"); + let serialized = serde_json::to_string(&split_id).unwrap(); + assert_eq!(serialized, "\"01HAV29D4XY3D462FS3D8K5Q2H\""); + let deserialized: SplitId = serde_json::from_str(&serialized).unwrap(); + assert_eq!(deserialized, split_id); + } + + #[test] + fn test_split_id_borrow_as_str() { + use std::collections::HashMap; + + let mut map: HashMap = HashMap::new(); + map.insert(SplitId::from("split-1"), 42); + assert_eq!(map.get("split-1"), Some(&42)); + } + + #[test] + fn test_split_id_ordering_is_lexicographic() { + let split_id_a = SplitId::from("aaa"); + let split_id_b = SplitId::from("bbb"); + assert!(split_id_a < split_id_b); + } + + #[test] + fn test_split_id_from_ulid_without_prefix() { + let ulid = ulid::Ulid::from_string("01HAV29D4XY3D462FS3D8K5Q2H").unwrap(); + let split_id = SplitId::from_ulid(ulid, false); + assert_eq!(split_id.as_str(), "01HAV29D4XY3D462FS3D8K5Q2H"); + } + + #[test] + fn test_split_id_from_ulid_with_prefix() { + // "01HAV29D4XY3D462FS3D8K5Q2H": last 4 chars = "5Q2H", first 22 = "01HAV29D4XY3D462FS3D8K". + let ulid = ulid::Ulid::from_string("01HAV29D4XY3D462FS3D8K5Q2H").unwrap(); + let split_id = SplitId::from_ulid(ulid, true); + assert_eq!(split_id.as_str(), "5Q2H_01HAV29D4XY3D462FS3D8K"); + assert_eq!(split_id.as_str().len(), 27); + } +} diff --git a/quickwit/quickwit-query/Cargo.toml b/quickwit/quickwit-query/Cargo.toml index de9b3dd07ed..d992f75a63c 100644 --- a/quickwit/quickwit-query/Cargo.toml +++ b/quickwit/quickwit-query/Cargo.toml @@ -29,7 +29,6 @@ rustc-hash = { workspace = true } quickwit-common = { workspace = true } quickwit-datetime = { workspace = true } -quickwit-proto = { workspace = true } [dev-dependencies] criterion = { workspace = true } diff --git a/quickwit/quickwit-query/src/query_ast/cache_node.rs b/quickwit/quickwit-query/src/query_ast/cache_node.rs index 48eeaf794e4..48f0fabf419 100644 --- a/quickwit/quickwit-query/src/query_ast/cache_node.rs +++ b/quickwit/quickwit-query/src/query_ast/cache_node.rs @@ -15,7 +15,6 @@ use std::sync::Arc; use bitpacking::{BitPacker, BitPacker1x}; -use quickwit_proto::types::SplitId; use serde::{Deserialize, Serialize}; use super::{BuildTantivyAst, BuildTantivyAstContext, TantivyQueryAst}; @@ -485,9 +484,9 @@ impl crate::query_ast::QueryAstTransformer for PredicateCacheInjector { // we use a trait to dodge circular dependancies with quickwit-storage pub trait PredicateCache: Send + Sync + 'static { - fn get(&self, split_id: SplitId, query_ast_json: String) -> Option<(SegmentId, HitSet)>; + fn get(&self, split_id: String, query_ast_json: String) -> Option<(SegmentId, HitSet)>; - fn put(&self, split_id: SplitId, query_ast_json: String, segment: SegmentId, results: HitSet); + fn put(&self, split_id: String, query_ast_json: String, segment: SegmentId, results: HitSet); } #[cfg(test)] @@ -504,8 +503,8 @@ mod tests { BuildTantivyAstContext, QueryAstTransformer, QueryAstVisitor, TermQuery, }; - impl PredicateCache for Mutex> { - fn get(&self, split_id: SplitId, query_ast_json: String) -> Option<(SegmentId, HitSet)> { + impl PredicateCache for Mutex> { + fn get(&self, split_id: String, query_ast_json: String) -> Option<(SegmentId, HitSet)> { self.lock() .unwrap() .get(&(split_id, query_ast_json)) @@ -514,7 +513,7 @@ mod tests { fn put( &self, - split_id: SplitId, + split_id: String, query_ast_json: String, segment: SegmentId, results: HitSet, diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index daa01f2a4b0..66a0ef56046 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -490,7 +490,7 @@ pub(crate) struct SegmentPartialHit { impl SegmentPartialHit { pub fn into_partial_hit( self, - split_id: SplitId, + split_id: &SplitId, segment_ord: SegmentOrdinal, first: &SortingFieldExtractorComponent, second: &Option, @@ -514,7 +514,7 @@ impl SegmentPartialHit { sort_value: Some(sort_value), }), doc_id: self.doc_id, - split_id, + split_id: split_id.to_string(), segment_ord, } } @@ -670,7 +670,7 @@ impl QuickwitIncrementalAggregations { sort_value: Some(SortValue::I64(timestamp)), }), sort_value2: None, - split_id: SplitId::new(), + split_id: String::new(), segment_ord: 0, doc_id: 0, }); @@ -1069,7 +1069,7 @@ pub(crate) fn make_merge_collector( }; let sort_by = sort_by_from_request(search_request); Ok(QuickwitCollector { - split_id: SplitId::default(), + split_id: SplitId::from(""), start_offset: search_request.start_offset as usize, max_hits: search_request.max_hits as usize, sort_by, @@ -1320,6 +1320,7 @@ mod tests { LeafResourceStats, LeafSearchResponse, PartialHit, SearchRequest, SortByValue, SortField, SortOrder, SortValue, SplitResourceStats, SplitSearchError, }; + use quickwit_proto::types::SplitId; use tantivy::TantivyDocument; use tantivy::aggregation::agg_req::Aggregations; use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults; @@ -1594,7 +1595,7 @@ mod tests { // Check increasing slice sizes of the dataset for slice_len in 0..dataset.len() { let collector = super::make_collector_for_split( - "fake_split_id".to_string(), + SplitId::from("fake_split_id"), &make_request(slice_len as u64, sort_str), Default::default(), ) @@ -1690,7 +1691,7 @@ mod tests { ..SearchRequest::default() }; let collector = super::make_collector_for_split( - "fake_split_id".to_string(), + SplitId::from("fake_split_id"), &request, Default::default(), ) @@ -1728,7 +1729,7 @@ mod tests { }; let collector = super::make_collector_for_split( - "fake_split_id1".to_string(), + SplitId::from("fake_split_id1"), &request, Default::default(), ) @@ -1742,7 +1743,7 @@ mod tests { assert_eq!(res.partial_hits.len(), dataset.len()); let collector = super::make_collector_for_split( - "fake_split_id2".to_string(), + SplitId::from("fake_split_id2"), &request, Default::default(), ) @@ -1755,7 +1756,7 @@ mod tests { assert_eq!(res.partial_hits.len(), 5); let collector = super::make_collector_for_split( - "fake_split_id3".to_string(), + SplitId::from("fake_split_id3"), &request, Default::default(), ) diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 16e435fb61f..121e2d7df02 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -37,6 +37,7 @@ use quickwit_proto::search::{ CountHits, LeafResourceStats, LeafSearchRequest, LeafSearchResponse, PartialHit, SearchRequest, SortOrder, SortValue, SplitIdAndFooterOffsets, SplitResourceStats, SplitSearchError, }; +use quickwit_proto::types::SplitId; use quickwit_query::query_ast::{ BoolQuery, CacheNode, HitSet, PredicateCache, QueryAst, QueryAstTransformer, RangeQuery, TermQuery, @@ -717,8 +718,11 @@ async fn leaf_search_single_split( limits: ctx.searcher_context.get_aggregation_limits(), tokenizers: ctx.doc_mapper.tokenizer_manager().tantivy_manager().clone(), }; - let mut collector = - make_collector_for_split(split_id.clone(), &search_request, agg_context_params)?; + let mut collector = make_collector_for_split( + SplitId::from(split_id.as_str()), + &search_request, + agg_context_params, + )?; let predicate_cache = if collector.requires_scoring() { // at the moment the predicate cache doesn't support scoring diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index ad0a30cc4e6..4cb9f37d6c5 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -19,7 +19,6 @@ use quickwit_config::CacheConfig; use quickwit_proto::search::{ CountHits, LeafResourceStats, LeafSearchResponse, SearchRequest, SplitIdAndFooterOffsets, }; -use quickwit_proto::types::SplitId; use quickwit_storage::{MemorySizedCache, OwnedBytes}; use tantivy::index::SegmentId; @@ -86,7 +85,7 @@ impl LeafSearchCache { #[derive(Debug, Hash, Clone, PartialEq, Eq)] struct CacheKey { /// The split this entry refers to - split_id: SplitId, + split_id: String, /// The request this matches. The timerange of the request was removed. request: SearchRequest, /// The effective time range of the request, that is, the intersection of the timerange @@ -196,7 +195,7 @@ impl RangeBounds for HalfOpenRange { } pub struct PredicateCacheImpl { - content: MemorySizedCache<(SplitId, String)>, + content: MemorySizedCache<(String, String)>, } impl PredicateCacheImpl { @@ -213,7 +212,7 @@ impl PredicateCacheImpl { impl quickwit_query::query_ast::PredicateCache for PredicateCacheImpl { fn get( &self, - split_id: SplitId, + split_id: String, query_ast_json: String, ) -> Option<(SegmentId, quickwit_query::query_ast::HitSet)> { let encoded_result = self.content.get(&(split_id, query_ast_json))?; @@ -226,7 +225,7 @@ impl quickwit_query::query_ast::PredicateCache for PredicateCacheImpl { fn put( &self, - split_id: SplitId, + split_id: String, query_ast_json: String, segment: SegmentId, hits: quickwit_query::query_ast::HitSet, diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index 8d2ad0f924d..cde7597b186 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -164,7 +164,7 @@ impl std::str::FromStr for GlobalDocAddress { fn extract_split_and_footer_offsets(split_metadata: &SplitMetadata) -> SplitIdAndFooterOffsets { SplitIdAndFooterOffsets { - split_id: split_metadata.split_id.clone(), + split_id: split_metadata.split_id.to_string(), split_footer_start: split_metadata.footer_offsets.start, split_footer_end: split_metadata.footer_offsets.end, timestamp_start: split_metadata diff --git a/quickwit/quickwit-search/src/list_fields/cache.rs b/quickwit/quickwit-search/src/list_fields/cache.rs index bf9611cb9fb..b29091fcfa3 100644 --- a/quickwit/quickwit-search/src/list_fields/cache.rs +++ b/quickwit/quickwit-search/src/list_fields/cache.rs @@ -13,11 +13,10 @@ // limitations under the License. use quickwit_config::CacheConfig; -use quickwit_proto::types::SplitId; use quickwit_storage::{MemorySizedCache, OwnedBytes}; pub struct ListFieldsCache { - cache: MemorySizedCache, + cache: MemorySizedCache, } impl ListFieldsCache { @@ -30,11 +29,11 @@ impl ListFieldsCache { } } - pub fn get(&self, split_id: &SplitId) -> Option { + pub fn get(&self, split_id: &String) -> Option { self.cache.get(split_id) } - pub fn put(&self, split_id: SplitId, serialized_split_fields: OwnedBytes) { + pub fn put(&self, split_id: String, serialized_split_fields: OwnedBytes) { self.cache.put(split_id, serialized_split_fields); } } diff --git a/quickwit/quickwit-search/src/list_fields/leaf.rs b/quickwit/quickwit-search/src/list_fields/leaf.rs index 09d02314d6d..c3b79742b80 100644 --- a/quickwit/quickwit-search/src/list_fields/leaf.rs +++ b/quickwit/quickwit-search/src/list_fields/leaf.rs @@ -21,7 +21,7 @@ use quickwit_common::shared_consts::{FIELD_PRESENCE_FIELD_NAME, SPLIT_FIELDS_FIL use quickwit_proto::search::{ ListFieldsEntry, ListFieldsMetadata, ListFieldsResponse, SplitIdAndFooterOffsets, }; -use quickwit_proto::types::{IndexId, SplitId}; +use quickwit_proto::types::IndexId; use quickwit_storage::{OwnedBytes, Storage}; use tracing::{Span, instrument}; @@ -154,7 +154,7 @@ async fn fetch_fields_metadata( async fn process_fields_metadata( serialized_entries: OwnedBytes, index_id: IndexId, - split_id: SplitId, + split_id: String, field_patterns: FieldPatterns, ) -> anyhow::Result> { let parent_span = Span::current(); diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 7624c7c8e30..f04bb437851 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -947,11 +947,11 @@ pub(crate) async fn fetch_docs_phase( // Build map of Split ID > index ID to add the index ID to the hits. // Used for ES compatibility. - let split_id_to_index_id_map: HashMap<&SplitId, &str> = split_metadatas + let split_id_to_index_id_map: HashMap = split_metadatas .iter() .map(|split_metadata| { ( - &split_metadata.split_id, + split_metadata.split_id().clone(), split_metadata.index_uid.index_id.as_str(), ) }) @@ -984,7 +984,7 @@ pub(crate) async fn fetch_docs_phase( fn build_hit_with_position( mut leaf_hit: LeafHit, - split_id_to_index_id_map: &HashMap<&SplitId, &str>, + split_id_to_index_id_map: &HashMap, hit_order: &HashMap<(String, u32, u32), usize>, sort_field_1_datetime_format_opt: &Option, sort_field_2_datetime_format_opt: &Option, @@ -1018,7 +1018,7 @@ fn build_hit_with_position( } let position = *hit_order.get(&key).expect("hit order must be present"); let index_id = split_id_to_index_id_map - .get(&partial_hit_ref.split_id) + .get(partial_hit_ref.split_id.as_str()) .map(|split_id| split_id.to_string()) .unwrap_or_default(); diff --git a/quickwit/quickwit-search/src/top_k_collector.rs b/quickwit/quickwit-search/src/top_k_collector.rs index f36eb6370e2..5ac84769c9f 100644 --- a/quickwit/quickwit-search/src/top_k_collector.rs +++ b/quickwit/quickwit-search/src/top_k_collector.rs @@ -597,7 +597,7 @@ impl< .map(|el| el.into_segment_partial_hit()) .map(|segment_partial_hit: SegmentPartialHit| { segment_partial_hit.into_partial_hit( - self.split_id.clone(), + &self.split_id, self.segment_ord, &self.hit_fetcher.first, &self.hit_fetcher.second, @@ -634,7 +634,7 @@ impl GenericQuickwitSegmentTopKCollector { let sort_key_mapper = HitSortingMapper { order1, order2 }; let precomp_search_after_order = match &search_after_option { Some(search_after) if !search_after.split_id.is_empty() => order1 - .compare(&split_id, &search_after.split_id) + .compare(split_id.as_str(), search_after.split_id.as_str()) .then_with(|| order1.compare(&segment_ord, &search_after.segment_ord)), // This value isn't actually used. _ => Ordering::Equal, @@ -807,7 +807,7 @@ impl QuickwitSegmentTopKCollector for GenericQuickwitSegmentTopKCollector { .into_iter() .map(|segment_partial_hit: SegmentPartialHit| { segment_partial_hit.into_partial_hit( - self.split_id.clone(), + &self.split_id, self.segment_ord, &self.score_extractor.first, &self.score_extractor.second, diff --git a/quickwit/quickwit-serve/src/index_api/split_resource.rs b/quickwit/quickwit-serve/src/index_api/split_resource.rs index a062186e551..241fd4455a2 100644 --- a/quickwit/quickwit-serve/src/index_api/split_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/split_resource.rs @@ -20,7 +20,7 @@ use quickwit_proto::metastore::{ IndexMetadataRequest, ListSplitsRequest, MarkSplitsForDeletionRequest, MetastoreResult, MetastoreService, MetastoreServiceClient, }; -use quickwit_proto::types::{IndexId, IndexUid}; +use quickwit_proto::types::{IndexId, IndexUid, SplitId}; use serde::{Deserialize, Serialize}; use tracing::info; use warp::{Filter, Rejection}; @@ -180,10 +180,10 @@ pub async fn mark_splits_for_deletion( .deserialize_index_metadata()? .index_uid; info!(index_id = %index_id, splits_ids = ?splits_for_deletion.split_ids, "mark-splits-for-deletion"); - let split_ids: Vec = splits_for_deletion + let split_ids: Vec = splits_for_deletion .split_ids .iter() - .map(|split_id| split_id.to_string()) + .map(|split_id| SplitId::from(split_id.as_str())) .collect(); let mark_splits_for_deletion_request = MarkSplitsForDeletionRequest::new(index_uid, split_ids.clone()); diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 7cc855b1822..c95f04b91cc 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -19,14 +19,14 @@ use std::ops::Range; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; +use quickwit_proto::types::SplitId; use tantivy::directory::OwnedBytes; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; -use ulid::Ulid; use crate::metrics::{FD_CACHE_METRICS, SingleCacheMetrics}; pub struct FileDescriptorCache { - fd_cache: Mutex>, + fd_cache: Mutex>, fd_semaphore: Arc, fd_cache_metrics: SingleCacheMetrics, } @@ -41,7 +41,7 @@ struct SplitFileInner { _fd_semaphore_guard: OwnedSemaphorePermit, } -fn get_split_file_path(root_path: &Path, split_id: Ulid) -> PathBuf { +fn get_split_file_path(root_path: &Path, split_id: &SplitId) -> PathBuf { let split_filename = quickwit_common::split_file(split_id); root_path.join(split_filename) } @@ -92,11 +92,11 @@ impl FileDescriptorCache { ) } - fn get_split_file(&self, split_id: Ulid) -> Option { - self.fd_cache.lock().unwrap().get(&split_id).cloned() + fn get_split_file(&self, split_id: &SplitId) -> Option { + self.fd_cache.lock().unwrap().get(split_id).cloned() } - fn put_split_file(&self, split_id: Ulid, split_file: SplitFile) { + fn put_split_file(&self, split_id: SplitId, split_file: SplitFile) { let mut fd_cache_lock = self.fd_cache.lock().unwrap(); fd_cache_lock.push(split_id, split_file); self.fd_cache_metrics @@ -106,7 +106,7 @@ impl FileDescriptorCache { /// Evicts the given list of split ids from the file descriptor cache. /// This method does NOT remove the actual files. - pub fn evict_split_files(&self, split_ids: &[Ulid]) { + pub fn evict_split_files(&self, split_ids: &[SplitId]) { let mut fd_cache_lock = self.fd_cache.lock().unwrap(); for split_id in split_ids { fd_cache_lock.pop(split_id); @@ -122,16 +122,16 @@ impl FileDescriptorCache { pub async fn get_or_open_split_file( &self, root_path: &Path, - split_id: Ulid, + split_id: SplitId, num_bytes: u64, ) -> std::io::Result { - if let Some(split_file) = self.get_split_file(split_id) { + if let Some(split_file) = self.get_split_file(&split_id) { self.fd_cache_metrics.hits_num_items.inc(); return Ok(split_file); } else { self.fd_cache_metrics.misses_num_items.inc(); } - let split_path = get_split_file_path(root_path, split_id); + let split_path = get_split_file_path(root_path, &split_id); let fd_semaphore_guard = Semaphore::acquire_owned(self.fd_semaphore.clone()) .await .expect("fd_semaphore acquire failed. please report"); @@ -177,12 +177,17 @@ impl SplitFile { mod tests { use std::num::NonZeroU32; + use quickwit_proto::types::SplitId; use tokio::fs; use ulid::Ulid; use super::FileDescriptorCache; use crate::metrics::CacheMetrics; + fn new_test_split_id() -> SplitId { + SplitId::from(Ulid::new().to_string()) + } + #[tokio::test] async fn test_fd_cache_big_cache() { let cache_metrics = CacheMetrics::for_component("fdtest").cache_metrics; @@ -192,28 +197,30 @@ mod tests { cache_metrics.clone(), ); let tempdir = tempfile::tempdir().unwrap(); - let split_ids: Vec = std::iter::repeat_with(Ulid::new).take(100).collect(); - for &split_id in &split_ids { + let split_ids: Vec = std::iter::repeat_with(new_test_split_id) + .take(100) + .collect(); + for split_id in &split_ids { let split_filepath = super::get_split_file_path(tempdir.path(), split_id); let content = split_id.to_string(); assert_eq!(content.len(), 26); fs::write(split_filepath, content.as_bytes()).await.unwrap(); } - for &split_id in &split_ids[0..10] { + for split_id in &split_ids[0..10] { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } - for &split_id in &split_ids[0..10] { + for split_id in &split_ids[0..10] { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } - for &split_id in &split_ids[0..10] { + for split_id in &split_ids[0..10] { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } @@ -234,17 +241,19 @@ mod tests { cache_metrics.clone(), ); let tempdir = tempfile::tempdir().unwrap(); - let split_ids: Vec = std::iter::repeat_with(Ulid::new).take(100).collect(); - for &split_id in &split_ids { + let split_ids: Vec = std::iter::repeat_with(new_test_split_id) + .take(100) + .collect(); + for split_id in &split_ids { let split_filepath = super::get_split_file_path(tempdir.path(), split_id); let content = split_id.to_string(); assert_eq!(content.len(), 26); fs::write(split_filepath, content.as_bytes()).await.unwrap(); } - for &split_id in &split_ids[0..100] { + for split_id in &split_ids[0..100] { for _ in 0..10 { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } @@ -258,8 +267,8 @@ mod tests { async fn test_split_file() { let fd_cache = FileDescriptorCache::with_fd_cache_capacity(NonZeroU32::new(20).unwrap()); let tempdir = tempfile::tempdir().unwrap(); - let split_id: Ulid = Ulid::new(); - let split_filepath = super::get_split_file_path(tempdir.path(), split_id); + let split_id: SplitId = new_test_split_id(); + let split_filepath = super::get_split_file_path(tempdir.path(), &split_id); let content = split_id.to_string(); assert_eq!(content.len(), 26); fs::write(split_filepath, content.as_bytes()).await.unwrap(); diff --git a/quickwit/quickwit-storage/src/split_cache/download_task.rs b/quickwit/quickwit-storage/src/split_cache/download_task.rs index 9af0390bfb5..d7336d76aba 100644 --- a/quickwit/quickwit-storage/src/split_cache/download_task.rs +++ b/quickwit/quickwit-storage/src/split_cache/download_task.rs @@ -29,11 +29,11 @@ async fn download_split( storage_resolver: StorageResolver, ) -> anyhow::Result { let CandidateSplit { - split_ulid, + split_id, storage_uri, living_token: _, } = candidate_split; - let split_filename = split_file(*split_ulid); + let split_filename = split_file(split_id); let target_filepath = root_path.join(&split_filename); let storage = storage_resolver.resolve(storage_uri).await?; let num_bytes = storage @@ -52,7 +52,7 @@ async fn perform_eviction_and_download( splits_to_delete, split_to_download, } = download_opportunity; - let split_ulid = split_to_download.split_ulid; + let split_id = split_to_download.split_id.clone(); // tokio io runs on `spawn_blocking` threads anyway. let split_cache_clone = split_cache.clone(); let _ = tokio::task::spawn_blocking(move || { @@ -62,7 +62,7 @@ async fn perform_eviction_and_download( let num_bytes = download_split(&split_cache.root_path, &split_to_download, storage_resolver).await?; let mut shared_split_table_lock = split_cache.split_table.lock().unwrap(); - shared_split_table_lock.register_as_downloaded(split_ulid, num_bytes); + shared_split_table_lock.register_as_downloaded(split_id, num_bytes); Ok(()) } diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs index 1c0de124d73..a02cdd3d484 100644 --- a/quickwit/quickwit-storage/src/split_cache/mod.rs +++ b/quickwit/quickwit-storage/src/split_cache/mod.rs @@ -28,9 +28,9 @@ use quickwit_common::split_file; use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; use quickwit_proto::search::ReportSplit; +use quickwit_proto::types::SplitId; use tantivy::directory::OwnedBytes; use tracing::{error, info, instrument, warn}; -use ulid::Ulid; use crate::file_descriptor_cache::{FileDescriptorCache, SplitFile}; use crate::split_cache::download_task::spawn_download_task; @@ -59,7 +59,7 @@ impl SplitCache { limits: SplitCacheLimits, ) -> io::Result> { std::fs::create_dir_all(&root_path)?; - let mut existing_splits: BTreeMap = Default::default(); + let mut existing_splits: BTreeMap = Default::default(); for dir_entry_res in std::fs::read_dir(&root_path)? { let dir_entry = dir_entry_res?; let path = dir_entry.path(); @@ -80,10 +80,10 @@ impl SplitCache { } } "split" => { - if let Some(split_ulid) = split_id_from_path(&path) { - existing_splits.insert(split_ulid, meta.len()); + if let Some(split_id) = split_id_from_path(&path) { + existing_splits.insert(split_id, meta.len()); } else { - warn!(path=%path.display(), ".split file with invalid ulid in split cache directory, ignoring"); + warn!(path=%path.display(), ".split file with invalid name in split cache directory, ignoring"); } } _ => { @@ -120,7 +120,7 @@ impl SplitCache { /// Remove splits from both the fd cache and the split cache. /// This method does NOT update the split table. - pub(crate) fn evict(&self, splits_to_evict: &[Ulid]) { + pub(crate) fn evict(&self, splits_to_evict: &[SplitId]) { self.fd_cache.evict_split_files(splits_to_evict); delete_evicted_splits(&self.root_path, splits_to_evict); } @@ -138,28 +138,26 @@ impl SplitCache { pub fn report_splits(&self, report_splits: Vec) { let mut split_table = self.split_table.lock().unwrap(); for report_split in report_splits { - let Ok(split_ulid) = Ulid::from_str(&report_split.split_id) else { - error!(split_id=%report_split.split_id, "received invalid split ulid: ignoring"); - continue; - }; + // Split ids are opaque: any non-empty string is accepted (no ULID validation). + let split_id = SplitId::from(report_split.split_id); let Ok(storage_uri) = Uri::from_str(&report_split.storage_uri) else { error!(storage_uri=%report_split.storage_uri, "received invalid storage uri: ignoring"); continue; }; - split_table.report(split_ulid, storage_uri); + split_table.report(split_id, storage_uri); } } // Returns a split guard object. As long as it is not dropped, the // split won't be evinced from the cache. - async fn get_split_file(&self, split_id: Ulid, storage_uri: &Uri) -> Option { + async fn get_split_file(&self, split_id: SplitId, storage_uri: &Uri) -> Option { // We touch before even checking the fd cache in order to update the file's last access time // for the file cache. let num_bytes_opt: Option = self .split_table .lock() .unwrap() - .touch(split_id, storage_uri); + .touch(split_id.clone(), storage_uri); let num_bytes = num_bytes_opt?; self.fd_cache @@ -175,8 +173,8 @@ impl SplitCache { /// At this point, the disk space is already accounted as released, /// so the error could result in a "disk space leak". #[instrument] -fn delete_evicted_splits(root_path: &Path, splits_to_delete: &[Ulid]) { - for &split_to_delete in splits_to_delete { +fn delete_evicted_splits(root_path: &Path, splits_to_delete: &[SplitId]) { + for split_to_delete in splits_to_delete { let split_file_path = root_path.join(split_file(split_to_delete)); if let Err(_io_err) = std::fs::remove_file(&split_file_path) { // This is an pretty critical error. The split size is not tracked anymore at this @@ -186,10 +184,12 @@ fn delete_evicted_splits(root_path: &Path, splits_to_delete: &[Ulid]) { } } -fn split_id_from_path(split_path: &Path) -> Option { +fn split_id_from_path(split_path: &Path) -> Option { let split_filename = split_path.file_name()?.to_str()?; let split_id_str = split_filename.strip_suffix(".split")?; - Ulid::from_str(split_id_str).ok() + // The split id is opaque: we take the whole file stem (a random prefix, if any, is part of + // the id) rather than parsing it as a ULID. + Some(SplitId::from(split_id_str)) } struct SplitCacheBackingStorage { diff --git a/quickwit/quickwit-storage/src/split_cache/split_table.rs b/quickwit/quickwit-storage/src/split_cache/split_table.rs index ee24a393e84..4e191bc47fa 100644 --- a/quickwit/quickwit-storage/src/split_cache/split_table.rs +++ b/quickwit/quickwit-storage/src/split_cache/split_table.rs @@ -19,7 +19,7 @@ use std::time::{Duration, Instant}; use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; -use ulid::Ulid; +use quickwit_proto::types::SplitId; use crate::metrics::SEARCHER_SPLIT_CACHE; @@ -31,10 +31,10 @@ const MAX_NUM_CANDIDATES: usize = 1_000; /// Splits that are freshly reported get a last access time of `now - NEWLY_REPORT_SPLIT_LAST_TIME`. const NEWLY_REPORTED_SPLIT_LAST_TIME: Duration = Duration::from_secs(60 * 10); // 10mn -#[derive(Clone, Copy)] +#[derive(Clone)] pub(crate) struct SplitKey { pub last_accessed: LastAccessDate, - pub split_ulid: Ulid, + pub split_id: SplitId, } impl PartialOrd for SplitKey { @@ -45,13 +45,13 @@ impl PartialOrd for SplitKey { impl Ord for SplitKey { fn cmp(&self, other: &Self) -> Ordering { - (self.last_accessed, &self.split_ulid).cmp(&(other.last_accessed, &other.split_ulid)) + (self.last_accessed, &self.split_id).cmp(&(other.last_accessed, &other.split_id)) } } impl PartialEq for SplitKey { fn eq(&self, other: &Self) -> bool { - (self.last_accessed, &self.split_ulid) == (other.last_accessed, &other.split_ulid) + (self.last_accessed, &self.split_id) == (other.last_accessed, &other.split_id) } } @@ -103,7 +103,7 @@ pub struct SplitTable { on_disk_splits: BTreeSet, downloading_splits: BTreeSet, candidate_splits: BTreeSet, - split_to_status: HashMap, + split_to_status: HashMap, origin_time: Instant, limits: SplitCacheLimits, on_disk_bytes: u64, @@ -112,7 +112,7 @@ pub struct SplitTable { impl SplitTable { pub(crate) fn with_limits_and_existing_splits( limits: SplitCacheLimits, - existing_filepaths: BTreeMap, + existing_filepaths: BTreeMap, ) -> SplitTable { let origin_time = Instant::now() - NEWLY_REPORTED_SPLIT_LAST_TIME; let mut split_table = SplitTable { @@ -128,12 +128,12 @@ impl SplitTable { split_table } - fn acknowledge_on_disk_splits(&mut self, existing_filepaths: BTreeMap) { - for (split_ulid, num_bytes) in existing_filepaths { + fn acknowledge_on_disk_splits(&mut self, existing_filepaths: BTreeMap) { + for (split_id, num_bytes) in existing_filepaths { let split_info = SplitInfo { split_key: SplitKey { last_accessed: 0, - split_ulid, + split_id, }, status: Status::OnDisk { num_bytes }, }; @@ -147,8 +147,8 @@ fn compute_timestamp(start: Instant) -> LastAccessDate { } impl SplitTable { - fn remove(&mut self, split_ulid: Ulid) -> Option { - let split_info = self.split_to_status.remove(&split_ulid)?; + fn remove(&mut self, split_id: &SplitId) -> Option { + let split_info = self.split_to_status.remove(split_id)?; let split_queue: &mut BTreeSet = match split_info.status { Status::Candidate { .. } => &mut self.candidate_splits, Status::Downloading { .. } => &mut self.downloading_splits, @@ -185,15 +185,15 @@ impl SplitTable { } let mut splits_to_remove = Vec::new(); for split in &self.downloading_splits { - if let Some(split_info) = self.split_to_status.get(&split.split_ulid) + if let Some(split_info) = self.split_to_status.get(&split.split_id) && let Status::Downloading { alive_token } = &split_info.status && alive_token.strong_count() == 0 { - splits_to_remove.push(split.split_ulid); + splits_to_remove.push(split.split_id.clone()); } } for split in splits_to_remove { - self.remove(split); + self.remove(&split); } } @@ -207,9 +207,11 @@ impl SplitTable { // we truncate *before* inserting, otherwise way may end up in an inconsistent // state which make truncate_candidate_list loop indefinitely self.truncate_candidate_list(); - self.candidate_splits.insert(split_info.split_key) + self.candidate_splits.insert(split_info.split_key.clone()) + } + Status::Downloading { .. } => { + self.downloading_splits.insert(split_info.split_key.clone()) } - Status::Downloading { .. } => self.downloading_splits.insert(split_info.split_key), Status::OnDisk { num_bytes } => { self.on_disk_bytes += num_bytes; SEARCHER_SPLIT_CACHE.cache_metrics.in_cache_count.inc(); @@ -217,18 +219,18 @@ impl SplitTable { .cache_metrics .in_cache_num_bytes .inc_by(num_bytes as f64); - self.on_disk_splits.insert(split_info.split_key) + self.on_disk_splits.insert(split_info.split_key.clone()) } }; // this is fine to do in an inconsistent state, the last entry will just be ignored while // gcing self.gc_downloading_splits_if_necessary(); assert!(was_not_in_queue); - let split_ulid_was_absent = self + let split_id_was_absent = self .split_to_status - .insert(split_info.split_key.split_ulid, split_info) + .insert(split_info.split_key.split_id.clone(), split_info) .is_none(); - assert!(split_ulid_was_absent); + assert!(split_id_was_absent); } /// Touch the file, updating its last access time, possibly extending its life in the @@ -237,21 +239,21 @@ impl SplitTable { /// If the file is already on the disk cache, return `Some(num_bytes)`. /// If the file is not in cache, return `None`, and register the file in the candidate for /// download list. - pub fn touch(&mut self, split_ulid: Ulid, storage_uri: &Uri) -> Option { + pub fn touch(&mut self, split_id: SplitId, storage_uri: &Uri) -> Option { let timestamp = compute_timestamp(self.origin_time); - let status = self.mutate_split(split_ulid, |old_split_info| { + let status = self.mutate_split(split_id, |old_split_info, split_id| { if let Some(mut split_info) = old_split_info { split_info.split_key.last_accessed = timestamp; split_info } else { SplitInfo { split_key: SplitKey { - split_ulid, + split_id: split_id.clone(), last_accessed: timestamp, }, status: Status::Candidate(CandidateSplit { storage_uri: storage_uri.clone(), - split_ulid, + split_id, living_token: Arc::new(()), }), } @@ -264,25 +266,27 @@ impl SplitTable { } } - /// Mutates a split ulid. + /// Mutates the split with the given id. /// /// By design this function maintains the invariant. - /// It removes the split with the given ulid, modifies, and re + /// It removes the split with the given id, modifies it, and re-inserts it. + /// The owned `split_id` is handed to `mutate_fn` so it can be reused when building a fresh + /// `SplitInfo` (avoiding an extra allocation). fn mutate_split( &mut self, - split_ulid: Ulid, - mutate_fn: impl FnOnce(Option) -> SplitInfo, + split_id: SplitId, + mutate_fn: impl FnOnce(Option, SplitId) -> SplitInfo, ) -> Status { - let split_info_opt = self.remove(split_ulid); - let new_split: SplitInfo = mutate_fn(split_info_opt); + let split_info_opt = self.remove(&split_id); + let new_split: SplitInfo = mutate_fn(split_info_opt, split_id); let new_status = new_split.status.clone(); self.insert(new_split); new_status } - fn change_split_status(&mut self, split_ulid: Ulid, status: Status) { + fn change_split_status(&mut self, split_id: SplitId, status: Status) { let start_time = self.origin_time; - self.mutate_split(split_ulid, move |split_info_opt| { + self.mutate_split(split_id, move |split_info_opt, split_id| { if let Some(mut split_info) = split_info_opt { split_info.status = status; split_info @@ -290,7 +294,7 @@ impl SplitTable { SplitInfo { split_key: SplitKey { last_accessed: compute_timestamp(start_time), - split_ulid, + split_id, }, status, } @@ -298,9 +302,9 @@ impl SplitTable { }); } - pub(crate) fn report(&mut self, split_ulid: Ulid, storage_uri: Uri) { + pub(crate) fn report(&mut self, split_id: SplitId, storage_uri: Uri) { let origin_time = self.origin_time; - self.mutate_split(split_ulid, move |split_info_opt| { + self.mutate_split(split_id, move |split_info_opt, split_id| { if let Some(split_info) = split_info_opt { return split_info; } @@ -308,11 +312,11 @@ impl SplitTable { split_key: SplitKey { last_accessed: compute_timestamp(origin_time) .saturating_sub(NEWLY_REPORTED_SPLIT_LAST_TIME.as_micros() as u64), - split_ulid, + split_id: split_id.clone(), }, status: Status::Candidate(CandidateSplit { storage_uri, - split_ulid, + split_id, living_token: Arc::new(()), }), } @@ -323,13 +327,13 @@ impl SplitTable { fn truncate_candidate_list(&mut self) { // we remove one more to make place for one candidate about to be inserted while self.candidate_splits.len() >= MAX_NUM_CANDIDATES { - let worst_candidate = self.candidate_splits.first().unwrap().split_ulid; - self.remove(worst_candidate); + let worst_candidate = self.candidate_splits.first().unwrap().split_id.clone(); + self.remove(&worst_candidate); } } - pub(crate) fn register_as_downloaded(&mut self, split_ulid: Ulid, num_bytes: u64) { - self.change_split_status(split_ulid, Status::OnDisk { num_bytes }); + pub(crate) fn register_as_downloaded(&mut self, split_id: SplitId, num_bytes: u64) { + self.change_split_status(split_id, Status::OnDisk { num_bytes }); } /// Change the state of the given split from candidate to downloading state, @@ -337,8 +341,8 @@ impl SplitTable { /// /// This function does NOT trigger the download itself. It is up to /// the caller to actually initiate the download. - pub(crate) fn start_download(&mut self, split_ulid: Ulid) -> Option { - let split_info = self.remove(split_ulid)?; + pub(crate) fn start_download(&mut self, split_id: &SplitId) -> Option { + let split_info = self.remove(split_id)?; let Status::Candidate(candidate_split) = split_info.status else { self.insert(split_info); return None; @@ -352,7 +356,7 @@ impl SplitTable { } fn best_candidate(&self) -> Option { - self.candidate_splits.last().copied() + self.candidate_splits.last().cloned() } fn is_out_of_limits(&self) -> bool { @@ -380,15 +384,18 @@ impl SplitTable { pub(crate) fn make_room_for_split_if_necessary( &mut self, last_access_date: LastAccessDate, - ) -> Result, NoRoomAvailable> { + ) -> Result, NoRoomAvailable> { let mut split_infos = Vec::new(); while self.is_out_of_limits() { - if let Some(first_split) = self.on_disk_splits.first() { - if first_split.last_accessed > last_access_date { + // We clone the oldest split's key so we can drop the immutable borrow on + // `on_disk_splits` before calling `remove`, which needs `&mut self`. + let oldest_split_key_opt: Option = self.on_disk_splits.first().cloned(); + if let Some(oldest_split_key) = oldest_split_key_opt { + if oldest_split_key.last_accessed > last_access_date { // This is not worth doing the eviction. break; } - split_infos.extend(self.remove(first_split.split_ulid)); + split_infos.extend(self.remove(&oldest_split_key.split_id)); } else { break; } @@ -403,18 +410,18 @@ impl SplitTable { } else { Ok(split_infos .into_iter() - .map(|split_info| split_info.split_key.split_ulid) + .map(|split_info| split_info.split_key.split_id) .collect()) } } pub(crate) fn find_download_opportunity(&mut self) -> Option { let best_candidate_split_key = self.best_candidate()?; - let splits_to_delete: Vec = self + let splits_to_delete: Vec = self .make_room_for_split_if_necessary(best_candidate_split_key.last_accessed) .ok()?; let split_to_download: CandidateSplit = - self.start_download(best_candidate_split_key.split_ulid)?; + self.start_download(&best_candidate_split_key.split_id)?; Some(DownloadOpportunity { splits_to_delete, split_to_download, @@ -433,14 +440,14 @@ pub(crate) struct NoRoomAvailable; #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct CandidateSplit { pub storage_uri: Uri, - pub split_ulid: Ulid, + pub split_id: SplitId, pub living_token: Arc<()>, } pub(crate) struct DownloadOpportunity { // At this point, the split have already been removed from the split table. // The file however need to be deleted. - pub splits_to_delete: Vec, + pub splits_to_delete: Vec, pub split_to_download: CandidateSplit, } @@ -452,6 +459,7 @@ mod tests { use bytesize::ByteSize; use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; + use quickwit_proto::types::SplitId; use ulid::Ulid; use crate::split_cache::split_table::{ @@ -460,11 +468,19 @@ mod tests { const TEST_STORAGE_URI: &str = "s3://test"; - fn sorted_split_ulids(num_splits: usize) -> Vec { - let mut split_ulids: Vec = - std::iter::repeat_with(Ulid::new).take(num_splits).collect(); - split_ulids.sort(); - split_ulids + /// Generates split ids backed by ULIDs. We keep using ULIDs in tests because the cache's + /// ordering relies on lexicographic ordering of the ids, and ULID strings happen to be + /// time-sortable — which makes for readable, deterministic tests. + fn new_test_split_id() -> SplitId { + SplitId::from(Ulid::new().to_string()) + } + + fn sorted_split_ids(num_splits: usize) -> Vec { + let mut split_ids: Vec = std::iter::repeat_with(new_test_split_id) + .take(num_splits) + .collect(); + split_ids.sort(); + split_ids } #[test] @@ -478,13 +494,13 @@ mod tests { }, Default::default(), ); - let ulids = sorted_split_ulids(2); - let ulid1 = ulids[0]; - let ulid2 = ulids[1]; - split_table.report(ulid1, Uri::for_test(TEST_STORAGE_URI)); - split_table.report(ulid2, Uri::for_test(TEST_STORAGE_URI)); + let split_ids = sorted_split_ids(2); + let split_id1 = split_ids[0].clone(); + let split_id2 = split_ids[1].clone(); + split_table.report(split_id1, Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id2.clone(), Uri::for_test(TEST_STORAGE_URI)); let candidate = split_table.best_candidate().unwrap(); - assert_eq!(candidate.split_ulid, ulid2); + assert_eq!(candidate.split_id, split_id2); } #[test] @@ -498,15 +514,15 @@ mod tests { }, Default::default(), ); - let ulids = sorted_split_ulids(2); - let ulid1 = ulids[0]; - let ulid2 = ulids[1]; - split_table.report(ulid1, Uri::for_test(TEST_STORAGE_URI)); - split_table.report(ulid2, Uri::for_test(TEST_STORAGE_URI)); - let num_bytes_opt = split_table.touch(ulid1, &Uri::for_test("s3://test1/")); + let split_ids = sorted_split_ids(2); + let split_id1 = split_ids[0].clone(); + let split_id2 = split_ids[1].clone(); + split_table.report(split_id1.clone(), Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id2, Uri::for_test(TEST_STORAGE_URI)); + let num_bytes_opt = split_table.touch(split_id1.clone(), &Uri::for_test("s3://test1/")); assert!(num_bytes_opt.is_none()); let candidate = split_table.best_candidate().unwrap(); - assert_eq!(candidate.split_ulid, ulid1); + assert_eq!(candidate.split_id, split_id1); } #[test] @@ -520,25 +536,25 @@ mod tests { }, Default::default(), ); - let ulid1 = Ulid::new(); - split_table.report(ulid1, Uri::for_test(TEST_STORAGE_URI)); + let split_id1 = new_test_split_id(); + split_table.report(split_id1.clone(), Uri::for_test(TEST_STORAGE_URI)); assert_eq!(split_table.num_bytes(), 0); - let download = split_table.start_download(ulid1); + let download = split_table.start_download(&split_id1); assert!(download.is_some()); - assert!(split_table.start_download(ulid1).is_none()); - split_table.register_as_downloaded(ulid1, 10_000_000); + assert!(split_table.start_download(&split_id1).is_none()); + split_table.register_as_downloaded(split_id1.clone(), 10_000_000); assert_eq!(split_table.num_bytes(), 10_000_000); assert_eq!( - split_table.touch(ulid1, &Uri::for_test(TEST_STORAGE_URI)), + split_table.touch(split_id1, &Uri::for_test(TEST_STORAGE_URI)), Some(10_000_000) ); - let ulid2 = Ulid::new(); - split_table.report(ulid2, Uri::for_test("s3://test`/")); - let download = split_table.start_download(ulid2); + let split_id2 = new_test_split_id(); + split_table.report(split_id2.clone(), Uri::for_test("s3://test`/")); + let download = split_table.start_download(&split_id2); assert!(download.is_some()); - assert!(split_table.start_download(ulid2).is_none()); + assert!(split_table.start_download(&split_id2).is_none()); assert_eq!(split_table.num_bytes(), 10_000_000); - split_table.register_as_downloaded(ulid2, 3_000_000); + split_table.register_as_downloaded(split_id2, 3_000_000); assert_eq!(split_table.num_bytes(), 13_000_000); } @@ -553,31 +569,34 @@ mod tests { }, Default::default(), ); - let mut split_ulids: Vec = std::iter::repeat_with(Ulid::new).take(6).collect(); - split_ulids.sort(); + let split_ids = sorted_split_ids(6); let splits = [ - (split_ulids[0], 10_000), - (split_ulids[1], 20_000), - (split_ulids[2], 300_000), - (split_ulids[3], 400_000), - (split_ulids[4], 100_000), - (split_ulids[5], 300_000), + (split_ids[0].clone(), 10_000), + (split_ids[1].clone(), 20_000), + (split_ids[2].clone(), 300_000), + (split_ids[3].clone(), 400_000), + (split_ids[4].clone(), 100_000), + (split_ids[5].clone(), 300_000), ]; - for (split_ulid, num_bytes) in splits { - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); - split_table.register_as_downloaded(split_ulid, num_bytes); + for (split_id, num_bytes) in &splits { + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); + split_table.register_as_downloaded(split_id.clone(), *num_bytes); } - let new_ulid = Ulid::new(); - split_table.report(new_ulid, Uri::for_test(TEST_STORAGE_URI)); + let new_split_id = new_test_split_id(); + split_table.report(new_split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); let DownloadOpportunity { splits_to_delete, split_to_download, } = split_table.find_download_opportunity().unwrap(); assert_eq!( &splits_to_delete[..], - &[splits[0].0, splits[1].0, splits[2].0][..] + &[ + splits[0].0.clone(), + splits[1].0.clone(), + splits[2].0.clone() + ][..] ); - assert_eq!(split_to_download.split_ulid, new_ulid); + assert_eq!(split_to_download.split_id, new_split_id); } #[test] @@ -591,28 +610,30 @@ mod tests { }, Default::default(), ); - let mut split_ulids: Vec = std::iter::repeat_with(Ulid::new).take(6).collect(); - split_ulids.sort(); + let split_ids = sorted_split_ids(6); let splits = [ - (split_ulids[0], 10_000), - (split_ulids[1], 20_000), - (split_ulids[2], 300_000), - (split_ulids[3], 400_000), - (split_ulids[4], 100_000), - (split_ulids[5], 300_000), + (split_ids[0].clone(), 10_000), + (split_ids[1].clone(), 20_000), + (split_ids[2].clone(), 300_000), + (split_ids[3].clone(), 400_000), + (split_ids[4].clone(), 100_000), + (split_ids[5].clone(), 300_000), ]; - for (split_ulid, num_bytes) in splits { - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); - split_table.register_as_downloaded(split_ulid, num_bytes); + for (split_id, num_bytes) in &splits { + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); + split_table.register_as_downloaded(split_id.clone(), *num_bytes); } - let new_ulid = Ulid::new(); - split_table.report(new_ulid, Uri::for_test(TEST_STORAGE_URI)); + let new_split_id = new_test_split_id(); + split_table.report(new_split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); let DownloadOpportunity { splits_to_delete, split_to_download, } = split_table.find_download_opportunity().unwrap(); - assert_eq!(&splits_to_delete[..], &[splits[0].0, splits[1].0]); - assert_eq!(split_to_download.split_ulid, new_ulid); + assert_eq!( + &splits_to_delete[..], + &[splits[0].0.clone(), splits[1].0.clone()] + ); + assert_eq!(split_to_download.split_id, new_split_id); } #[test] @@ -626,23 +647,23 @@ mod tests { }, Default::default(), ); - let split_ulid = Ulid::new(); - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); - let candidate = split_table.start_download(split_ulid).unwrap(); + let split_id = new_test_split_id(); + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); + let candidate = split_table.start_download(&split_id).unwrap(); // This report should be cancelled as we have a download currently running. - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); - assert!(split_table.start_download(split_ulid).is_none()); + assert!(split_table.start_download(&split_id).is_none()); std::mem::drop(candidate); // Still not possible to start a download. - assert!(split_table.start_download(split_ulid).is_none()); + assert!(split_table.start_download(&split_id).is_none()); // This report should be considered as our candidate (and its alive token has been dropped) - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); - let candidate2 = split_table.start_download(split_ulid).unwrap(); - assert_eq!(candidate2.split_ulid, split_ulid); + let candidate2 = split_table.start_download(&split_id).unwrap(); + assert_eq!(candidate2.split_id, split_id); } #[test] @@ -657,8 +678,8 @@ mod tests { Default::default(), ); for i in 1..2_000 { - let split_ulid = Ulid::new(); - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); + let split_id = new_test_split_id(); + split_table.report(split_id, Uri::for_test(TEST_STORAGE_URI)); assert_eq!( split_table.candidate_splits.len(), i.min(super::MAX_NUM_CANDIDATES) @@ -679,16 +700,18 @@ mod tests { Default::default(), ); for i in (0u128..=super::MAX_NUM_CANDIDATES as u128).rev() { - let split_ulid = Ulid(i); + // ULID strings preserve the ordering of the underlying u128, so the lexicographic + // ordering of these split ids matches the numeric ordering of `i`. + let split_id = SplitId::from(Ulid(i).to_string()); let candidate_split = CandidateSplit { storage_uri: Uri::for_test(TEST_STORAGE_URI), - split_ulid, + split_id: split_id.clone(), living_token: Arc::new(()), }; let split_info = SplitInfo { split_key: SplitKey { last_accessed: 0u64, - split_ulid, + split_id, }, status: Status::Candidate(candidate_split), };