From 2d30d2adbceab5f0836d2bc69bae4023439de05d Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 30 Jun 2026 09:45:42 +0200 Subject: [PATCH 1/4] Introduce SplitId(Arc) newtype, confine Ulid to split id generation Replace the `pub type SplitId = String` alias with an opaque `SplitId(Arc)` newtype so that no code outside of split id generation depends on the internal structure of a split id. This is the first step towards adding a random prefix to split ids (to spread S3 read load across prefixes), keeping that future change localized to the generator. The newtype is serde-transparent, so on-disk and wire representations are unchanged (backward compatible). It is adopted by SplitMetadata, the searcher split cache (where Ulid was used as an opaque key) and the indexing split store registry. Wire/proto types, search-internal hit tracking, CLI args and id-list helpers feeding proto requests stay String. After this change ulid::Ulid is referenced for split ids in only two places: - new_split_id(), the generation site - a temporary shim in the indexing split cache that recovers a split's creation time from the trailing ULID of its id, tolerant of a future random prefix. This store is slated for removal with the compactor service. --- quickwit/quickwit-cli/src/index.rs | 2 +- quickwit/quickwit-cli/src/split.rs | 7 +- quickwit/quickwit-cli/src/tool.rs | 5 +- quickwit/quickwit-cli/tests/cli.rs | 2 +- .../quickwit-doc-mapper/src/query_builder.rs | 3 +- .../src/garbage_collection.rs | 18 +- .../quickwit-index-management/src/index.rs | 6 +- .../quickwit-indexing/src/actors/indexer.rs | 4 +- .../src/actors/indexing_service.rs | 2 +- .../src/actors/log_publisher_impl.rs | 14 +- .../src/actors/merge_executor.rs | 8 +- .../src/actors/merge_planner.rs | 18 +- .../src/actors/merge_split_downloader.rs | 8 +- .../quickwit-indexing/src/actors/packager.rs | 10 +- .../parquet_pipeline/parquet_indexer.rs | 4 +- .../parquet_merge_executor.rs | 6 +- .../parquet_pipeline/parquet_splits_update.rs | 4 +- .../actors/parquet_pipeline/publisher_impl.rs | 6 +- .../quickwit-indexing/src/actors/uploader.rs | 32 +- quickwit/quickwit-indexing/src/lib.rs | 4 - .../merge_policy/const_write_amplification.rs | 16 +- .../quickwit-indexing/src/merge_policy/mod.rs | 20 +- .../merge_policy/stable_log_merge_policy.rs | 2 +- .../src/models/indexed_split.rs | 13 +- .../src/models/packaged_split.rs | 4 +- .../src/models/publisher_message.rs | 4 +- .../src/models/split_attrs.rs | 2 +- quickwit/quickwit-indexing/src/source/mod.rs | 2 +- .../src/split_store/indexing_split_cache.rs | 220 +++++++++----- .../src/split_store/indexing_split_store.rs | 37 +-- quickwit/quickwit-indexing/src/test_utils.rs | 2 +- .../src/actors/garbage_collector.rs | 2 +- .../src/actors/retention_policy_executor.rs | 2 +- .../src/retention_policy_execution.rs | 8 +- .../file_backed/file_backed_index/mod.rs | 12 +- .../src/metastore/file_backed/mod.rs | 8 +- .../src/metastore/postgres/metastore.rs | 4 +- .../src/metastore/postgres/model.rs | 7 +- .../src/metastore/postgres/utils.rs | 5 +- .../quickwit-metastore/src/split_metadata.rs | 8 +- .../quickwit-metastore/src/tests/index.rs | 10 +- .../src/tests/list_splits.rs | 105 +++---- quickwit/quickwit-metastore/src/tests/mod.rs | 2 +- .../quickwit-metastore/src/tests/source.rs | 2 +- .../quickwit-metastore/src/tests/split.rs | 48 +-- quickwit/quickwit-proto/src/lib.rs | 2 +- quickwit/quickwit-proto/src/metastore/mod.rs | 11 +- quickwit/quickwit-proto/src/types/mod.rs | 4 +- quickwit/quickwit-proto/src/types/split_id.rs | 168 +++++++++++ .../src/query_ast/cache_node.rs | 11 +- quickwit/quickwit-search/src/collector.rs | 19 +- quickwit/quickwit-search/src/leaf.rs | 8 +- quickwit/quickwit-search/src/leaf_cache.rs | 9 +- quickwit/quickwit-search/src/lib.rs | 2 +- .../quickwit-search/src/list_fields/cache.rs | 7 +- .../quickwit-search/src/list_fields/leaf.rs | 4 +- quickwit/quickwit-search/src/root.rs | 8 +- .../quickwit-search/src/top_k_collector.rs | 6 +- .../src/index_api/split_resource.rs | 6 +- .../src/file_descriptor_cache.rs | 57 ++-- .../src/split_cache/download_task.rs | 8 +- .../quickwit-storage/src/split_cache/mod.rs | 34 +-- .../src/split_cache/split_table.rs | 277 ++++++++++-------- 63 files changed, 813 insertions(+), 536 deletions(-) create mode 100644 quickwit/quickwit-proto/src/types/split_id.rs diff --git a/quickwit/quickwit-cli/src/index.rs b/quickwit/quickwit-cli/src/index.rs index 9d696065ced..9b66007416c 100644 --- a/quickwit/quickwit-cli/src/index.rs +++ b/quickwit/quickwit-cli/src/index.rs @@ -1186,7 +1186,7 @@ mod test { time_range: RangeInclusive, size: u64, ) -> SplitMetadata { - let mut split_metadata = SplitMetadata::for_test(split_id.to_string()); + let mut split_metadata = SplitMetadata::for_test(split_id.into()); split_metadata.num_docs = num_docs; split_metadata.time_range = Some(time_range); split_metadata.footer_offsets = (size - 10)..size; diff --git a/quickwit/quickwit-cli/src/split.rs b/quickwit/quickwit-cli/src/split.rs index ebdebbb7091..f3d13d6c6ab 100644 --- a/quickwit/quickwit-cli/src/split.rs +++ b/quickwit/quickwit-cli/src/split.rs @@ -261,9 +261,10 @@ impl SplitCliCommand { let index_id = matches .remove_one::("index") .expect("`index` should be a required arg."); - let split_id = matches + let split_id: SplitId = matches .remove_one::("split") - .expect("`split` should be a required arg."); + .expect("`split` should be a required arg.") + .into(); let client_args = ClientArgs::parse(&mut matches)?; let verbose = matches.get_flag("verbose"); @@ -345,7 +346,7 @@ async fn describe_split_cli(args: DescribeSplitArgs) -> anyhow::Result<()> { .await .expect("Failed to fetch splits.") .into_iter() - .find(|split| split.split_id() == args.split_id) + .find(|split| split.split_id() == args.split_id.as_str()) .with_context(|| { format!( "could not find split metadata in metastore {}", diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index 8c212080ec4..d3eba3eba2a 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -366,9 +366,10 @@ impl ToolCliCommand { let index_id = matches .remove_one::("index") .expect("`index` should be a required arg."); - let split_id = matches + let split_id: SplitId = matches .remove_one::("split") - .expect("`split` should be a required arg."); + .expect("`split` should be a required arg.") + .into(); let config_uri = matches .remove_one::("config") .map(|uri_str| Uri::from_str(&uri_str)) diff --git a/quickwit/quickwit-cli/tests/cli.rs b/quickwit/quickwit-cli/tests/cli.rs index 5f695ec78d9..2fc61548a2f 100644 --- a/quickwit/quickwit-cli/tests/cli.rs +++ b/quickwit/quickwit-cli/tests/cli.rs @@ -865,7 +865,7 @@ async fn test_garbage_collect_index_cli() { index_uid: Some(index_uid.clone()), split_ids: splits_metadata .into_iter() - .map(|split_metadata| split_metadata.split_id) + .map(|split_metadata| split_metadata.split_id.to_string()) .collect(), }) .await diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index 8dee1f6335a..27b4a0cffb6 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -17,7 +17,6 @@ use std::convert::Infallible; use std::ops::Bound; use std::sync::Arc; -use quickwit_proto::types::SplitId; use quickwit_query::query_ast::{ BuildTantivyAstContext, FieldPresenceQuery, FullTextQuery, PhrasePrefixQuery, QueryAst, QueryAstTransformer, QueryAstVisitor, RangeQuery, RegexQuery, TermSetQuery, WildcardQuery, @@ -158,7 +157,7 @@ impl<'a, 'f> QueryAstVisitor<'a> for ExistsQueryFastFields<'f> { pub(crate) fn build_query( query_ast: QueryAst, context: &BuildTantivyAstContext, - cache_context: Option<(Arc, SplitId)>, + cache_context: Option<(Arc, String)>, ) -> Result<(Box, WarmupInfo), QueryParserError> { let mut fast_fields: HashSet = HashSet::new(); diff --git a/quickwit/quickwit-index-management/src/garbage_collection.rs b/quickwit/quickwit-index-management/src/garbage_collection.rs index 5d85e4c70bb..8739220f866 100644 --- a/quickwit/quickwit-index-management/src/garbage_collection.rs +++ b/quickwit/quickwit-index-management/src/garbage_collection.rs @@ -474,7 +474,7 @@ pub async fn delete_splits_from_storage_and_metastore( let splits_to_delete: Vec = split_infos .values() .map(|info| SplitToDelete { - split_id: info.split_id.clone(), + split_id: info.split_id.to_string(), path: info.file_name.clone(), size_bytes: info.file_size_bytes.as_u64(), }) @@ -508,7 +508,7 @@ pub async fn delete_splits_from_storage_and_metastore( } if !successes.is_empty() { - let split_ids: Vec = successes + let split_ids: Vec = successes .iter() .map(|split_info| split_info.split_id.to_string()) .collect(); @@ -597,7 +597,7 @@ mod tests { let split_id = "test-run-gc--split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -697,7 +697,7 @@ mod tests { let split_id = "test-run-gc--split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -827,7 +827,7 @@ mod tests { let split_id = "test-delete-splits-happy--split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: IndexUid::new_with_random_ulid(index_id), ..Default::default() }; @@ -932,13 +932,13 @@ mod tests { let split_id_0 = "test-delete-splits-storage-error--split-0"; let split_metadata_0 = SplitMetadata { - split_id: split_id_0.to_string(), + split_id: split_id_0.into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_1 = "test-delete-splits-storage-error--split-1"; let split_metadata_1 = SplitMetadata { - split_id: split_id_1.to_string(), + split_id: split_id_1.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -1020,13 +1020,13 @@ mod tests { let split_id_0 = "test-delete-splits-storage-error--split-0"; let split_metadata_0 = SplitMetadata { - split_id: split_id_0.to_string(), + split_id: split_id_0.into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_1 = "test-delete-splits-storage-error--split-1"; let split_metadata_1 = SplitMetadata { - split_id: split_id_1.to_string(), + split_id: split_id_1.into(), index_uid: index_uid.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-index-management/src/index.rs b/quickwit/quickwit-index-management/src/index.rs index 1787ff1e276..af2d3bc34d5 100644 --- a/quickwit/quickwit-index-management/src/index.rs +++ b/quickwit/quickwit-index-management/src/index.rs @@ -230,7 +230,7 @@ impl IndexService { let query = ListSplitsQuery::for_index(index_uid.clone()) .with_split_states([SplitState::Staged, SplitState::Published]); let list_splits_request = ListSplitsRequest::try_from_list_splits_query(&query)?; - let split_ids: Vec = self + let split_ids = self .metastore .list_splits(list_splits_request) .await? @@ -477,7 +477,7 @@ impl IndexService { .await?; let split_ids: Vec = splits_metadata .iter() - .map(|split| split.split_id.to_string()) + .map(|split| split.split_id.clone()) .collect(); let mark_splits_for_deletion_request = MarkSplitsForDeletionRequest::new(index_uid.clone(), split_ids.clone()); @@ -771,7 +771,7 @@ mod tests { let split_id = "test-split"; let split_metadata = SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index da36d6b7994..552362a8e71 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -134,7 +134,7 @@ impl IndexerState { io_controls, )?; info!( - split_id=%indexed_split.split_id(), + split_id=indexed_split.split_id_str(), partition_id=%partition_id, "new-split" ); @@ -665,7 +665,7 @@ impl Indexer { return Ok(()); } let num_splits = splits.len() as u64; - let split_ids = splits.iter().map(|split| split.split_id()).join(","); + let split_ids = splits.iter().map(|split| split.split_id_str()).join(","); info!( index=%self.indexer_state.pipeline_id.index_uid, source=self.indexer_state.pipeline_id.source_id.as_str(), diff --git a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 69b606f93e3..6d0cbe69e19 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -2019,7 +2019,7 @@ mod tests { }) .return_once(|_request| { let splits = vec![Split { - split_metadata: SplitMetadata::for_test("test-split".to_string()), + split_metadata: SplitMetadata::for_test("test-split".into()), split_state: SplitState::Published, update_timestamp: 0, publish_timestamp: Some(0), diff --git a/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs b/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs index 02a4bca1661..f4e6e656f9f 100644 --- a/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs +++ b/quickwit/quickwit-indexing/src/actors/log_publisher_impl.rs @@ -78,13 +78,13 @@ impl Handler for Publisher { let index_checkpoint_delta_json_opt = serialize_checkpoint_delta(&checkpoint_delta_opt)?; let split_ids: Vec = new_splits .iter() - .map(|split| split.split_id.clone()) + .map(|split| split.split_id.to_string()) .collect(); if let Some(_guard) = publish_lock.acquire().await { let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid), staged_split_ids: split_ids.clone(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), index_checkpoint_delta_json_opt, publish_token_opt: publish_token_opt.clone(), }; @@ -129,7 +129,7 @@ mod tests { }; use quickwit_metastore::{PublishSplitsRequestExt, SplitMetadata}; use quickwit_proto::metastore::{EmptyResponse, MetastoreServiceClient, MockMetastoreService}; - use quickwit_proto::types::{IndexUid, Position}; + use quickwit_proto::types::{IndexUid, Position, SplitId}; use tracing::Span; use super::PUBLISHER_NAME; @@ -176,7 +176,7 @@ mod tests { .send_message(SplitsUpdate { index_uid: ref_index_uid.clone(), new_splits: vec![SplitMetadata { - split_id: "split".to_string(), + split_id: "split".into(), ..Default::default() }], replaced_split_ids: Vec::new(), @@ -320,10 +320,10 @@ mod tests { .send_message(SplitsUpdate { index_uid: ref_index_uid.clone(), new_splits: vec![SplitMetadata { - split_id: "split3".to_string(), + split_id: "split3".into(), ..Default::default() }], - replaced_split_ids: vec!["split1".to_string(), "split2".to_string()], + replaced_split_ids: vec![SplitId::from("split1"), SplitId::from("split2")], checkpoint_delta_opt: None, publish_lock: PublishLock::default(), publish_token_opt: None, @@ -365,7 +365,7 @@ mod tests { publisher_mailbox .send_message(SplitsUpdate { index_uid: IndexUid::new_with_random_ulid("index"), - new_splits: vec![SplitMetadata::for_test("test-split".to_string())], + new_splits: vec![SplitMetadata::for_test("test-split".into())], replaced_split_ids: Vec::new(), checkpoint_delta_opt: None, publish_lock, diff --git a/quickwit/quickwit-indexing/src/actors/merge_executor.rs b/quickwit/quickwit-indexing/src/actors/merge_executor.rs index d07c6e8046d..d4f06a1d8ee 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_executor.rs @@ -257,7 +257,7 @@ pub fn merge_split_attrs( let num_docs = sum_num_docs(splits); let replaced_split_ids: Vec = splits .iter() - .map(|split| split.split_id().to_string()) + .map(|split| split.split_id().clone()) .collect(); let delete_opstamp = splits .iter() @@ -434,7 +434,7 @@ impl MergeExecutor { ); let mark_splits_for_deletion_request = MarkSplitsForDeletionRequest::new( split.index_uid.clone(), - vec![split.split_id.clone()], + [split.split_id.clone()], ); self.metastore .mark_splits_for_deletion(mark_splits_for_deletion_request) @@ -594,7 +594,7 @@ mod tests { use super::*; use crate::merge_policy::{MergeOperation, MergeTask}; - use crate::{TestSandbox, get_tantivy_directory_from_split_bundle, new_split_id}; + use crate::{TestSandbox, get_tantivy_directory_from_split_bundle}; #[tokio::test] async fn test_merge_executor() -> anyhow::Result<()> { @@ -755,7 +755,7 @@ mod tests { // We want to test a delete on a split with num_merge_ops > 0. let mut new_split_metadata = splits[0].split_metadata.clone(); - new_split_metadata.split_id = new_split_id(); + new_split_metadata.split_id = SplitId::new(); new_split_metadata.num_merge_ops = 1; let stage_splits_request = StageSplitsRequest::try_from_split_metadata(index_uid.clone(), &new_split_metadata) diff --git a/quickwit/quickwit-indexing/src/actors/merge_planner.rs b/quickwit/quickwit-indexing/src/actors/merge_planner.rs index 651e38fc8b3..0d532efbf63 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_planner.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_planner.rs @@ -20,7 +20,7 @@ use async_trait::async_trait; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; use quickwit_metastore::{SplitMaturity, SplitMetadata}; use quickwit_proto::indexing::MergePipelineId; -use quickwit_proto::types::DocMappingUid; +use quickwit_proto::types::{DocMappingUid, SplitId}; use tantivy::Inventory; use time::OffsetDateTime; use tracing::{info, warn}; @@ -70,7 +70,7 @@ pub struct MergePlanner { /// /// We incrementally build this set, by adding new splits to it. /// When it becomes too large, we entirely rebuild it. - known_split_ids: HashSet, + known_split_ids: HashSet, known_split_ids_recompute_attempt_id: usize, merge_policy: Arc, @@ -207,19 +207,19 @@ impl MergePlanner { merge_planner } - fn rebuild_known_split_ids(&self) -> HashSet { - let mut known_split_ids: HashSet = HashSet::default(); + fn rebuild_known_split_ids(&self) -> HashSet { + let mut known_split_ids: HashSet = HashSet::default(); // Add splits that in `partitioned_young_splits`. for young_split_partition in self.partitioned_young_splits.values() { for split in young_split_partition { - known_split_ids.insert(split.split_id().to_string()); + known_split_ids.insert(split.split_id().clone()); } } let ongoing_merge_operations = self.ongoing_merge_operations_inventory.list(); // Add splits that are known as in merge. for merge_op in ongoing_merge_operations { for split in &merge_op.splits { - known_split_ids.insert(split.split_id().to_string()); + known_split_ids.insert(split.split_id().clone()); } } if known_split_ids.len() * 2 >= self.known_split_ids.len() { @@ -235,11 +235,11 @@ impl MergePlanner { /// Updates `known_split_ids` and return true if the split was not /// previously known and should be recorded. - fn acknownledge_split(&mut self, split_id: &str) -> bool { + fn acknownledge_split(&mut self, split_id: &SplitId) -> bool { if self.known_split_ids.contains(split_id) { return false; } - self.known_split_ids.insert(split_id.to_string()); + self.known_split_ids.insert(split_id.clone()); true } @@ -389,7 +389,7 @@ mod tests { num_merge_ops: usize, ) -> SplitMetadata { SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), index_uid: index_uid.clone(), source_id: "test-source".to_string(), node_id: "test-node".to_string(), diff --git a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs index 5d68bb59285..11613ab84d7 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_split_downloader.rs @@ -101,7 +101,7 @@ impl MergeSplitDownloader { for split in splits { if ctx.kill_switch().is_dead() { debug!( - split_id = split.split_id(), + split_id = %split.split_id(), "Kill switch was activated. Cancelling download." ); return Err(ActorExitStatus::Killed); @@ -114,7 +114,7 @@ impl MergeSplitDownloader { let _protect_guard = ctx.protect_zone(); let tantivy_dir = self .split_store - .fetch_and_open_split(split.split_id(), download_directory, &io_controls) + .fetch_and_open_split(split.split_id().clone(), download_directory, &io_controls) .await .map_err(|error| { let split_id = split.split_id(); @@ -133,17 +133,17 @@ mod tests { use quickwit_actors::Universe; use quickwit_common::split_file; + use quickwit_proto::types::SplitId; use quickwit_storage::{PutPayload, RamStorageBuilder, SplitPayloadBuilder}; use super::*; use crate::merge_policy::MergeOperation; - use crate::new_split_id; #[tokio::test] async fn test_merge_split_downloader() -> anyhow::Result<()> { let scratch_directory = TempDirectory::for_test(); let splits_to_merge: Vec = iter::repeat_with(|| { - let split_id = new_split_id(); + let split_id = SplitId::new(); SplitMetadata { split_id, ..Default::default() diff --git a/quickwit/quickwit-indexing/src/actors/packager.rs b/quickwit/quickwit-indexing/src/actors/packager.rs index 9f691d285b6..5ac42845ab1 100644 --- a/quickwit/quickwit-indexing/src/actors/packager.rs +++ b/quickwit/quickwit-indexing/src/actors/packager.rs @@ -126,7 +126,7 @@ impl Handler for Packager { let split_ids: Vec = batch .splits .iter() - .map(|split| split.split_id().to_string()) + .map(|split| split.split_id_str().to_string()) .collect_vec(); debug!( split_ids=?split_ids, @@ -272,12 +272,12 @@ fn create_packaged_split( tag_fields: &[NamedField], ctx: &ActorContext, ) -> anyhow::Result { - debug!(split_id = split.split_id(), "create-packaged-split"); + debug!(split_id = split.split_id_str(), "create-packaged-split"); let split_files = list_split_files(segment_metas, &split.split_scratch_directory)?; // Extracts tag values from inverted indexes only when a field cardinality is less // than `MAX_VALUES_PER_TAG_FIELD`. - debug!(split_id = split.split_id(), tag_fields =? tag_fields, "extract-tags-values"); + debug!(split_id = split.split_id_str(), tag_fields =? tag_fields, "extract-tags-values"); let index_reader = split .index .reader_builder() @@ -307,7 +307,7 @@ fn create_packaged_split( ctx.record_progress(); - debug!(split_id = split.split_id(), "build-hotcache"); + debug!(split_id = split.split_id_str(), "build-hotcache"); let mut hotcache_bytes = Vec::new(); build_hotcache(split.split_scratch_directory.path(), &mut hotcache_bytes)?; ctx.record_progress(); @@ -516,7 +516,7 @@ mod tests { index_uid, source_id, doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split".to_string(), + split_id: "test-split".into(), partition_id: 17u64, num_docs, uncompressed_docs_size_in_bytes: num_docs * 15, diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs index 2d64f54b9b7..126f444e5f1 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_indexer.rs @@ -33,7 +33,7 @@ use quickwit_doc_mapper::{ArrowRowContext, RoutingExpr}; use quickwit_metastore::checkpoint::{IndexCheckpointDelta, SourceCheckpointDelta}; use quickwit_parquet_engine::index::{ParquetBatchAccumulator, ParquetIndexingConfig}; use quickwit_parquet_engine::split::ParquetSplitMetadata; -use quickwit_proto::types::{IndexUid, PublishToken, SourceId}; +use quickwit_proto::types::{IndexUid, PublishToken, SourceId, SplitId}; use serde::Serialize; use tokio::runtime::Handle; use tracing::{debug, info, info_span, warn}; @@ -122,7 +122,7 @@ pub struct ParquetSplitBatch { pub publish_token_opt: Option, /// Split IDs being replaced by this batch (non-empty for merges). /// Empty for the ingest path. - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, /// Holds the temp directory alive until the uploader finishes reading. /// `None` for the ingest path (packager manages its own temp dir). /// `Some` for the merge path (executor's scratch directory). diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs index ffe27b09a0e..e022163d3a4 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_merge_executor.rs @@ -56,7 +56,7 @@ use quickwit_parquet_engine::merge::{ MergeConfig, MergeOutputFile, execute_merge_operation, merge_sorted_parquet_files, }; use quickwit_parquet_engine::storage::{ParquetWriterConfig, RemoteByteSource}; -use quickwit_proto::types::IndexUid; +use quickwit_proto::types::{IndexUid, SplitId}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt}; use tracing::{info, instrument, warn}; @@ -309,9 +309,9 @@ impl Handler for ParquetMergeExecutor { .context("invalid index_uid in merge input") .map_err(|e| ActorExitStatus::from(anyhow::anyhow!(e)))?; - let replaced_split_ids: Vec = input_splits + let replaced_split_ids: Vec = input_splits .iter() - .map(|s| s.split_id.as_str().to_string()) + .map(|s| SplitId::from(s.split_id.as_str())) .collect(); // Verify pre-merge invariants on the inputs the planner gave us. diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs index b4ac6d197d0..20dd3d1ad2b 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/parquet_splits_update.rs @@ -19,7 +19,7 @@ use std::fmt; use itertools::Itertools; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_parquet_engine::split::ParquetSplitMetadata; -use quickwit_proto::types::{IndexUid, PublishToken}; +use quickwit_proto::types::{IndexUid, PublishToken, SplitId}; use tracing::Span; use super::parquet_merge_messages::ParquetMergeTask; @@ -35,7 +35,7 @@ pub struct ParquetSplitsUpdate { /// The staged and uploaded splits (metrics or sketches). pub new_splits: Vec, /// Split IDs being replaced (for merges, typically empty for ingest). - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, /// Checkpoint delta covering the data in these splits. pub checkpoint_delta_opt: Option, /// Publish lock for coordination. diff --git a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs index 8ae01f06c68..1b911ab18c5 100644 --- a/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs +++ b/quickwit/quickwit-indexing/src/actors/parquet_pipeline/publisher_impl.rs @@ -60,7 +60,7 @@ impl Handler for Publisher { let publish_request = PublishSketchSplitsRequest { index_uid: Some(index_uid.clone()), staged_split_ids: split_ids.clone(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), index_checkpoint_delta_json_opt, publish_token_opt: publish_token_opt.clone(), }; @@ -71,7 +71,7 @@ impl Handler for Publisher { let publish_request = PublishMetricsSplitsRequest { index_uid: Some(index_uid.clone()), staged_split_ids: split_ids.clone(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), index_checkpoint_delta_json_opt, publish_token_opt: publish_token_opt.clone(), }; @@ -116,7 +116,7 @@ impl Handler for Publisher { index_uid: index_uid_str.clone(), merge_id: split.split_id.as_str().to_string(), output_split_id: split.split_id.as_str().to_string(), - replaced_split_ids: replaced_split_ids.clone(), + replaced_split_ids: replaced_split_ids.iter().map(String::from).collect(), output_window: window, output_merge_ops: split.num_merge_ops, }); diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index 0b2901f26d5..44dbf9d2aa1 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -323,7 +323,7 @@ impl Handler for Uploader { ) { Ok(split_streamer) => split_streamer, Err(e) => { - warn!(cause=?e, split_id=packaged_split.split_id(), "could not create split streamer"); + warn!(cause=?e, split_id=packaged_split.split_id_str(), "could not create split streamer"); return; } }; @@ -337,7 +337,7 @@ impl Handler for Uploader { report_splits.push(ReportSplit { storage_uri: split_store.remote_uri().to_string(), - split_id: packaged_split.split_id().to_string(), + split_id: packaged_split.split_id_str().to_string(), }); split_metadata_list.push(split_metadata); @@ -376,7 +376,7 @@ impl Handler for Uploader { .await; if let Err(cause) = upload_result { - warn!(cause=?cause, split_id=packaged_split.split_id(), "Failed to upload split. Killing!"); + warn!(cause=?cause, split_id=packaged_split.split_id_str(), "Failed to upload split. Killing!"); kill_switch.kill(); return; } @@ -516,7 +516,7 @@ mod tests { use quickwit_common::temp_dir::TempDirectory; use quickwit_metastore::checkpoint::{IndexCheckpointDelta, SourceCheckpointDelta}; use quickwit_proto::metastore::{EmptyResponse, MockMetastoreService}; - use quickwit_proto::types::{DocMappingUid, NodeId}; + use quickwit_proto::types::{DocMappingUid, NodeId, SplitId}; use quickwit_storage::RamStorage; use tantivy::DateTime; use tokio::sync::oneshot; @@ -586,7 +586,7 @@ mod tests { uncompressed_docs_size_in_bytes: 1_000, num_docs: 10, replaced_split_ids: Vec::new(), - split_id: "test-split".to_string(), + split_id: "test-split".into(), delete_opstamp: 10, num_merge_ops: 0, }, @@ -658,7 +658,7 @@ mod tests { .withf(move |stage_splits_request| -> bool { let splits_metadata = stage_splits_request.deserialize_splits_metadata().unwrap(); let is_metadata_valid = splits_metadata.iter().all(|metadata| { - ["test-split-1", "test-split-2"].contains(&metadata.split_id()) + ["test-split-1", "test-split-2"].contains(&metadata.split_id().as_str()) && metadata.time_range == Some(1628203589..=1628203640) }); let index_uid: IndexUid = stage_splits_request.index_uid().clone(); @@ -689,7 +689,7 @@ mod tests { index_uid: index_uid.clone(), source_id: source_id.clone(), doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split-1".to_string(), + split_id: "test-split-1".into(), partition_id: 3u64, num_docs: 10, uncompressed_docs_size_in_bytes: 1_000, @@ -698,8 +698,8 @@ mod tests { ..=DateTime::from_timestamp_secs(1_628_203_640), ), replaced_split_ids: vec![ - "replaced-split-1".to_string(), - "replaced-split-2".to_string(), + SplitId::from("replaced-split-1"), + SplitId::from("replaced-split-2"), ], delete_opstamp: 0, num_merge_ops: 0, @@ -716,7 +716,7 @@ mod tests { index_uid, source_id, doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split-2".to_string(), + split_id: "test-split-2".into(), partition_id: 3u64, num_docs: 10, uncompressed_docs_size_in_bytes: 1_000, @@ -725,8 +725,8 @@ mod tests { ..=DateTime::from_timestamp_secs(1_628_203_640), ), replaced_split_ids: vec![ - "replaced-split-1".to_string(), - "replaced-split-2".to_string(), + SplitId::from("replaced-split-1"), + SplitId::from("replaced-split-2"), ], delete_opstamp: 0, num_merge_ops: 0, @@ -778,8 +778,8 @@ mod tests { assert_eq!( &replaced_split_ids, &[ - "replaced-split-1".to_string(), - "replaced-split-2".to_string() + SplitId::from("replaced-split-1"), + SplitId::from("replaced-split-2"), ] ); assert!(checkpoint_delta_opt.is_none()); @@ -842,7 +842,7 @@ mod tests { index_uid, source_id, doc_mapping_uid: DocMappingUid::default(), - split_id: "test-split".to_string(), + split_id: "test-split".into(), partition_id: 3u64, time_range: None, uncompressed_docs_size_in_bytes: 1_000, @@ -1029,7 +1029,7 @@ mod tests { uncompressed_docs_size_in_bytes: 1_000, num_docs: 10, replaced_split_ids: Vec::new(), - split_id: SPLIT_ULID_STR.to_string(), + split_id: SPLIT_ULID_STR.into(), delete_opstamp: 10, num_merge_ops: 0, }, diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index f0e79499b8a..894d194e2ce 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -55,10 +55,6 @@ pub use self::source::check_source_connectivity; /// Schema used for the OpenAPI generation which are apart of this crate. pub struct IndexingApiSchemas; -pub fn new_split_id() -> String { - ulid::Ulid::new().to_string() -} - #[allow(clippy::too_many_arguments)] pub async fn start_indexing_service( universe: &Universe, diff --git a/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs b/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs index ce3f4654de1..0fc1dcc813b 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs @@ -123,7 +123,7 @@ impl ConstWriteAmplificationMergePolicy { splits.sort_by(|left, right| { left.create_timestamp .cmp(&right.create_timestamp) - .then_with(|| left.split_id().cmp(right.split_id())) + .then_with(|| left.split_id().cmp(&right.split_id())) }); let mut merge_operations = Vec::new(); while let Some(merge_op) = @@ -194,7 +194,7 @@ impl MergePolicy for ConstWriteAmplificationMergePolicy { left.create_timestamp .cmp(&right.create_timestamp) .reverse() - .then_with(|| left.split_id().cmp(right.split_id())) + .then_with(|| left.split_id().cmp(&right.split_id())) }); let mut merge_operations = Vec::new(); while merge_operations.len() < self.config.max_finalize_merge_operations { @@ -319,7 +319,7 @@ mod tests { fn test_const_write_merge_policy_single_split() { let merge_policy = ConstWriteAmplificationMergePolicy::for_test(); let mut splits = vec![SplitMetadata { - split_id: "01GE1R0KBFQHJ76030RYRAS8QA".to_string(), + split_id: "01GE1R0KBFQHJ76030RYRAS8QA".into(), num_docs: 1, create_timestamp: 1665000000, maturity: merge_policy.split_maturity(1, 0), @@ -337,7 +337,7 @@ mod tests { let create_timestamp = OffsetDateTime::now_utc().unix_timestamp(); let mut splits = (0..merge_policy.config.merge_factor) .map(|i| SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1_000, num_merge_ops: 1, create_timestamp, @@ -361,7 +361,7 @@ mod tests { let mut splits = (0..merge_policy.config.max_merge_factor + merge_policy.config.merge_factor - 1) .map(|i| SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1_000, num_merge_ops: 1, create_timestamp, @@ -384,7 +384,7 @@ mod tests { let now_timestamp: i64 = OffsetDateTime::now_utc().unix_timestamp(); let mut splits: Vec = (0..merge_policy.config.max_merge_factor) .map(|i| SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1_000, num_merge_ops: 1, create_timestamp: now_timestamp + i as i64, @@ -402,7 +402,7 @@ mod tests { let split_ids: Vec<&str> = operations[0] .splits_as_slice() .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .collect(); assert_eq!( &split_ids[..], @@ -419,7 +419,7 @@ mod tests { let num_docs = merge_policy.split_num_docs_target.div_ceil(3); let time_to_maturity = merge_policy.split_maturity(num_docs, 1); SplitMetadata { - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs, num_merge_ops: 1, create_timestamp, diff --git a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs index a0090c1edd1..95bae7a281d 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs @@ -33,7 +33,6 @@ use tantivy::TrackedObject; use tracing::{Span, info_span}; use crate::actors::MergePermit; -use crate::new_split_id; #[derive(Clone, Debug, PartialEq, Eq, Serialize)] pub enum MergeOperationType { @@ -89,7 +88,7 @@ pub struct MergeOperation { impl MergeOperation { pub fn new_merge_operation(splits: Vec) -> Self { - let merge_split_id = new_split_id(); + let merge_split_id = SplitId::new(); let split_ids = splits.iter().map(|split| split.split_id()).collect_vec(); let merge_parent_span = info_span!("merge", merge_split_id=%merge_split_id, split_ids=?split_ids, typ=%MergeOperationType::Merge); Self { @@ -108,7 +107,7 @@ impl MergeOperation { } pub fn new_delete_and_merge_operation(split: SplitMetadata) -> Self { - let merge_split_id = new_split_id(); + let merge_split_id = SplitId::new(); let merge_parent_span = info_span!("delete", merge_split_id=%merge_split_id, split_ids=?split.split_id(), typ=%MergeOperationType::DeleteAndMerge); Self { merge_parent_span, @@ -237,7 +236,7 @@ pub mod tests { use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeSet, HashMap}; - use std::hash::Hasher; + use std::hash::{Hash as _, Hasher}; use std::ops::RangeInclusive; use proptest::prelude::*; @@ -274,11 +273,10 @@ pub mod tests { prop_compose! { fn split_strategy() (num_merge_ops in 0usize..5usize, start_timestamp in 1_664_000_000i64..1_665_000_000i64, average_time_delta in 100i64..120i64, delta_creation_date in 0u64..100_000u64, num_docs in num_docs_strategy()) -> SplitMetadata { - let split_id = crate::new_split_id(); let end_timestamp = start_timestamp + average_time_delta * pow_of_10(num_merge_ops) as i64; let create_timestamp: i64 = (end_timestamp as u64 + delta_creation_date) as i64; SplitMetadata { - split_id, + split_id: SplitId::new(), time_range: Some(start_timestamp..=end_timestamp), num_docs, create_timestamp, @@ -312,7 +310,7 @@ pub mod tests { let create_timestamp = OffsetDateTime::now_utc().unix_timestamp(); let time_to_maturity = merge_policy.split_maturity(num_docs, 0); SplitMetadata { - split_id: format!("split_{split_ord:02}"), + split_id: format!("split_{split_ord:02}").into(), num_docs, time_range: Some(time_range), create_timestamp, @@ -330,7 +328,7 @@ pub mod tests { let mut checksum = 0u64; for split in op.splits_as_slice() { let mut hasher = DefaultHasher::default(); - hasher.write(split.split_id.as_bytes()); + split.split_id.hash(&mut hasher); checksum ^= hasher.finish(); } checksum @@ -404,7 +402,7 @@ pub mod tests { fn fake_merge(merge_policy: &Arc, splits: &[SplitMetadata]) -> SplitMetadata { assert!(!splits.is_empty(), "Split list should not be empty."); - let merged_split_id = new_split_id(); + let merged_split_id = SplitId::new(); let tags = merge_tags(splits); let pipeline_id = MergePipelineId { node_id: NodeId::from_str("test_node"), @@ -421,7 +419,7 @@ pub mod tests { merge_op: &MergeOperation, ) -> SplitMetadata { for split in merge_op.splits_as_slice() { - assert!(split_index.remove(split.split_id()).is_some()); + assert!(split_index.remove(split.split_id().as_str()).is_some()); } let merged_split = fake_merge(merge_policy, merge_op.splits_as_slice()); split_index.insert(merged_split.split_id().to_string(), merged_split.clone()); @@ -505,7 +503,7 @@ pub mod tests { maturity: SplitMaturity, ) -> SplitMetadata { SplitMetadata { - split_id: crate::new_split_id(), + split_id: SplitId::new(), partition_id: 3u64, num_docs: num_docs as usize, uncompressed_docs_size_in_bytes: 256u64 * num_docs, diff --git a/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs b/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs index 43f2e352001..878965c500a 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs @@ -173,7 +173,7 @@ fn cmp_splits_by_reverse_time_end(left: &SplitMetadata, right: &SplitMetadata) - .reverse() .then_with(|| left.num_docs.cmp(&right.num_docs)) .then_with(|| { - left.split_id().cmp(right.split_id()) //< for determinism. + left.split_id().cmp(&right.split_id()) //< for determinism. }) } diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index 03728fe2f6a..8b30bada3fb 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -20,7 +20,7 @@ use quickwit_common::temp_dir::TempDirectory; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_metrics::GaugeGuard; use quickwit_proto::indexing::IndexingPipelineId; -use quickwit_proto::types::{DocMappingUid, IndexUid, PublishToken}; +use quickwit_proto::types::{DocMappingUid, IndexUid, PublishToken, SplitId}; use tantivy::IndexBuilder; use tantivy::directory::MmapDirectory; use tracing::{Span, instrument}; @@ -28,7 +28,6 @@ use tracing::{Span, instrument}; use crate::controlled_directory::ControlledDirectory; use crate::merge_policy::MergeTask; use crate::models::{PublishLock, SplitAttrs}; -use crate::new_split_id; pub struct IndexedSplitBuilder { pub split_attrs: SplitAttrs, @@ -45,8 +44,8 @@ pub struct IndexedSplit { } impl IndexedSplit { - pub fn split_id(&self) -> &str { - &self.split_attrs.split_id + pub fn split_id_str(&self) -> &str { + &self.split_attrs.split_id.as_str() } } @@ -85,7 +84,7 @@ impl IndexedSplitBuilder { // We avoid intermediary merge, and instead merge all segments in the packager. // The benefit is that we don't have to wait for potentially existing merges, // and avoid possible race conditions. - let split_id = new_split_id(); + let split_id = SplitId::new(); let split_scratch_directory_prefix = format!("split-{split_id}-"); let split_scratch_directory = scratch_directory.named_temp_child(&split_scratch_directory_prefix)?; @@ -145,8 +144,8 @@ impl IndexedSplitBuilder { self.split_scratch_directory.path() } - pub fn split_id(&self) -> &str { - &self.split_attrs.split_id + pub fn split_id_str(&self) -> &str { + self.split_attrs.split_id.as_str() } } diff --git a/quickwit/quickwit-indexing/src/models/packaged_split.rs b/quickwit/quickwit-indexing/src/models/packaged_split.rs index db9b4abae39..6c8894da79e 100644 --- a/quickwit/quickwit-indexing/src/models/packaged_split.rs +++ b/quickwit/quickwit-indexing/src/models/packaged_split.rs @@ -39,8 +39,8 @@ impl PackagedSplit { &self.split_attrs.index_uid } - pub fn split_id(&self) -> &str { - &self.split_attrs.split_id + pub fn split_id_str(&self) -> &str { + self.split_attrs.split_id.as_str() } } diff --git a/quickwit/quickwit-indexing/src/models/publisher_message.rs b/quickwit/quickwit-indexing/src/models/publisher_message.rs index 13182a8f76a..22199d98656 100644 --- a/quickwit/quickwit-indexing/src/models/publisher_message.rs +++ b/quickwit/quickwit-indexing/src/models/publisher_message.rs @@ -17,7 +17,7 @@ use std::fmt; use itertools::Itertools; use quickwit_metastore::SplitMetadata; use quickwit_metastore::checkpoint::IndexCheckpointDelta; -use quickwit_proto::types::{IndexUid, PublishToken}; +use quickwit_proto::types::{IndexUid, PublishToken, SplitId}; use tracing::Span; use crate::merge_policy::MergeTask; @@ -26,7 +26,7 @@ use crate::models::PublishLock; pub struct SplitsUpdate { pub index_uid: IndexUid, pub new_splits: Vec, - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, pub checkpoint_delta_opt: Option, pub publish_lock: PublishLock, pub publish_token_opt: Option, diff --git a/quickwit/quickwit-indexing/src/models/split_attrs.rs b/quickwit/quickwit-indexing/src/models/split_attrs.rs index 2e0504bdd35..f94150444d9 100644 --- a/quickwit/quickwit-indexing/src/models/split_attrs.rs +++ b/quickwit/quickwit-indexing/src/models/split_attrs.rs @@ -60,7 +60,7 @@ pub struct SplitAttrs { pub time_range: Option>, - pub replaced_split_ids: Vec, + pub replaced_split_ids: Vec, /// Delete opstamp. pub delete_opstamp: u64, diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 8b4db0844ad..90c8f9610e3 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -811,7 +811,7 @@ mod test_setup_helper { let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid.clone()), index_checkpoint_delta_json_opt: Some(checkpoint_delta_json), - staged_split_ids: vec![split_id.clone()], + staged_split_ids: vec![split_id.to_string()], replaced_split_ids: Vec::new(), publish_token_opt: None, }; diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index 70f19410ecf..12870939b85 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -16,13 +16,13 @@ use std::collections::BTreeMap; use std::collections::btree_map::Entry; use std::io; use std::path::{Path, PathBuf}; -use std::str::FromStr; use std::time::{Duration, SystemTime}; use anyhow::Context; use bytesize::ByteSize; use quickwit_common::split_file; use quickwit_directories::BundleDirectory; +use quickwit_proto::types::SplitId; use quickwit_storage::StorageResult; use tantivy::Directory; use tantivy::directory::MmapDirectory; @@ -66,40 +66,65 @@ async fn num_bytes_in_folder(directory_path: &Path) -> io::Result { Ok(ByteSize(total_bytes)) } +/// The base-32 string length of a ULID. +const ULID_STR_LEN: usize = 26; + +/// Recovers the ULID that terminates a split id, used to derive the split's creation time. +/// +/// TEMPORARY: the indexing split store is slated for removal once the compactor service lands. +/// Until then we recover a split's creation time from the ULID that terminates its id. A split id +/// may carry a random prefix (added to spread S3 read load), but it always ENDS with a 26-char +/// ULID, so we slice the suffix before parsing. Ids that do not end with a valid ULID fall back to +/// the epoch, which simply makes them the first eviction candidates. +fn split_creation_time(split_id: &SplitId) -> SystemTime { + let split_id_str = split_id.as_str(); + let ulid_start = split_id_str.len().saturating_sub(ULID_STR_LEN); + if let Ok(ulid) = Ulid::from_string(&split_id_str[ulid_start..]) { + ulid.datetime() + } else { + SystemTime::now() + } +} + /// The local split store is a cache for freshly indexed splits. /// /// In order to save the cost of an extra write, we store splits in the form /// of a directory and the split bundles are built upon upload. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] struct SplitFolder { - split_id: Ulid, + created_at: SystemTime, + split_id: SplitId, num_bytes: ByteSize, } +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] +struct SplitFolderKey { + // Creation time, recovered once from the trailing ULID of `split_id` at construction time + created_at: SystemTime, + split_id: SplitId, +} + impl SplitFolder { /// Creates a new `SplitFolder`. /// /// There are no specific constraints on `path`. - pub async fn create(split_id: &str, path: &Path) -> io::Result { - let split_id = Ulid::from_str(split_id).map_err(|_err| { - let error_msg = format!("split Id should be an ulid: got `{split_id}`"); - io::Error::new(io::ErrorKind::InvalidInput, error_msg) - })?; + pub async fn create(split_id: SplitId, path: &Path) -> io::Result { let num_bytes = num_bytes_in_folder(path).await?; + let created_at = split_creation_time(&split_id); Ok(SplitFolder { + created_at, split_id, num_bytes, }) } - - /// Returns the creation time as encoded in the split id ULID. - fn creation_time(&self) -> SystemTime { - self.split_id.datetime() - } } -fn split_id_from_split_folder(dir_path: &Path) -> Option<&str> { - dir_path.file_name()?.to_str()?.strip_suffix(".split") +fn split_id_from_split_folder(dir_path: &Path) -> Option { + dir_path + .file_name()? + .to_str()? + .strip_suffix(".split") + .map(SplitId::from) } /// The [`IndexingSplitCache`] is a local cache used to improve the performance of indexing nodes. @@ -145,13 +170,11 @@ struct InnerSplitCache { struct SplitFolderRegistry { /// Splits owned by the local split store, which reside in the split_store_folder. /// - /// Splits ids are generated using ULID, so that they are sorted - /// according to their creation date. - /// - /// We evict the oldest split first. Note this is not an LRU strategy - /// because we do not care about the last access time, but we only - /// consider the creation time. - split_folders: BTreeMap, + /// `SplitFolder`s are ordered by creation time (see their `Ord` impl), so iteration order + /// matches creation order and the oldest split is the set's first element. We evict the oldest + /// split first. Note this is not an LRU strategy because we do not care about the last access + /// time, but we only consider the creation time. + split_folders: BTreeMap, /// The split store quota shared among all indexing split stores. split_store_quota: SplitStoreQuota, } @@ -164,20 +187,25 @@ impl SplitFolderRegistry { } } - /// Returns an iterator over the split folders sorted by ULID. + /// Returns an iterator over the split folders sorted by creation time. #[cfg(any(test, feature = "testsuite"))] fn iter(&self) -> impl Iterator + '_ { self.split_folders .iter() - .map(|(&split_id, &num_bytes)| SplitFolder { - split_id, - num_bytes, + .map(|(key, num_bytes)| SplitFolder { + created_at: key.created_at, + split_id: key.split_id.clone(), + num_bytes: *num_bytes, }) } /// Returns whether the element was inserted or was already present fn insert(&mut self, split_folder: SplitFolder) -> bool { - if let Entry::Vacant(entry) = self.split_folders.entry(split_folder.split_id) { + let split_folder_key = SplitFolderKey { + created_at: split_folder.created_at, + split_id: split_folder.split_id.clone(), + }; + if let Entry::Vacant(entry) = self.split_folders.entry(split_folder_key) { entry.insert(split_folder.num_bytes); self.split_store_quota.add_split(split_folder.num_bytes); true @@ -186,26 +214,36 @@ impl SplitFolderRegistry { } } - /// Returns true if the split was indeed present in the registry - fn remove(&mut self, split_id: Ulid) -> Option { - let num_bytes = self.split_folders.remove(&split_id)?; + /// Returns the split folder if it was indeed present in the registry. + fn remove(&mut self, split_id: &SplitId) -> Option { + // A `SplitFolder` carries its own ordering key, so to find one by split id we build a + // lower-bound probe — `num_bytes` is the last and only unknown component of the key — and + // take the first entry that actually matches the id. `created_at` is derived + // deterministically from the id (see `split_creation_time`). + let created_at = split_creation_time(split_id); + let split_folder_key = SplitFolderKey { + created_at, + split_id: split_id.clone(), + }; + let num_bytes = self.split_folders.remove(&split_folder_key)?; self.split_store_quota.remove_split(num_bytes); Some(SplitFolder { num_bytes, - split_id, + split_id: split_id.clone(), + created_at, }) } - /// Returns the oldest split (oldest in the sense of the ULID = creation time). - fn oldest_split(&self) -> Option { - let (split_id, _) = self.split_folders.first_key_value()?; - Some(*split_id) + /// Returns the oldest split (oldest in the sense of the creation time). + fn oldest_split(&self) -> Option { + let (key, _num_bytes) = self.split_folders.first_key_value()?; + Some(key.clone()) } /// Removes the oldest split. fn pop_oldest(&mut self) -> Option { - let oldest_split_id = self.oldest_split()?; - self.remove(oldest_split_id) + let oldest_split_folder = self.oldest_split()?; + self.remove(&oldest_split_folder.split_id) } fn quota(&self) -> &SplitStoreQuota { @@ -219,7 +257,7 @@ impl InnerSplitCache { /// Returns `None` if the split is not available in the cache. async fn move_out( &mut self, - split_id: Ulid, + split_id: &SplitId, to_folder: &Path, ) -> StorageResult> { let Some(split_folder) = self.split_registry.remove(split_id) else { @@ -264,7 +302,7 @@ impl InnerSplitCache { } /// Returns the directory filepath of a split in cache. - fn split_path(&self, split_id: Ulid) -> PathBuf { + fn split_path(&self, split_id: &SplitId) -> PathBuf { let split_file = split_file(split_id); self.split_store_folder.join(split_file) } @@ -274,11 +312,11 @@ impl InnerSplitCache { /// # Panics /// Panics if there are no remaining splits. async fn evict_one_split(&mut self) -> io::Result<()> { - let evicted_split = self + let evicted_split: SplitFolder = self .split_registry .pop_oldest() .expect("split cache should not be empty"); - let result = tokio::fs::remove_dir_all(&self.split_path(evicted_split.split_id)).await; + let result = tokio::fs::remove_dir_all(&self.split_path(&evicted_split.split_id)).await; if let Err(io_err) = result { if io_err.kind() == io::ErrorKind::NotFound { // This could happen if some files have been manually deleted @@ -299,17 +337,17 @@ impl InnerSplitCache { /// If the cache capacity does not allow it returns Ok(false). /// /// Ok(true) means the file was effectively accepted. - async fn move_into_cache(&mut self, split_id_str: &str, split_path: &Path) -> io::Result { - let split_folder = SplitFolder::create(split_id_str, split_path).await?; - let split_id = split_folder.split_id; + async fn move_into_cache(&mut self, split_id: SplitId, split_path: &Path) -> io::Result { + let split_folder = SplitFolder::create(split_id, split_path).await?; + let split_id = split_folder.split_id.clone(); let should_move_split = self.make_room_and_record_split(split_folder).await?; if !should_move_split { return Ok(false); } - let to_full_path = self.split_path(split_id); + let to_full_path = self.split_path(&split_id); if let Err(io_err) = tokio::fs::rename(split_path, &to_full_path).await { // keep the registry stats accurate - self.split_registry.remove(split_id); + self.split_registry.remove(&split_id); return Err(io_err); } Ok(true) @@ -317,8 +355,8 @@ impl InnerSplitCache { /// Removes all splits that have a creation date older than `limit`. async fn remove_splits_older_than_limit(&mut self, limit: SystemTime) -> io::Result<()> { - while let Some(split_id) = self.split_registry.oldest_split() { - if split_id.datetime() >= limit { + while let Some(oldest_split) = self.split_registry.oldest_split() { + if oldest_split.created_at >= limit { break; } self.evict_one_split().await?; @@ -343,7 +381,7 @@ impl InnerSplitCache { self.evict_one_split().await?; } - if let Some(creation_time_limit) = split_folder.creation_time().checked_sub(SPLIT_MAX_AGE) { + if let Some(creation_time_limit) = split_folder.created_at.checked_sub(SPLIT_MAX_AGE) { self.remove_splits_older_than_limit(creation_time_limit) .await?; }; @@ -412,16 +450,16 @@ impl IndexingSplitCache { split_registry: SplitFolderRegistry::with_quota(space_quota), }; - split_folders.sort_by_key(SplitFolder::creation_time); + split_folders.sort_by_key(|split_folder| split_folder.created_at); // We record all `split_folder`, sorted by `creation_time`. for split_folder in split_folders { - let split_id = split_folder.split_id; + let split_id = split_folder.split_id.clone(); if !inner_local_split_store .make_room_and_record_split(split_folder) .await? { - let split_dir = inner_local_split_store.split_path(split_id); + let split_dir = inner_local_split_store.split_path(&split_id); tokio::fs::remove_dir_all(&split_dir).await?; } } @@ -432,13 +470,13 @@ impl IndexingSplitCache { } #[cfg(any(test, feature = "testsuite"))] - pub async fn inspect_registry(&self) -> std::collections::HashMap { + pub async fn inspect_registry(&self) -> std::collections::HashMap { self.inner .lock() .await .split_registry .iter() - .map(|split_folder| (split_folder.split_id.to_string(), split_folder.num_bytes)) + .map(|split_folder| (split_folder.split_id.clone(), split_folder.num_bytes)) .collect() } @@ -462,20 +500,14 @@ impl IndexingSplitCache { /// storage. pub(super) async fn get_cached_split( &self, - split_id: &str, + split_id: &SplitId, output_dir_path: &Path, ) -> StorageResult> { let mut split_store_lock = self.inner.lock().await; - let split_ulid = if let Ok(split_ulid) = Ulid::from_str(split_id) { - split_ulid - } else { - return Ok(None); - }; - let split_file_opt: Option = split_store_lock - .move_out(split_ulid, output_dir_path) - .await?; + let split_file_opt: Option = + split_store_lock.move_out(split_id, output_dir_path).await?; if split_file_opt.is_none() { - debug!(split_id, "split folder not in cache"); + debug!(split_id=%split_id, "split folder not in cache"); } Ok(split_file_opt) } @@ -490,7 +522,7 @@ impl IndexingSplitCache { /// Ok(true) means the file was effectively accepted. pub(super) async fn move_into_cache( &self, - split_id: &str, + split_id: SplitId, split_path: &Path, ) -> io::Result { assert!(split_path.is_dir()); @@ -505,7 +537,7 @@ mod tests { use std::io; use std::io::Write; use std::path::Path; - use std::time::Duration; + use std::time::{Duration, SystemTime}; use bytesize::ByteSize; use quickwit_directories::BundleDirectory; @@ -516,9 +548,32 @@ mod tests { use tokio::fs; use ulid::Ulid; - use super::SPLIT_MAX_AGE; + use super::{SPLIT_MAX_AGE, SplitId, split_creation_time}; use crate::split_store::{IndexingSplitCache, SplitStoreQuota}; + #[test] + fn test_split_creation_time() { + let ulid_str = "01GF5449X7DA53TK9F9W2ZJST2"; + let expected = Ulid::from_string(ulid_str).unwrap().datetime(); + + // A bare ULID split id (no prefix) resolves to that ULID's creation time. + assert_eq!(split_creation_time(&SplitId::from(ulid_str)), expected); + + // A split id carrying a random prefix still resolves to its trailing ULID's time. + let prefixed_split_id = SplitId::from(format!("deadbeef{ulid_str}")); + assert_eq!(split_creation_time(&prefixed_split_id), expected); + + // An id whose trailing 26 chars are not a valid ULID (or that is shorter than a ULID) + // falls back to ~now, so a malformed split is treated as freshly created rather than as + // the first eviction candidate. + let before = SystemTime::now(); + let invalid = split_creation_time(&SplitId::from("this-is-not-a-valid-ulid!!")); + let short = split_creation_time(&SplitId::from("short")); + let after = SystemTime::now(); + assert!(invalid >= before && invalid <= after); + assert!(short >= before && short <= after); + } + async fn create_fake_split( split_cache_path: &Path, split_id: &str, @@ -627,7 +682,10 @@ mod tests { .await .unwrap(); local_split_store - .move_into_cache("01GFCZJBMBMEPMAQSFD09VTST2", extra_split.path()) + .move_into_cache( + SplitId::from("01GFCZJBMBMEPMAQSFD09VTST2"), + extra_split.path(), + ) .await .unwrap(); assert_eq!(local_split_store.inspect_registry().await.len(), 1); @@ -672,7 +730,10 @@ mod tests { let extra_split = tempdir().unwrap(); local_split_store // 2022-10-15T4:48:49.803Z - .move_into_cache("01GFCZJBMBMEPMAQSFD09VTST2", extra_split.path()) + .move_into_cache( + SplitId::from("01GFCZJBMBMEPMAQSFD09VTST2"), + extra_split.path(), + ) .await .unwrap(); let cache_content = local_split_store.inspect_registry().await; @@ -685,7 +746,10 @@ mod tests { let extra_split = tempdir().unwrap(); let was_accepted = local_split_store // 2025-01-13T14:28:17.364Z - .move_into_cache("01JHG11FAM8F2XPWHY24R3HF6M", extra_split.path()) + .move_into_cache( + SplitId::from("01JHG11FAM8F2XPWHY24R3HF6M"), + extra_split.path(), + ) .await .unwrap(); assert!(was_accepted); @@ -722,7 +786,7 @@ mod tests { #[tokio::test] async fn test_store_and_fetch() { let temp_dir_in = tempfile::tempdir().unwrap(); - let split_id = Ulid::default().to_string(); + let split_id: SplitId = Ulid::default().to_string().into(); let cache_dir = tempfile::tempdir().unwrap(); let quota = SplitStoreQuota::default(); let local_store = IndexingSplitCache::open(cache_dir.path().to_path_buf(), quota) @@ -733,7 +797,7 @@ mod tests { tokio::fs::create_dir(&split_dir).await.unwrap(); assert!( local_store - .move_into_cache(&split_id, &split_dir) + .move_into_cache(split_id.clone(), &split_dir) .await .unwrap() ); @@ -787,7 +851,10 @@ mod tests { let target_dir = tempdir().unwrap(); let path_opt = local_split_store - .get_cached_split("01GF5215TMV48JT7GZ543BV193", target_dir.path()) + .get_cached_split( + &SplitId::from("01GF5215TMV48JT7GZ543BV193"), + target_dir.path(), + ) .await .unwrap(); assert_eq!(path_opt, None); @@ -815,7 +882,10 @@ mod tests { let extra_split = tempdir().unwrap(); let was_accepted = local_split_store // 2022-10-12T02:14:54.347Z - .move_into_cache("01GF4ZJBMBMEPMAQSFD09VTST2", extra_split.path()) + .move_into_cache( + SplitId::from("01GF4ZJBMBMEPMAQSFD09VTST2"), + extra_split.path(), + ) .await .unwrap(); assert!(was_accepted); @@ -843,7 +913,7 @@ mod tests { extra_split_file.write_all(&[0u8; 15]).unwrap(); let was_accepted = split_store - .move_into_cache(split_id, extra_split.path()) + .move_into_cache(split_id.into(), extra_split.path()) .await .unwrap(); assert!(!was_accepted); diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs index 88904cbd6dd..a881b5dc693 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs @@ -24,6 +24,7 @@ use bytesize::ByteSize; use quickwit_common::io::{IoControls, IoControlsAccess}; use quickwit_common::uri::Uri; use quickwit_metastore::SplitMetadata; +use quickwit_proto::types::SplitId; use quickwit_storage::{PutPayload, Storage, StorageResult}; use tantivy::Directory; use tantivy::directory::{Advice, MmapDirectory}; @@ -93,7 +94,7 @@ impl IndexingSplitStore { self.inner.remote_storage.uri() } - fn split_path(&self, split_id: &str) -> PathBuf { + fn split_path(&self, split_id: &SplitId) -> PathBuf { PathBuf::from(quickwit_common::split_file(split_id)) } @@ -149,7 +150,7 @@ impl IndexingSplitStore { if self .inner .split_cache - .move_into_cache(split.split_id(), split_folder_path) + .move_into_cache(split.split_id().clone(), split_folder_path) .await? { return Ok(()); @@ -178,15 +179,15 @@ impl IndexingSplitStore { #[instrument(skip(self, output_dir_path, io_controls), fields(cache_hit))] pub async fn fetch_and_open_split( &self, - split_id: &str, + split_id: SplitId, output_dir_path: &Path, io_controls: &IoControls, ) -> StorageResult> { - let path = PathBuf::from(quickwit_common::split_file(split_id)); + let path = PathBuf::from(quickwit_common::split_file(&split_id)); if let Some(split_path) = self .inner .split_cache - .get_cached_split(split_id, output_dir_path) + .get_cached_split(&split_id, output_dir_path) .await? { tracing::Span::current().record("cache_hit", true); @@ -211,7 +212,7 @@ impl IndexingSplitStore { /// Takes a snapshot of the cache view (only used for testing). #[cfg(any(test, feature = "testsuite"))] - pub async fn inspect_split_cache(&self) -> HashMap { + pub async fn inspect_split_cache(&self) -> HashMap { self.inner.split_cache.inspect_registry().await } } @@ -224,18 +225,18 @@ mod tests { use bytesize::ByteSize; use quickwit_common::io::IoControls; use quickwit_metastore::{SplitMaturity, SplitMetadata}; + use quickwit_proto::types::SplitId; use quickwit_storage::{PutPayload, RamStorage, SplitPayloadBuilder}; use tempfile::tempdir; use time::OffsetDateTime; use tokio::fs; - use ulid::Ulid; use super::IndexingSplitStore; use crate::split_store::{IndexingSplitCache, SplitStoreQuota}; - fn create_test_split_metadata(split_id: &str) -> SplitMetadata { + fn create_test_split_metadata(split_id: &SplitId) -> SplitMetadata { SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.clone(), create_timestamp: OffsetDateTime::now_utc().unix_timestamp(), maturity: SplitMaturity::Immature { maturation_period: Duration::from_secs(3600), @@ -257,8 +258,8 @@ mod tests { let remote_storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::new(remote_storage, Arc::new(split_cache)); - let split_id1 = Ulid::new().to_string(); - let split_id2 = Ulid::new().to_string(); + let split_id1: SplitId = SplitId::new(); + let split_id2: SplitId = SplitId::new(); { let split1_dir = temp_dir.path().join(&split_id1); @@ -314,7 +315,7 @@ mod tests { { let output = tempfile::tempdir()?; let split1 = split_store - .fetch_and_open_split(&split_id1, output.path(), &io_controls) + .fetch_and_open_split(split_id1.clone(), output.path(), &io_controls) .await?; let local_store_stats = split_store.inspect_split_cache().await; assert_eq!(local_store_stats.len(), 1); @@ -323,7 +324,7 @@ mod tests { { let output = tempfile::tempdir()?; let split2 = split_store - .fetch_and_open_split(&split_id2, output.path(), &io_controls) + .fetch_and_open_split(split_id2.clone(), output.path(), &io_controls) .await?; let local_store_stats = split_store.inspect_split_cache().await; assert_eq!(local_store_stats.len(), 0); @@ -347,9 +348,9 @@ mod tests { let remote_storage = Arc::new(RamStorage::default()); let split_store = IndexingSplitStore::new(remote_storage, Arc::new(split_cache)); - let split_id1 = Ulid::new().to_string(); + let split_id1 = SplitId::new(); let split_payload1 = SplitPayloadBuilder::get_split_payload(&[], &[], &[5, 5, 5])?; - let split_id2 = Ulid::new().to_string(); + let split_id2 = SplitId::new(); let split_payload2 = SplitPayloadBuilder::get_split_payload(&[], &[], &[5, 5, 5, 5])?; { @@ -410,7 +411,7 @@ mod tests { // get from remote storage because split_id1 was evicted by split_id2 let output = tempfile::tempdir()?; let _split1 = split_store - .fetch_and_open_split(&split_id1, output.path(), &io_controls) + .fetch_and_open_split(split_id1.clone(), output.path(), &io_controls) .await?; assert_eq!(io_controls.num_bytes(), split_payload1.len()); } @@ -418,7 +419,7 @@ mod tests { // get from cache let output = tempfile::tempdir()?; let _split2 = split_store - .fetch_and_open_split(&split_id2, output.path(), &io_controls) + .fetch_and_open_split(split_id2.clone(), output.path(), &io_controls) .await?; // the number of downloaded by didn't change (still the size of split_payload1) assert_eq!(io_controls.num_bytes(), split_payload1.len()); @@ -427,7 +428,7 @@ mod tests { // get from remote because getting from cache removes the split from the cache let output = tempfile::tempdir()?; let _split2 = split_store - .fetch_and_open_split(&split_id2, output.path(), &io_controls) + .fetch_and_open_split(split_id2.clone(), output.path(), &io_controls) .await?; assert_eq!( io_controls.num_bytes(), diff --git a/quickwit/quickwit-indexing/src/test_utils.rs b/quickwit/quickwit-indexing/src/test_utils.rs index 42014752a7c..7f5dfcfc56d 100644 --- a/quickwit/quickwit-indexing/src/test_utils.rs +++ b/quickwit/quickwit-indexing/src/test_utils.rs @@ -280,7 +280,7 @@ pub fn mock_split(split_id: &str) -> Split { pub fn mock_split_meta(split_id: &str, index_uid: &IndexUid) -> SplitMetadata { SplitMetadata { index_uid: index_uid.clone(), - split_id: split_id.to_string(), + split_id: split_id.into(), partition_id: 13u64, num_docs: if split_id == "split1" { 1_000_000 } else { 10 }, uncompressed_docs_size_in_bytes: 256, diff --git a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs index f293d938410..6937fb3f517 100644 --- a/quickwit/quickwit-janitor/src/actors/garbage_collector.rs +++ b/quickwit/quickwit-janitor/src/actors/garbage_collector.rs @@ -377,7 +377,7 @@ mod tests { .iter() .map(|split_id| Split { split_metadata: SplitMetadata { - split_id: split_id.to_string(), + split_id: (*split_id).into(), index_uid: IndexUid::for_test(index_id, 0), footer_offsets: 5..20, ..Default::default() diff --git a/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs b/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs index 3b79f5e489b..da43bb7f371 100644 --- a/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs +++ b/quickwit/quickwit-janitor/src/actors/retention_policy_executor.rs @@ -343,7 +343,7 @@ mod tests { fn make_split(split_id: &str, time_range: Option>) -> Split { Split { split_metadata: SplitMetadata { - split_id: split_id.to_string(), + split_id: split_id.into(), footer_offsets: 5..20, time_range, ..Default::default() diff --git a/quickwit/quickwit-janitor/src/retention_policy_execution.rs b/quickwit/quickwit-janitor/src/retention_policy_execution.rs index bf06dd1b032..97109cec427 100644 --- a/quickwit/quickwit-janitor/src/retention_policy_execution.rs +++ b/quickwit/quickwit-janitor/src/retention_policy_execution.rs @@ -63,9 +63,9 @@ pub async fn run_execute_retention_policy( .partition(|split_metadata| split_metadata.time_range.is_some()); if !ignored_splits.is_empty() { - let ignored_split_ids: Vec = ignored_splits - .into_iter() - .map(|split_metadata| split_metadata.split_id) + let ignored_split_ids: Vec<&str> = ignored_splits + .iter() + .map(|split_metadata| split_metadata.split_id().as_str()) .collect(); warn!( index_id=%index_uid.index_id, @@ -80,7 +80,7 @@ pub async fn run_execute_retention_policy( // Mark the expired splits for deletion. let expired_split_ids: Vec = expired_splits .iter() - .map(|split_metadata| split_metadata.split_id.to_string()) + .map(|split_metadata| split_metadata.split_id.clone()) .collect(); info!( index_id=%index_uid.index_id, diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs index 7bb73f71e52..a1d204adc63 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/file_backed_index/mod.rs @@ -232,9 +232,9 @@ impl FileBackedIndex { .map(|delete_task| delete_task.opstamp) .max() .unwrap_or(0) as usize; - let splits = splits + let splits: HashMap = splits .into_iter() - .map(|split| (split.split_id().to_string(), split)) + .map(|split| (split.split_id().clone(), split)) .collect(); let metrics_splits = metrics_splits .into_iter() @@ -330,7 +330,7 @@ impl FileBackedIndex { publish_timestamp: None, split_metadata, }; - self.splits.insert(split.split_id().to_string(), split); + self.splits.insert(split.split_id().clone(), split); Ok(()) } @@ -1331,7 +1331,7 @@ mod tests { [ Split { split_metadata: SplitMetadata { - split_id: "split-1".to_string(), + split_id: "split-1".into(), delete_opstamp: 9, time_range: Some(32..=40), tags: BTreeSet::from(["tag-1".to_string()]), @@ -1345,7 +1345,7 @@ mod tests { }, Split { split_metadata: SplitMetadata { - split_id: "split-2".to_string(), + split_id: "split-2".into(), delete_opstamp: 4, time_range: None, tags: BTreeSet::from(["tag-2".to_string(), "tag-3".to_string()]), @@ -1359,7 +1359,7 @@ mod tests { }, Split { split_metadata: SplitMetadata { - split_id: "split-3".to_string(), + split_id: "split-3".into(), delete_opstamp: 0, time_range: Some(0..=90), tags: BTreeSet::from(["tag-2".to_string(), "tag-4".to_string()]), diff --git a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs index aa5e90f466f..3939d207563 100644 --- a/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs +++ b/quickwit/quickwit-metastore/src/metastore/file_backed/mod.rs @@ -1834,7 +1834,7 @@ mod tests { let split_id = "split-one"; let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: split_id.to_string(), + split_id: split_id.into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(RangeInclusive::new(0, 99)), @@ -1936,7 +1936,7 @@ mod tests { let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: "split1".to_string(), + split_id: "split1".into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(0..=99), @@ -1999,7 +1999,7 @@ mod tests { let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: "split1".to_string(), + split_id: "split1".into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(0..=99), @@ -2058,7 +2058,7 @@ mod tests { async move { let split_metadata = SplitMetadata { footer_offsets: 1000..2000, - split_id: format!("split-{i}"), + split_id: format!("split-{i}").into(), num_docs: 1, uncompressed_docs_size_in_bytes: 2, time_range: Some(RangeInclusive::new(0, 99)), diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs index 43bd995e1ba..de080bfcc4e 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/metastore.rs @@ -625,7 +625,7 @@ impl MetastoreService for PostgresqlMetastore { let tags: Vec = split_metadata.tags.into_iter().collect(); tags_list.push(sqlx::types::Json(tags)); - split_ids.push(split_metadata.split_id); + split_ids.push(split_metadata.split_id.to_string()); delete_opstamps.push(split_metadata.delete_opstamp as i64); node_ids.push(split_metadata.node_id); } @@ -3375,7 +3375,7 @@ mod tests { let query = ListSplitsQuery::for_index(index_uid.clone()).after_split(&crate::SplitMetadata { index_uid: index_uid.clone(), - split_id: "my_split".to_string(), + split_id: "my_split".into(), ..Default::default() }); append_query_filters_and_order_by(sql, &query); diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/model.rs b/quickwit/quickwit-metastore/src/metastore/postgres/model.rs index 117f37047d9..8aac26a8c7e 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/model.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/model.rs @@ -19,7 +19,7 @@ use std::str::FromStr; use quickwit_proto::ingest::{Shard, ShardState}; use quickwit_proto::metastore::{DeleteQuery, DeleteTask, MetastoreError, MetastoreResult}; -use quickwit_proto::types::{DocMappingUid, IndexId, IndexUid, ShardId, SourceId, SplitId}; +use quickwit_proto::types::{DocMappingUid, IndexId, IndexUid, ShardId, SourceId}; use sea_query::{Iden, Write}; use tracing::error; @@ -101,8 +101,9 @@ impl Iden for ToTimestampFunc { /// A model structure for handling split metadata in a database. #[derive(sqlx::FromRow)] pub(super) struct PgSplit { - /// Split ID. - pub split_id: SplitId, + /// It is only used for logging here (the authoritative split id lives in + /// `split_metadata_json`). + pub split_id: String, /// The state of the split. With `update_timestamp`, this is the only mutable attribute of the /// split. pub split_state: String, diff --git a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs index 28519b8a294..153096cc0b4 100644 --- a/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs +++ b/quickwit/quickwit-metastore/src/metastore/postgres/utils.rs @@ -198,7 +198,10 @@ pub(super) fn append_query_filters_and_order_by( Expr::col(Splits::IndexUid).into(), Expr::col(Splits::SplitId).into(), ]) - .gt(Expr::tuple([Expr::value(index_uid), Expr::value(split_id)])), + .gt(Expr::tuple([ + Expr::value(index_uid), + Expr::value(split_id.as_str()), + ])), ); } diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index fe88fe379d3..f3b6e868b7e 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -47,7 +47,7 @@ pub struct Split { impl Split { /// Returns the split_id. - pub fn split_id(&self) -> &str { + pub fn split_id(&self) -> &SplitId { &self.split_metadata.split_id } } @@ -201,7 +201,7 @@ impl SplitMetadata { } /// Returns the split_id. - pub fn split_id(&self) -> &str { + pub fn split_id(&self) -> &SplitId { &self.split_id } @@ -264,7 +264,7 @@ pub struct SplitInfo { impl quickwit_config::TestableForRegression for SplitMetadata { fn sample_for_regression() -> Self { SplitMetadata { - split_id: "split".to_string(), + split_id: "split".into(), index_uid: IndexUid::for_test("my-index", 1), source_id: "source".to_string(), node_id: "node".to_string(), @@ -398,7 +398,7 @@ mod tests { #[test] fn test_split_metadata_debug() { let split_metadata = SplitMetadata { - split_id: "split-1".to_string(), + split_id: "split-1".into(), index_uid: IndexUid::for_test("00000000-0000-0000-0000-000000000000", 0), partition_id: 0, source_id: "source-1".to_string(), diff --git a/quickwit/quickwit-metastore/src/tests/index.rs b/quickwit/quickwit-metastore/src/tests/index.rs index c093c7b1a6c..de991a89dca 100644 --- a/quickwit/quickwit-metastore/src/tests/index.rs +++ b/quickwit/quickwit-metastore/src/tests/index.rs @@ -800,7 +800,7 @@ pub async fn test_metastore_delete_index< let split_id = format!("{index_id}--split"); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -844,7 +844,7 @@ pub async fn test_metastore_list_index_stats< let split_id_1 = format!("{index_id_1}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), footer_offsets: 0..2048, ..Default::default() @@ -852,7 +852,7 @@ pub async fn test_metastore_list_index_stats< let split_id_2 = format!("{index_id_1}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_1.clone(), footer_offsets: 0..2048, ..Default::default() @@ -860,7 +860,7 @@ pub async fn test_metastore_list_index_stats< let split_id_3 = format!("{index_id_1}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_2.clone(), footer_offsets: 0..1000, ..Default::default() @@ -884,7 +884,7 @@ pub async fn test_metastore_list_index_stats< let publish_splits_request = PublishSplitsRequest { index_uid: Some(index_uid_1.clone()), - staged_split_ids: vec![split_id_1.clone(), split_id_2.clone()], + staged_split_ids: vec![split_id_1, split_id_2], ..Default::default() }; metastore diff --git a/quickwit/quickwit-metastore/src/tests/list_splits.rs b/quickwit/quickwit-metastore/src/tests/list_splits.rs index 01145612dd0..3f95b5172fa 100644 --- a/quickwit/quickwit-metastore/src/tests/list_splits.rs +++ b/quickwit/quickwit-metastore/src/tests/list_splits.rs @@ -33,7 +33,7 @@ use crate::metastore::MetastoreServiceStreamSplitsExt; use crate::tests::{cleanup_index, collect_split_ids}; use crate::{ CreateIndexRequestExt, ListSplitsQuery, ListSplitsRequestExt, ListSplitsResponseExt, - MetastoreServiceExt, SplitMaturity, SplitMetadata, SplitState, StageSplitsRequestExt, + MetastoreServiceExt, Split, SplitMaturity, SplitMetadata, SplitState, StageSplitsRequestExt, }; pub async fn test_metastore_list_all_splits< @@ -48,37 +48,37 @@ pub async fn test_metastore_list_all_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_4 = format!("{index_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_5 = format!("{index_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; let split_id_6 = format!("{index_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -180,16 +180,16 @@ pub async fn test_metastore_stream_splits4}"); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; split_metadatas_to_create.push(split_metadata); if split_idx > 0 && split_idx % 100 == 0 { - let staged_split_ids: Vec = split_metadatas_to_create + let staged_split_ids: Vec = split_metadatas_to_create .iter() - .map(|split_metadata| split_metadata.split_id.clone()) + .map(|split_metadata| split_metadata.split_id.to_string()) .collect(); let stage_splits_request = StageSplitsRequest::try_from_splits_metadata( index_uid.clone(), @@ -226,10 +226,13 @@ pub async fn test_metastore_stream_splits = splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .sorted() .collect(); assert_eq!(split_ids, &[&split_id_1, &split_id_5]); @@ -726,7 +729,7 @@ pub async fn test_metastore_list_splits = splits + let split_ids: Vec<&SplitId> = splits .iter() .map(|split| &split.split_metadata.split_id) .sorted() @@ -914,7 +917,7 @@ pub async fn test_metastore_list_splits_by_node_id< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 20, @@ -923,7 +926,7 @@ pub async fn test_metastore_list_splits_by_node_id< }; let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 10, @@ -968,7 +971,7 @@ pub async fn test_metastore_list_stale_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 20, @@ -976,7 +979,7 @@ pub async fn test_metastore_list_stale_splits< }; let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 10, @@ -984,7 +987,7 @@ pub async fn test_metastore_list_stale_splits< }; let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 0, @@ -992,7 +995,7 @@ pub async fn test_metastore_list_stale_splits< }; let split_id_4 = format!("{index_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, delete_opstamp: 20, @@ -1001,7 +1004,7 @@ pub async fn test_metastore_list_stale_splits< // immature split let split_id_5 = format!("{index_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, maturity: SplitMaturity::Immature { @@ -1100,7 +1103,7 @@ pub async fn test_metastore_list_stale_splits< delete_opstamp: 100, num_splits: 4, }; - let splits = metastore + let splits: Vec = metastore .list_stale_splits(list_stale_splits_request) .await .unwrap() @@ -1169,42 +1172,42 @@ pub async fn test_metastore_list_sorted_splits< let split_id_1 = format!("{split_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), delete_opstamp: 5, ..Default::default() }; let split_id_2 = format!("{split_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_2.clone(), delete_opstamp: 3, ..Default::default() }; let split_id_3 = format!("{split_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_1.clone(), delete_opstamp: 1, ..Default::default() }; let split_id_4 = format!("{split_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid_2.clone(), delete_opstamp: 0, ..Default::default() }; let split_id_5 = format!("{split_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid_1.clone(), delete_opstamp: 2, ..Default::default() }; let split_id_6 = format!("{split_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid_2.clone(), delete_opstamp: 4, ..Default::default() @@ -1287,9 +1290,9 @@ pub async fn test_metastore_list_sorted_splits< .await .unwrap(); // we don't use collect_split_ids because it sorts splits internally - let split_ids = splits + let split_ids: Vec<&str> = splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .collect::>(); assert_eq!( split_ids, @@ -1315,9 +1318,9 @@ pub async fn test_metastore_list_sorted_splits< .await .unwrap(); // we don't use collect_split_ids because it sorts splits internally - let split_ids = splits + let split_ids: Vec<&str> = splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .collect::>(); assert_eq!( split_ids, @@ -1366,37 +1369,37 @@ pub async fn test_metastore_list_after_split< let split_id_1 = format!("{split_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_2 = format!("{split_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_3 = format!("{split_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_4 = format!("{split_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_5 = format!("{split_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_6 = format!("{split_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; @@ -1535,37 +1538,37 @@ pub async fn test_metastore_list_splits_from_all_indexes< let split_id_1 = format!("{split_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_2 = format!("{split_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_3 = format!("{split_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_4 = format!("{split_id}--split-4"); let split_metadata_4 = SplitMetadata { - split_id: split_id_4.clone(), + split_id: split_id_4.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; let split_id_5 = format!("{split_id}--split-5"); let split_metadata_5 = SplitMetadata { - split_id: split_id_5.clone(), + split_id: split_id_5.clone().into(), index_uid: index_uid_1.clone(), ..Default::default() }; let split_id_6 = format!("{split_id}--split-6"); let split_metadata_6 = SplitMetadata { - split_id: split_id_6.clone(), + split_id: split_id_6.clone().into(), index_uid: index_uid_2.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-metastore/src/tests/mod.rs b/quickwit/quickwit-metastore/src/tests/mod.rs index 377c763a606..deda34fac5d 100644 --- a/quickwit/quickwit-metastore/src/tests/mod.rs +++ b/quickwit/quickwit-metastore/src/tests/mod.rs @@ -113,7 +113,7 @@ async fn create_channel(client: tokio::io::DuplexStream) -> anyhow::Result Vec<&str> { splits .iter() - .map(|split| split.split_id()) + .map(|split| split.split_id().as_str()) .sorted() .collect() } diff --git a/quickwit/quickwit-metastore/src/tests/source.rs b/quickwit/quickwit-metastore/src/tests/source.rs index e67f6bdbb3f..157c3360e75 100644 --- a/quickwit/quickwit-metastore/src/tests/source.rs +++ b/quickwit/quickwit-metastore/src/tests/source.rs @@ -459,7 +459,7 @@ pub async fn test_metastore_reset_checkpoint< .unwrap(); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; diff --git a/quickwit/quickwit-metastore/src/tests/split.rs b/quickwit/quickwit-metastore/src/tests/split.rs index 9e6d45265e3..a66e91a84c4 100644 --- a/quickwit/quickwit-metastore/src/tests/split.rs +++ b/quickwit/quickwit-metastore/src/tests/split.rs @@ -134,7 +134,7 @@ pub async fn test_metastore_publish_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), time_range: Some(0..=99), create_timestamp: current_timestamp, @@ -143,7 +143,7 @@ pub async fn test_metastore_publish_splits< let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), time_range: Some(30..=99), create_timestamp: current_timestamp, @@ -715,7 +715,7 @@ pub async fn test_metastore_publish_splits_concurrency< async move { let split_id = format!("{index_id}--split-{partition_id}"); let split_metadata = SplitMetadata { - split_id: split_id.clone(), + split_id: split_id.clone().into(), index_uid: index_uid.clone(), ..Default::default() }; @@ -784,7 +784,7 @@ pub async fn test_metastore_replace_splits< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), time_range: None, create_timestamp: current_timestamp, @@ -793,7 +793,7 @@ pub async fn test_metastore_replace_splits< let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), time_range: None, create_timestamp: current_timestamp, @@ -802,7 +802,7 @@ pub async fn test_metastore_replace_splits< let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), time_range: None, create_timestamp: current_timestamp, @@ -1135,7 +1135,7 @@ pub async fn test_metastore_mark_splits_for_deletion< "index-not-found:00000000000000000000000000" .parse() .unwrap(), - Vec::new(), + Vec::::new(), ); let error = metastore .mark_splits_for_deletion(mark_splits_for_deletion_request) @@ -1155,7 +1155,7 @@ pub async fn test_metastore_mark_splits_for_deletion< let split_id_1 = format!("{index_id}--split-1"); let split_metadata_1 = SplitMetadata { - split_id: split_id_1.clone(), + split_id: split_id_1.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, ..Default::default() @@ -1166,7 +1166,7 @@ pub async fn test_metastore_mark_splits_for_deletion< let split_id_2 = format!("{index_id}--split-2"); let split_metadata_2 = SplitMetadata { - split_id: split_id_2.clone(), + split_id: split_id_2.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, ..Default::default() @@ -1186,7 +1186,7 @@ pub async fn test_metastore_mark_splits_for_deletion< let split_id_3 = format!("{index_id}--split-3"); let split_metadata_3 = SplitMetadata { - split_id: split_id_3.clone(), + split_id: split_id_3.clone().into(), index_uid: index_uid.clone(), create_timestamp: current_timestamp, ..Default::default() @@ -1228,7 +1228,7 @@ pub async fn test_metastore_mark_splits_for_deletion< .unwrap(); assert_eq!(marked_splits.len(), 1); - assert_eq!(marked_splits[0].split_id(), split_id_3); + assert_eq!(marked_splits[0].split_id().as_str(), split_id_3); let split_3_update_timestamp = marked_splits[0].update_timestamp; assert!(current_timestamp < split_3_update_timestamp); @@ -1267,13 +1267,13 @@ pub async fn test_metastore_mark_splits_for_deletion< assert_eq!(marked_splits.len(), 3); - assert_eq!(marked_splits[0].split_id(), split_id_1); + assert_eq!(marked_splits[0].split_id().as_str(), split_id_1); assert!(current_timestamp + 2 <= marked_splits[0].update_timestamp); - assert_eq!(marked_splits[1].split_id(), split_id_2); + assert_eq!(marked_splits[1].split_id().as_str(), split_id_2); assert!(current_timestamp + 2 <= marked_splits[1].update_timestamp); - assert_eq!(marked_splits[2].split_id(), split_id_3); + assert_eq!(marked_splits[2].split_id().as_str(), split_id_3); assert_eq!(marked_splits[2].update_timestamp, split_3_update_timestamp); cleanup_index(&mut metastore, index_uid).await; @@ -1335,7 +1335,7 @@ pub async fn test_metastore_delete_splits(&self, this: &T, other: &T) -> Ordering { + pub fn compare(&self, this: &T, other: &T) -> Ordering { if self == &search::SortOrder::Desc { this.cmp(other) } else { diff --git a/quickwit/quickwit-proto/src/metastore/mod.rs b/quickwit/quickwit-proto/src/metastore/mod.rs index 4f53b9abb5c..47cd4b50580 100644 --- a/quickwit/quickwit-proto/src/metastore/mod.rs +++ b/quickwit/quickwit-proto/src/metastore/mod.rs @@ -19,7 +19,7 @@ use quickwit_common::retry::Retryable; use quickwit_common::tower::{MakeLoadShedError, TimeoutExceeded}; use serde::{Deserialize, Serialize}; -use crate::types::{IndexId, IndexUid, QueueId, SourceId, SplitId}; +use crate::types::{IndexId, IndexUid, QueueId, SourceId}; use crate::{GrpcServiceError, ServiceError, ServiceErrorCode}; pub mod events; @@ -67,7 +67,7 @@ pub enum EntityKind { /// A split. Split { /// Split ID. - split_id: SplitId, + split_id: String, }, /// A set of splits. Splits { @@ -371,10 +371,13 @@ impl IndexMetadataRequest { } impl MarkSplitsForDeletionRequest { - pub fn new(index_uid: IndexUid, split_ids: Vec) -> Self { + pub fn new( + index_uid: IndexUid, + split_ids: impl IntoIterator>, + ) -> Self { Self { index_uid: index_uid.into(), - split_ids, + split_ids: split_ids.into_iter().map(Into::into).collect(), } } } diff --git a/quickwit/quickwit-proto/src/types/mod.rs b/quickwit/quickwit-proto/src/types/mod.rs index 7fea3fae4f8..a7e5f8c23a9 100644 --- a/quickwit/quickwit-proto/src/types/mod.rs +++ b/quickwit/quickwit-proto/src/types/mod.rs @@ -28,6 +28,7 @@ mod index_uid; mod pipeline_uid; mod position; mod shard_id; +mod split_id; pub use doc_mapping_uid::DocMappingUid; pub use doc_uid::{DocUid, DocUidGenerator}; @@ -35,6 +36,7 @@ pub use index_uid::IndexUid; pub use pipeline_uid::PipelineUid; pub use position::Position; pub use shard_id::ShardId; +pub use split_id::SplitId; /// The size of an ULID in bytes. Use `ULID_LEN` for the length of Base32 encoded ULID strings. pub(crate) const ULID_SIZE: usize = 16; @@ -43,8 +45,6 @@ pub type IndexId = String; pub type SourceId = String; -pub type SplitId = String; - pub type SubrequestId = u32; /// See the file `ingest.proto` for more details. diff --git a/quickwit/quickwit-proto/src/types/split_id.rs b/quickwit/quickwit-proto/src/types/split_id.rs new file mode 100644 index 00000000000..43ec4b547dc --- /dev/null +++ b/quickwit/quickwit-proto/src/types/split_id.rs @@ -0,0 +1,168 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Borrow; +use std::convert::Infallible; +use std::fmt; +use std::path::Path; +use std::str::FromStr; +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + +/// Identifies a split. +/// +/// Joined with the index URI (`/`), this ID is enough to +/// uniquely identify a split. +/// +/// Split IDs are currently generated as ULIDs (see +/// `quickwit_indexing::new_split_id`), but the rest of the codebase must treat +/// them as **opaque** strings: no part of the system may assume the id is a +/// ULID or extract information (such as a creation timestamp) from it. This +/// indirection is what allows the generator to prepend a random prefix for S3 +/// load distribution without rippling through the codebase. +/// +/// The inner storage is an [`Arc`] so that cloning is cheap (a split id is +/// frequently used as a cache key and copied around the search and indexing +/// pipelines). +#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct SplitId(Arc); + +impl SplitId { + /// Generates a new split id. + pub fn new() -> Self { + SplitId::from(ulid::Ulid::new().to_string()) + } + + /// Returns the id as a string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl AsRef for SplitId { + fn as_ref(&self) -> &Path { + Path::new(self.as_str()) + } +} + +// `Debug` delegates to the inner string so a split id renders identically to the +// `String` it replaced (e.g. `"01HAV29D4XY3D462FS3D8K5Q2H"`, not +// `SplitId("01HAV29D4XY3D462FS3D8K5Q2H")`). +impl fmt::Debug for SplitId { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(&self.0, formatter) + } +} + +impl fmt::Display for SplitId { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(&self.0) + } +} + +impl From for SplitId { + fn from(split_id: String) -> Self { + SplitId(split_id.into()) + } +} + +impl From for String { + fn from(split_id: SplitId) -> Self { + split_id.0.as_ref().to_owned() + } +} + +impl From<&SplitId> for String { + fn from(split_id: &SplitId) -> Self { + split_id.0.as_ref().to_owned() + } +} + +impl From<&str> for SplitId { + fn from(split_id: &str) -> Self { + SplitId(Arc::from(split_id)) + } +} + +impl FromStr for SplitId { + // A split id imposes no constraint on its content, so parsing never fails. + type Err = Infallible; + + fn from_str(split_id_str: &str) -> Result { + Ok(SplitId::from(split_id_str)) + } +} + +impl AsRef for SplitId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +// Enables looking up a `HashMap` or `BTreeMap` with a `&str` key. +impl Borrow for SplitId { + fn borrow(&self) -> &str { + &self.0 + } +} + +impl PartialEq for SplitId { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq for SplitId { + fn eq(&self, other: &String) -> bool { + self.as_str() == other.as_str() + } +} + +impl PartialEq<&str> for SplitId { + fn eq(&self, other: &&str) -> bool { + self.as_str() == *other + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_id_serde_is_transparent() { + let split_id = SplitId::from("01HAV29D4XY3D462FS3D8K5Q2H"); + let serialized = serde_json::to_string(&split_id).unwrap(); + assert_eq!(serialized, "\"01HAV29D4XY3D462FS3D8K5Q2H\""); + let deserialized: SplitId = serde_json::from_str(&serialized).unwrap(); + assert_eq!(deserialized, split_id); + } + + #[test] + fn test_split_id_borrow_as_str() { + use std::collections::HashMap; + + let mut map: HashMap = HashMap::new(); + map.insert(SplitId::from("split-1"), 42); + assert_eq!(map.get("split-1"), Some(&42)); + } + + #[test] + fn test_split_id_ordering_is_lexicographic() { + let split_id_a = SplitId::from("aaa"); + let split_id_b = SplitId::from("bbb"); + assert!(split_id_a < split_id_b); + } +} diff --git a/quickwit/quickwit-query/src/query_ast/cache_node.rs b/quickwit/quickwit-query/src/query_ast/cache_node.rs index 9c449aef4fb..e9a9fc3c907 100644 --- a/quickwit/quickwit-query/src/query_ast/cache_node.rs +++ b/quickwit/quickwit-query/src/query_ast/cache_node.rs @@ -15,7 +15,6 @@ use std::sync::Arc; use bitpacking::{BitPacker, BitPacker1x}; -use quickwit_proto::types::SplitId; use serde::{Deserialize, Serialize}; use super::{BuildTantivyAst, BuildTantivyAstContext, TantivyQueryAst}; @@ -480,9 +479,9 @@ impl crate::query_ast::QueryAstTransformer for PredicateCacheInjector { // we use a trait to dodge circular dependancies with quickwit-storage pub trait PredicateCache: Send + Sync + 'static { - fn get(&self, split_id: SplitId, query_ast_json: String) -> Option<(SegmentId, HitSet)>; + fn get(&self, split_id: String, query_ast_json: String) -> Option<(SegmentId, HitSet)>; - fn put(&self, split_id: SplitId, query_ast_json: String, segment: SegmentId, results: HitSet); + fn put(&self, split_id: String, query_ast_json: String, segment: SegmentId, results: HitSet); } #[cfg(test)] @@ -499,8 +498,8 @@ mod tests { BuildTantivyAstContext, QueryAstTransformer, QueryAstVisitor, TermQuery, }; - impl PredicateCache for Mutex> { - fn get(&self, split_id: SplitId, query_ast_json: String) -> Option<(SegmentId, HitSet)> { + impl PredicateCache for Mutex> { + fn get(&self, split_id: String, query_ast_json: String) -> Option<(SegmentId, HitSet)> { self.lock() .unwrap() .get(&(split_id, query_ast_json)) @@ -509,7 +508,7 @@ mod tests { fn put( &self, - split_id: SplitId, + split_id: String, query_ast_json: String, segment: SegmentId, results: HitSet, diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index daa01f2a4b0..66a0ef56046 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -490,7 +490,7 @@ pub(crate) struct SegmentPartialHit { impl SegmentPartialHit { pub fn into_partial_hit( self, - split_id: SplitId, + split_id: &SplitId, segment_ord: SegmentOrdinal, first: &SortingFieldExtractorComponent, second: &Option, @@ -514,7 +514,7 @@ impl SegmentPartialHit { sort_value: Some(sort_value), }), doc_id: self.doc_id, - split_id, + split_id: split_id.to_string(), segment_ord, } } @@ -670,7 +670,7 @@ impl QuickwitIncrementalAggregations { sort_value: Some(SortValue::I64(timestamp)), }), sort_value2: None, - split_id: SplitId::new(), + split_id: String::new(), segment_ord: 0, doc_id: 0, }); @@ -1069,7 +1069,7 @@ pub(crate) fn make_merge_collector( }; let sort_by = sort_by_from_request(search_request); Ok(QuickwitCollector { - split_id: SplitId::default(), + split_id: SplitId::from(""), start_offset: search_request.start_offset as usize, max_hits: search_request.max_hits as usize, sort_by, @@ -1320,6 +1320,7 @@ mod tests { LeafResourceStats, LeafSearchResponse, PartialHit, SearchRequest, SortByValue, SortField, SortOrder, SortValue, SplitResourceStats, SplitSearchError, }; + use quickwit_proto::types::SplitId; use tantivy::TantivyDocument; use tantivy::aggregation::agg_req::Aggregations; use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults; @@ -1594,7 +1595,7 @@ mod tests { // Check increasing slice sizes of the dataset for slice_len in 0..dataset.len() { let collector = super::make_collector_for_split( - "fake_split_id".to_string(), + SplitId::from("fake_split_id"), &make_request(slice_len as u64, sort_str), Default::default(), ) @@ -1690,7 +1691,7 @@ mod tests { ..SearchRequest::default() }; let collector = super::make_collector_for_split( - "fake_split_id".to_string(), + SplitId::from("fake_split_id"), &request, Default::default(), ) @@ -1728,7 +1729,7 @@ mod tests { }; let collector = super::make_collector_for_split( - "fake_split_id1".to_string(), + SplitId::from("fake_split_id1"), &request, Default::default(), ) @@ -1742,7 +1743,7 @@ mod tests { assert_eq!(res.partial_hits.len(), dataset.len()); let collector = super::make_collector_for_split( - "fake_split_id2".to_string(), + SplitId::from("fake_split_id2"), &request, Default::default(), ) @@ -1755,7 +1756,7 @@ mod tests { assert_eq!(res.partial_hits.len(), 5); let collector = super::make_collector_for_split( - "fake_split_id3".to_string(), + SplitId::from("fake_split_id3"), &request, Default::default(), ) diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index c752afc8999..5be249011ce 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -37,6 +37,7 @@ use quickwit_proto::search::{ CountHits, LeafResourceStats, LeafSearchRequest, LeafSearchResponse, PartialHit, SearchRequest, SortOrder, SortValue, SplitIdAndFooterOffsets, SplitResourceStats, SplitSearchError, }; +use quickwit_proto::types::SplitId; use quickwit_query::query_ast::{ BoolQuery, CacheNode, QueryAst, QueryAstTransformer, RangeQuery, TermQuery, }; @@ -686,8 +687,11 @@ async fn leaf_search_single_split( limits: ctx.searcher_context.get_aggregation_limits(), tokenizers: ctx.doc_mapper.tokenizer_manager().tantivy_manager().clone(), }; - let mut collector = - make_collector_for_split(split_id.clone(), &search_request, agg_context_params)?; + let mut collector = make_collector_for_split( + SplitId::from(split_id.as_str()), + &search_request, + agg_context_params, + )?; let predicate_cache = if collector.requires_scoring() { // at the moment the predicate cache doesn't support scoring diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index ad0a30cc4e6..4cb9f37d6c5 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -19,7 +19,6 @@ use quickwit_config::CacheConfig; use quickwit_proto::search::{ CountHits, LeafResourceStats, LeafSearchResponse, SearchRequest, SplitIdAndFooterOffsets, }; -use quickwit_proto::types::SplitId; use quickwit_storage::{MemorySizedCache, OwnedBytes}; use tantivy::index::SegmentId; @@ -86,7 +85,7 @@ impl LeafSearchCache { #[derive(Debug, Hash, Clone, PartialEq, Eq)] struct CacheKey { /// The split this entry refers to - split_id: SplitId, + split_id: String, /// The request this matches. The timerange of the request was removed. request: SearchRequest, /// The effective time range of the request, that is, the intersection of the timerange @@ -196,7 +195,7 @@ impl RangeBounds for HalfOpenRange { } pub struct PredicateCacheImpl { - content: MemorySizedCache<(SplitId, String)>, + content: MemorySizedCache<(String, String)>, } impl PredicateCacheImpl { @@ -213,7 +212,7 @@ impl PredicateCacheImpl { impl quickwit_query::query_ast::PredicateCache for PredicateCacheImpl { fn get( &self, - split_id: SplitId, + split_id: String, query_ast_json: String, ) -> Option<(SegmentId, quickwit_query::query_ast::HitSet)> { let encoded_result = self.content.get(&(split_id, query_ast_json))?; @@ -226,7 +225,7 @@ impl quickwit_query::query_ast::PredicateCache for PredicateCacheImpl { fn put( &self, - split_id: SplitId, + split_id: String, query_ast_json: String, segment: SegmentId, hits: quickwit_query::query_ast::HitSet, diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index 8d2ad0f924d..cde7597b186 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -164,7 +164,7 @@ impl std::str::FromStr for GlobalDocAddress { fn extract_split_and_footer_offsets(split_metadata: &SplitMetadata) -> SplitIdAndFooterOffsets { SplitIdAndFooterOffsets { - split_id: split_metadata.split_id.clone(), + split_id: split_metadata.split_id.to_string(), split_footer_start: split_metadata.footer_offsets.start, split_footer_end: split_metadata.footer_offsets.end, timestamp_start: split_metadata diff --git a/quickwit/quickwit-search/src/list_fields/cache.rs b/quickwit/quickwit-search/src/list_fields/cache.rs index bf9611cb9fb..b29091fcfa3 100644 --- a/quickwit/quickwit-search/src/list_fields/cache.rs +++ b/quickwit/quickwit-search/src/list_fields/cache.rs @@ -13,11 +13,10 @@ // limitations under the License. use quickwit_config::CacheConfig; -use quickwit_proto::types::SplitId; use quickwit_storage::{MemorySizedCache, OwnedBytes}; pub struct ListFieldsCache { - cache: MemorySizedCache, + cache: MemorySizedCache, } impl ListFieldsCache { @@ -30,11 +29,11 @@ impl ListFieldsCache { } } - pub fn get(&self, split_id: &SplitId) -> Option { + pub fn get(&self, split_id: &String) -> Option { self.cache.get(split_id) } - pub fn put(&self, split_id: SplitId, serialized_split_fields: OwnedBytes) { + pub fn put(&self, split_id: String, serialized_split_fields: OwnedBytes) { self.cache.put(split_id, serialized_split_fields); } } diff --git a/quickwit/quickwit-search/src/list_fields/leaf.rs b/quickwit/quickwit-search/src/list_fields/leaf.rs index 09d02314d6d..c3b79742b80 100644 --- a/quickwit/quickwit-search/src/list_fields/leaf.rs +++ b/quickwit/quickwit-search/src/list_fields/leaf.rs @@ -21,7 +21,7 @@ use quickwit_common::shared_consts::{FIELD_PRESENCE_FIELD_NAME, SPLIT_FIELDS_FIL use quickwit_proto::search::{ ListFieldsEntry, ListFieldsMetadata, ListFieldsResponse, SplitIdAndFooterOffsets, }; -use quickwit_proto::types::{IndexId, SplitId}; +use quickwit_proto::types::IndexId; use quickwit_storage::{OwnedBytes, Storage}; use tracing::{Span, instrument}; @@ -154,7 +154,7 @@ async fn fetch_fields_metadata( async fn process_fields_metadata( serialized_entries: OwnedBytes, index_id: IndexId, - split_id: SplitId, + split_id: String, field_patterns: FieldPatterns, ) -> anyhow::Result> { let parent_span = Span::current(); diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 7624c7c8e30..f04bb437851 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -947,11 +947,11 @@ pub(crate) async fn fetch_docs_phase( // Build map of Split ID > index ID to add the index ID to the hits. // Used for ES compatibility. - let split_id_to_index_id_map: HashMap<&SplitId, &str> = split_metadatas + let split_id_to_index_id_map: HashMap = split_metadatas .iter() .map(|split_metadata| { ( - &split_metadata.split_id, + split_metadata.split_id().clone(), split_metadata.index_uid.index_id.as_str(), ) }) @@ -984,7 +984,7 @@ pub(crate) async fn fetch_docs_phase( fn build_hit_with_position( mut leaf_hit: LeafHit, - split_id_to_index_id_map: &HashMap<&SplitId, &str>, + split_id_to_index_id_map: &HashMap, hit_order: &HashMap<(String, u32, u32), usize>, sort_field_1_datetime_format_opt: &Option, sort_field_2_datetime_format_opt: &Option, @@ -1018,7 +1018,7 @@ fn build_hit_with_position( } let position = *hit_order.get(&key).expect("hit order must be present"); let index_id = split_id_to_index_id_map - .get(&partial_hit_ref.split_id) + .get(partial_hit_ref.split_id.as_str()) .map(|split_id| split_id.to_string()) .unwrap_or_default(); diff --git a/quickwit/quickwit-search/src/top_k_collector.rs b/quickwit/quickwit-search/src/top_k_collector.rs index f36eb6370e2..5ac84769c9f 100644 --- a/quickwit/quickwit-search/src/top_k_collector.rs +++ b/quickwit/quickwit-search/src/top_k_collector.rs @@ -597,7 +597,7 @@ impl< .map(|el| el.into_segment_partial_hit()) .map(|segment_partial_hit: SegmentPartialHit| { segment_partial_hit.into_partial_hit( - self.split_id.clone(), + &self.split_id, self.segment_ord, &self.hit_fetcher.first, &self.hit_fetcher.second, @@ -634,7 +634,7 @@ impl GenericQuickwitSegmentTopKCollector { let sort_key_mapper = HitSortingMapper { order1, order2 }; let precomp_search_after_order = match &search_after_option { Some(search_after) if !search_after.split_id.is_empty() => order1 - .compare(&split_id, &search_after.split_id) + .compare(split_id.as_str(), search_after.split_id.as_str()) .then_with(|| order1.compare(&segment_ord, &search_after.segment_ord)), // This value isn't actually used. _ => Ordering::Equal, @@ -807,7 +807,7 @@ impl QuickwitSegmentTopKCollector for GenericQuickwitSegmentTopKCollector { .into_iter() .map(|segment_partial_hit: SegmentPartialHit| { segment_partial_hit.into_partial_hit( - self.split_id.clone(), + &self.split_id, self.segment_ord, &self.score_extractor.first, &self.score_extractor.second, diff --git a/quickwit/quickwit-serve/src/index_api/split_resource.rs b/quickwit/quickwit-serve/src/index_api/split_resource.rs index a062186e551..241fd4455a2 100644 --- a/quickwit/quickwit-serve/src/index_api/split_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/split_resource.rs @@ -20,7 +20,7 @@ use quickwit_proto::metastore::{ IndexMetadataRequest, ListSplitsRequest, MarkSplitsForDeletionRequest, MetastoreResult, MetastoreService, MetastoreServiceClient, }; -use quickwit_proto::types::{IndexId, IndexUid}; +use quickwit_proto::types::{IndexId, IndexUid, SplitId}; use serde::{Deserialize, Serialize}; use tracing::info; use warp::{Filter, Rejection}; @@ -180,10 +180,10 @@ pub async fn mark_splits_for_deletion( .deserialize_index_metadata()? .index_uid; info!(index_id = %index_id, splits_ids = ?splits_for_deletion.split_ids, "mark-splits-for-deletion"); - let split_ids: Vec = splits_for_deletion + let split_ids: Vec = splits_for_deletion .split_ids .iter() - .map(|split_id| split_id.to_string()) + .map(|split_id| SplitId::from(split_id.as_str())) .collect(); let mark_splits_for_deletion_request = MarkSplitsForDeletionRequest::new(index_uid, split_ids.clone()); diff --git a/quickwit/quickwit-storage/src/file_descriptor_cache.rs b/quickwit/quickwit-storage/src/file_descriptor_cache.rs index 7cc855b1822..c95f04b91cc 100644 --- a/quickwit/quickwit-storage/src/file_descriptor_cache.rs +++ b/quickwit/quickwit-storage/src/file_descriptor_cache.rs @@ -19,14 +19,14 @@ use std::ops::Range; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; +use quickwit_proto::types::SplitId; use tantivy::directory::OwnedBytes; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; -use ulid::Ulid; use crate::metrics::{FD_CACHE_METRICS, SingleCacheMetrics}; pub struct FileDescriptorCache { - fd_cache: Mutex>, + fd_cache: Mutex>, fd_semaphore: Arc, fd_cache_metrics: SingleCacheMetrics, } @@ -41,7 +41,7 @@ struct SplitFileInner { _fd_semaphore_guard: OwnedSemaphorePermit, } -fn get_split_file_path(root_path: &Path, split_id: Ulid) -> PathBuf { +fn get_split_file_path(root_path: &Path, split_id: &SplitId) -> PathBuf { let split_filename = quickwit_common::split_file(split_id); root_path.join(split_filename) } @@ -92,11 +92,11 @@ impl FileDescriptorCache { ) } - fn get_split_file(&self, split_id: Ulid) -> Option { - self.fd_cache.lock().unwrap().get(&split_id).cloned() + fn get_split_file(&self, split_id: &SplitId) -> Option { + self.fd_cache.lock().unwrap().get(split_id).cloned() } - fn put_split_file(&self, split_id: Ulid, split_file: SplitFile) { + fn put_split_file(&self, split_id: SplitId, split_file: SplitFile) { let mut fd_cache_lock = self.fd_cache.lock().unwrap(); fd_cache_lock.push(split_id, split_file); self.fd_cache_metrics @@ -106,7 +106,7 @@ impl FileDescriptorCache { /// Evicts the given list of split ids from the file descriptor cache. /// This method does NOT remove the actual files. - pub fn evict_split_files(&self, split_ids: &[Ulid]) { + pub fn evict_split_files(&self, split_ids: &[SplitId]) { let mut fd_cache_lock = self.fd_cache.lock().unwrap(); for split_id in split_ids { fd_cache_lock.pop(split_id); @@ -122,16 +122,16 @@ impl FileDescriptorCache { pub async fn get_or_open_split_file( &self, root_path: &Path, - split_id: Ulid, + split_id: SplitId, num_bytes: u64, ) -> std::io::Result { - if let Some(split_file) = self.get_split_file(split_id) { + if let Some(split_file) = self.get_split_file(&split_id) { self.fd_cache_metrics.hits_num_items.inc(); return Ok(split_file); } else { self.fd_cache_metrics.misses_num_items.inc(); } - let split_path = get_split_file_path(root_path, split_id); + let split_path = get_split_file_path(root_path, &split_id); let fd_semaphore_guard = Semaphore::acquire_owned(self.fd_semaphore.clone()) .await .expect("fd_semaphore acquire failed. please report"); @@ -177,12 +177,17 @@ impl SplitFile { mod tests { use std::num::NonZeroU32; + use quickwit_proto::types::SplitId; use tokio::fs; use ulid::Ulid; use super::FileDescriptorCache; use crate::metrics::CacheMetrics; + fn new_test_split_id() -> SplitId { + SplitId::from(Ulid::new().to_string()) + } + #[tokio::test] async fn test_fd_cache_big_cache() { let cache_metrics = CacheMetrics::for_component("fdtest").cache_metrics; @@ -192,28 +197,30 @@ mod tests { cache_metrics.clone(), ); let tempdir = tempfile::tempdir().unwrap(); - let split_ids: Vec = std::iter::repeat_with(Ulid::new).take(100).collect(); - for &split_id in &split_ids { + let split_ids: Vec = std::iter::repeat_with(new_test_split_id) + .take(100) + .collect(); + for split_id in &split_ids { let split_filepath = super::get_split_file_path(tempdir.path(), split_id); let content = split_id.to_string(); assert_eq!(content.len(), 26); fs::write(split_filepath, content.as_bytes()).await.unwrap(); } - for &split_id in &split_ids[0..10] { + for split_id in &split_ids[0..10] { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } - for &split_id in &split_ids[0..10] { + for split_id in &split_ids[0..10] { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } - for &split_id in &split_ids[0..10] { + for split_id in &split_ids[0..10] { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } @@ -234,17 +241,19 @@ mod tests { cache_metrics.clone(), ); let tempdir = tempfile::tempdir().unwrap(); - let split_ids: Vec = std::iter::repeat_with(Ulid::new).take(100).collect(); - for &split_id in &split_ids { + let split_ids: Vec = std::iter::repeat_with(new_test_split_id) + .take(100) + .collect(); + for split_id in &split_ids { let split_filepath = super::get_split_file_path(tempdir.path(), split_id); let content = split_id.to_string(); assert_eq!(content.len(), 26); fs::write(split_filepath, content.as_bytes()).await.unwrap(); } - for &split_id in &split_ids[0..100] { + for split_id in &split_ids[0..100] { for _ in 0..10 { fd_cache - .get_or_open_split_file(tempdir.path(), split_id, 26) + .get_or_open_split_file(tempdir.path(), split_id.clone(), 26) .await .unwrap(); } @@ -258,8 +267,8 @@ mod tests { async fn test_split_file() { let fd_cache = FileDescriptorCache::with_fd_cache_capacity(NonZeroU32::new(20).unwrap()); let tempdir = tempfile::tempdir().unwrap(); - let split_id: Ulid = Ulid::new(); - let split_filepath = super::get_split_file_path(tempdir.path(), split_id); + let split_id: SplitId = new_test_split_id(); + let split_filepath = super::get_split_file_path(tempdir.path(), &split_id); let content = split_id.to_string(); assert_eq!(content.len(), 26); fs::write(split_filepath, content.as_bytes()).await.unwrap(); diff --git a/quickwit/quickwit-storage/src/split_cache/download_task.rs b/quickwit/quickwit-storage/src/split_cache/download_task.rs index 9af0390bfb5..d7336d76aba 100644 --- a/quickwit/quickwit-storage/src/split_cache/download_task.rs +++ b/quickwit/quickwit-storage/src/split_cache/download_task.rs @@ -29,11 +29,11 @@ async fn download_split( storage_resolver: StorageResolver, ) -> anyhow::Result { let CandidateSplit { - split_ulid, + split_id, storage_uri, living_token: _, } = candidate_split; - let split_filename = split_file(*split_ulid); + let split_filename = split_file(split_id); let target_filepath = root_path.join(&split_filename); let storage = storage_resolver.resolve(storage_uri).await?; let num_bytes = storage @@ -52,7 +52,7 @@ async fn perform_eviction_and_download( splits_to_delete, split_to_download, } = download_opportunity; - let split_ulid = split_to_download.split_ulid; + let split_id = split_to_download.split_id.clone(); // tokio io runs on `spawn_blocking` threads anyway. let split_cache_clone = split_cache.clone(); let _ = tokio::task::spawn_blocking(move || { @@ -62,7 +62,7 @@ async fn perform_eviction_and_download( let num_bytes = download_split(&split_cache.root_path, &split_to_download, storage_resolver).await?; let mut shared_split_table_lock = split_cache.split_table.lock().unwrap(); - shared_split_table_lock.register_as_downloaded(split_ulid, num_bytes); + shared_split_table_lock.register_as_downloaded(split_id, num_bytes); Ok(()) } diff --git a/quickwit/quickwit-storage/src/split_cache/mod.rs b/quickwit/quickwit-storage/src/split_cache/mod.rs index 1c0de124d73..a02cdd3d484 100644 --- a/quickwit/quickwit-storage/src/split_cache/mod.rs +++ b/quickwit/quickwit-storage/src/split_cache/mod.rs @@ -28,9 +28,9 @@ use quickwit_common::split_file; use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; use quickwit_proto::search::ReportSplit; +use quickwit_proto::types::SplitId; use tantivy::directory::OwnedBytes; use tracing::{error, info, instrument, warn}; -use ulid::Ulid; use crate::file_descriptor_cache::{FileDescriptorCache, SplitFile}; use crate::split_cache::download_task::spawn_download_task; @@ -59,7 +59,7 @@ impl SplitCache { limits: SplitCacheLimits, ) -> io::Result> { std::fs::create_dir_all(&root_path)?; - let mut existing_splits: BTreeMap = Default::default(); + let mut existing_splits: BTreeMap = Default::default(); for dir_entry_res in std::fs::read_dir(&root_path)? { let dir_entry = dir_entry_res?; let path = dir_entry.path(); @@ -80,10 +80,10 @@ impl SplitCache { } } "split" => { - if let Some(split_ulid) = split_id_from_path(&path) { - existing_splits.insert(split_ulid, meta.len()); + if let Some(split_id) = split_id_from_path(&path) { + existing_splits.insert(split_id, meta.len()); } else { - warn!(path=%path.display(), ".split file with invalid ulid in split cache directory, ignoring"); + warn!(path=%path.display(), ".split file with invalid name in split cache directory, ignoring"); } } _ => { @@ -120,7 +120,7 @@ impl SplitCache { /// Remove splits from both the fd cache and the split cache. /// This method does NOT update the split table. - pub(crate) fn evict(&self, splits_to_evict: &[Ulid]) { + pub(crate) fn evict(&self, splits_to_evict: &[SplitId]) { self.fd_cache.evict_split_files(splits_to_evict); delete_evicted_splits(&self.root_path, splits_to_evict); } @@ -138,28 +138,26 @@ impl SplitCache { pub fn report_splits(&self, report_splits: Vec) { let mut split_table = self.split_table.lock().unwrap(); for report_split in report_splits { - let Ok(split_ulid) = Ulid::from_str(&report_split.split_id) else { - error!(split_id=%report_split.split_id, "received invalid split ulid: ignoring"); - continue; - }; + // Split ids are opaque: any non-empty string is accepted (no ULID validation). + let split_id = SplitId::from(report_split.split_id); let Ok(storage_uri) = Uri::from_str(&report_split.storage_uri) else { error!(storage_uri=%report_split.storage_uri, "received invalid storage uri: ignoring"); continue; }; - split_table.report(split_ulid, storage_uri); + split_table.report(split_id, storage_uri); } } // Returns a split guard object. As long as it is not dropped, the // split won't be evinced from the cache. - async fn get_split_file(&self, split_id: Ulid, storage_uri: &Uri) -> Option { + async fn get_split_file(&self, split_id: SplitId, storage_uri: &Uri) -> Option { // We touch before even checking the fd cache in order to update the file's last access time // for the file cache. let num_bytes_opt: Option = self .split_table .lock() .unwrap() - .touch(split_id, storage_uri); + .touch(split_id.clone(), storage_uri); let num_bytes = num_bytes_opt?; self.fd_cache @@ -175,8 +173,8 @@ impl SplitCache { /// At this point, the disk space is already accounted as released, /// so the error could result in a "disk space leak". #[instrument] -fn delete_evicted_splits(root_path: &Path, splits_to_delete: &[Ulid]) { - for &split_to_delete in splits_to_delete { +fn delete_evicted_splits(root_path: &Path, splits_to_delete: &[SplitId]) { + for split_to_delete in splits_to_delete { let split_file_path = root_path.join(split_file(split_to_delete)); if let Err(_io_err) = std::fs::remove_file(&split_file_path) { // This is an pretty critical error. The split size is not tracked anymore at this @@ -186,10 +184,12 @@ fn delete_evicted_splits(root_path: &Path, splits_to_delete: &[Ulid]) { } } -fn split_id_from_path(split_path: &Path) -> Option { +fn split_id_from_path(split_path: &Path) -> Option { let split_filename = split_path.file_name()?.to_str()?; let split_id_str = split_filename.strip_suffix(".split")?; - Ulid::from_str(split_id_str).ok() + // The split id is opaque: we take the whole file stem (a random prefix, if any, is part of + // the id) rather than parsing it as a ULID. + Some(SplitId::from(split_id_str)) } struct SplitCacheBackingStorage { diff --git a/quickwit/quickwit-storage/src/split_cache/split_table.rs b/quickwit/quickwit-storage/src/split_cache/split_table.rs index ee24a393e84..4e191bc47fa 100644 --- a/quickwit/quickwit-storage/src/split_cache/split_table.rs +++ b/quickwit/quickwit-storage/src/split_cache/split_table.rs @@ -19,7 +19,7 @@ use std::time::{Duration, Instant}; use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; -use ulid::Ulid; +use quickwit_proto::types::SplitId; use crate::metrics::SEARCHER_SPLIT_CACHE; @@ -31,10 +31,10 @@ const MAX_NUM_CANDIDATES: usize = 1_000; /// Splits that are freshly reported get a last access time of `now - NEWLY_REPORT_SPLIT_LAST_TIME`. const NEWLY_REPORTED_SPLIT_LAST_TIME: Duration = Duration::from_secs(60 * 10); // 10mn -#[derive(Clone, Copy)] +#[derive(Clone)] pub(crate) struct SplitKey { pub last_accessed: LastAccessDate, - pub split_ulid: Ulid, + pub split_id: SplitId, } impl PartialOrd for SplitKey { @@ -45,13 +45,13 @@ impl PartialOrd for SplitKey { impl Ord for SplitKey { fn cmp(&self, other: &Self) -> Ordering { - (self.last_accessed, &self.split_ulid).cmp(&(other.last_accessed, &other.split_ulid)) + (self.last_accessed, &self.split_id).cmp(&(other.last_accessed, &other.split_id)) } } impl PartialEq for SplitKey { fn eq(&self, other: &Self) -> bool { - (self.last_accessed, &self.split_ulid) == (other.last_accessed, &other.split_ulid) + (self.last_accessed, &self.split_id) == (other.last_accessed, &other.split_id) } } @@ -103,7 +103,7 @@ pub struct SplitTable { on_disk_splits: BTreeSet, downloading_splits: BTreeSet, candidate_splits: BTreeSet, - split_to_status: HashMap, + split_to_status: HashMap, origin_time: Instant, limits: SplitCacheLimits, on_disk_bytes: u64, @@ -112,7 +112,7 @@ pub struct SplitTable { impl SplitTable { pub(crate) fn with_limits_and_existing_splits( limits: SplitCacheLimits, - existing_filepaths: BTreeMap, + existing_filepaths: BTreeMap, ) -> SplitTable { let origin_time = Instant::now() - NEWLY_REPORTED_SPLIT_LAST_TIME; let mut split_table = SplitTable { @@ -128,12 +128,12 @@ impl SplitTable { split_table } - fn acknowledge_on_disk_splits(&mut self, existing_filepaths: BTreeMap) { - for (split_ulid, num_bytes) in existing_filepaths { + fn acknowledge_on_disk_splits(&mut self, existing_filepaths: BTreeMap) { + for (split_id, num_bytes) in existing_filepaths { let split_info = SplitInfo { split_key: SplitKey { last_accessed: 0, - split_ulid, + split_id, }, status: Status::OnDisk { num_bytes }, }; @@ -147,8 +147,8 @@ fn compute_timestamp(start: Instant) -> LastAccessDate { } impl SplitTable { - fn remove(&mut self, split_ulid: Ulid) -> Option { - let split_info = self.split_to_status.remove(&split_ulid)?; + fn remove(&mut self, split_id: &SplitId) -> Option { + let split_info = self.split_to_status.remove(split_id)?; let split_queue: &mut BTreeSet = match split_info.status { Status::Candidate { .. } => &mut self.candidate_splits, Status::Downloading { .. } => &mut self.downloading_splits, @@ -185,15 +185,15 @@ impl SplitTable { } let mut splits_to_remove = Vec::new(); for split in &self.downloading_splits { - if let Some(split_info) = self.split_to_status.get(&split.split_ulid) + if let Some(split_info) = self.split_to_status.get(&split.split_id) && let Status::Downloading { alive_token } = &split_info.status && alive_token.strong_count() == 0 { - splits_to_remove.push(split.split_ulid); + splits_to_remove.push(split.split_id.clone()); } } for split in splits_to_remove { - self.remove(split); + self.remove(&split); } } @@ -207,9 +207,11 @@ impl SplitTable { // we truncate *before* inserting, otherwise way may end up in an inconsistent // state which make truncate_candidate_list loop indefinitely self.truncate_candidate_list(); - self.candidate_splits.insert(split_info.split_key) + self.candidate_splits.insert(split_info.split_key.clone()) + } + Status::Downloading { .. } => { + self.downloading_splits.insert(split_info.split_key.clone()) } - Status::Downloading { .. } => self.downloading_splits.insert(split_info.split_key), Status::OnDisk { num_bytes } => { self.on_disk_bytes += num_bytes; SEARCHER_SPLIT_CACHE.cache_metrics.in_cache_count.inc(); @@ -217,18 +219,18 @@ impl SplitTable { .cache_metrics .in_cache_num_bytes .inc_by(num_bytes as f64); - self.on_disk_splits.insert(split_info.split_key) + self.on_disk_splits.insert(split_info.split_key.clone()) } }; // this is fine to do in an inconsistent state, the last entry will just be ignored while // gcing self.gc_downloading_splits_if_necessary(); assert!(was_not_in_queue); - let split_ulid_was_absent = self + let split_id_was_absent = self .split_to_status - .insert(split_info.split_key.split_ulid, split_info) + .insert(split_info.split_key.split_id.clone(), split_info) .is_none(); - assert!(split_ulid_was_absent); + assert!(split_id_was_absent); } /// Touch the file, updating its last access time, possibly extending its life in the @@ -237,21 +239,21 @@ impl SplitTable { /// If the file is already on the disk cache, return `Some(num_bytes)`. /// If the file is not in cache, return `None`, and register the file in the candidate for /// download list. - pub fn touch(&mut self, split_ulid: Ulid, storage_uri: &Uri) -> Option { + pub fn touch(&mut self, split_id: SplitId, storage_uri: &Uri) -> Option { let timestamp = compute_timestamp(self.origin_time); - let status = self.mutate_split(split_ulid, |old_split_info| { + let status = self.mutate_split(split_id, |old_split_info, split_id| { if let Some(mut split_info) = old_split_info { split_info.split_key.last_accessed = timestamp; split_info } else { SplitInfo { split_key: SplitKey { - split_ulid, + split_id: split_id.clone(), last_accessed: timestamp, }, status: Status::Candidate(CandidateSplit { storage_uri: storage_uri.clone(), - split_ulid, + split_id, living_token: Arc::new(()), }), } @@ -264,25 +266,27 @@ impl SplitTable { } } - /// Mutates a split ulid. + /// Mutates the split with the given id. /// /// By design this function maintains the invariant. - /// It removes the split with the given ulid, modifies, and re + /// It removes the split with the given id, modifies it, and re-inserts it. + /// The owned `split_id` is handed to `mutate_fn` so it can be reused when building a fresh + /// `SplitInfo` (avoiding an extra allocation). fn mutate_split( &mut self, - split_ulid: Ulid, - mutate_fn: impl FnOnce(Option) -> SplitInfo, + split_id: SplitId, + mutate_fn: impl FnOnce(Option, SplitId) -> SplitInfo, ) -> Status { - let split_info_opt = self.remove(split_ulid); - let new_split: SplitInfo = mutate_fn(split_info_opt); + let split_info_opt = self.remove(&split_id); + let new_split: SplitInfo = mutate_fn(split_info_opt, split_id); let new_status = new_split.status.clone(); self.insert(new_split); new_status } - fn change_split_status(&mut self, split_ulid: Ulid, status: Status) { + fn change_split_status(&mut self, split_id: SplitId, status: Status) { let start_time = self.origin_time; - self.mutate_split(split_ulid, move |split_info_opt| { + self.mutate_split(split_id, move |split_info_opt, split_id| { if let Some(mut split_info) = split_info_opt { split_info.status = status; split_info @@ -290,7 +294,7 @@ impl SplitTable { SplitInfo { split_key: SplitKey { last_accessed: compute_timestamp(start_time), - split_ulid, + split_id, }, status, } @@ -298,9 +302,9 @@ impl SplitTable { }); } - pub(crate) fn report(&mut self, split_ulid: Ulid, storage_uri: Uri) { + pub(crate) fn report(&mut self, split_id: SplitId, storage_uri: Uri) { let origin_time = self.origin_time; - self.mutate_split(split_ulid, move |split_info_opt| { + self.mutate_split(split_id, move |split_info_opt, split_id| { if let Some(split_info) = split_info_opt { return split_info; } @@ -308,11 +312,11 @@ impl SplitTable { split_key: SplitKey { last_accessed: compute_timestamp(origin_time) .saturating_sub(NEWLY_REPORTED_SPLIT_LAST_TIME.as_micros() as u64), - split_ulid, + split_id: split_id.clone(), }, status: Status::Candidate(CandidateSplit { storage_uri, - split_ulid, + split_id, living_token: Arc::new(()), }), } @@ -323,13 +327,13 @@ impl SplitTable { fn truncate_candidate_list(&mut self) { // we remove one more to make place for one candidate about to be inserted while self.candidate_splits.len() >= MAX_NUM_CANDIDATES { - let worst_candidate = self.candidate_splits.first().unwrap().split_ulid; - self.remove(worst_candidate); + let worst_candidate = self.candidate_splits.first().unwrap().split_id.clone(); + self.remove(&worst_candidate); } } - pub(crate) fn register_as_downloaded(&mut self, split_ulid: Ulid, num_bytes: u64) { - self.change_split_status(split_ulid, Status::OnDisk { num_bytes }); + pub(crate) fn register_as_downloaded(&mut self, split_id: SplitId, num_bytes: u64) { + self.change_split_status(split_id, Status::OnDisk { num_bytes }); } /// Change the state of the given split from candidate to downloading state, @@ -337,8 +341,8 @@ impl SplitTable { /// /// This function does NOT trigger the download itself. It is up to /// the caller to actually initiate the download. - pub(crate) fn start_download(&mut self, split_ulid: Ulid) -> Option { - let split_info = self.remove(split_ulid)?; + pub(crate) fn start_download(&mut self, split_id: &SplitId) -> Option { + let split_info = self.remove(split_id)?; let Status::Candidate(candidate_split) = split_info.status else { self.insert(split_info); return None; @@ -352,7 +356,7 @@ impl SplitTable { } fn best_candidate(&self) -> Option { - self.candidate_splits.last().copied() + self.candidate_splits.last().cloned() } fn is_out_of_limits(&self) -> bool { @@ -380,15 +384,18 @@ impl SplitTable { pub(crate) fn make_room_for_split_if_necessary( &mut self, last_access_date: LastAccessDate, - ) -> Result, NoRoomAvailable> { + ) -> Result, NoRoomAvailable> { let mut split_infos = Vec::new(); while self.is_out_of_limits() { - if let Some(first_split) = self.on_disk_splits.first() { - if first_split.last_accessed > last_access_date { + // We clone the oldest split's key so we can drop the immutable borrow on + // `on_disk_splits` before calling `remove`, which needs `&mut self`. + let oldest_split_key_opt: Option = self.on_disk_splits.first().cloned(); + if let Some(oldest_split_key) = oldest_split_key_opt { + if oldest_split_key.last_accessed > last_access_date { // This is not worth doing the eviction. break; } - split_infos.extend(self.remove(first_split.split_ulid)); + split_infos.extend(self.remove(&oldest_split_key.split_id)); } else { break; } @@ -403,18 +410,18 @@ impl SplitTable { } else { Ok(split_infos .into_iter() - .map(|split_info| split_info.split_key.split_ulid) + .map(|split_info| split_info.split_key.split_id) .collect()) } } pub(crate) fn find_download_opportunity(&mut self) -> Option { let best_candidate_split_key = self.best_candidate()?; - let splits_to_delete: Vec = self + let splits_to_delete: Vec = self .make_room_for_split_if_necessary(best_candidate_split_key.last_accessed) .ok()?; let split_to_download: CandidateSplit = - self.start_download(best_candidate_split_key.split_ulid)?; + self.start_download(&best_candidate_split_key.split_id)?; Some(DownloadOpportunity { splits_to_delete, split_to_download, @@ -433,14 +440,14 @@ pub(crate) struct NoRoomAvailable; #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct CandidateSplit { pub storage_uri: Uri, - pub split_ulid: Ulid, + pub split_id: SplitId, pub living_token: Arc<()>, } pub(crate) struct DownloadOpportunity { // At this point, the split have already been removed from the split table. // The file however need to be deleted. - pub splits_to_delete: Vec, + pub splits_to_delete: Vec, pub split_to_download: CandidateSplit, } @@ -452,6 +459,7 @@ mod tests { use bytesize::ByteSize; use quickwit_common::uri::Uri; use quickwit_config::SplitCacheLimits; + use quickwit_proto::types::SplitId; use ulid::Ulid; use crate::split_cache::split_table::{ @@ -460,11 +468,19 @@ mod tests { const TEST_STORAGE_URI: &str = "s3://test"; - fn sorted_split_ulids(num_splits: usize) -> Vec { - let mut split_ulids: Vec = - std::iter::repeat_with(Ulid::new).take(num_splits).collect(); - split_ulids.sort(); - split_ulids + /// Generates split ids backed by ULIDs. We keep using ULIDs in tests because the cache's + /// ordering relies on lexicographic ordering of the ids, and ULID strings happen to be + /// time-sortable — which makes for readable, deterministic tests. + fn new_test_split_id() -> SplitId { + SplitId::from(Ulid::new().to_string()) + } + + fn sorted_split_ids(num_splits: usize) -> Vec { + let mut split_ids: Vec = std::iter::repeat_with(new_test_split_id) + .take(num_splits) + .collect(); + split_ids.sort(); + split_ids } #[test] @@ -478,13 +494,13 @@ mod tests { }, Default::default(), ); - let ulids = sorted_split_ulids(2); - let ulid1 = ulids[0]; - let ulid2 = ulids[1]; - split_table.report(ulid1, Uri::for_test(TEST_STORAGE_URI)); - split_table.report(ulid2, Uri::for_test(TEST_STORAGE_URI)); + let split_ids = sorted_split_ids(2); + let split_id1 = split_ids[0].clone(); + let split_id2 = split_ids[1].clone(); + split_table.report(split_id1, Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id2.clone(), Uri::for_test(TEST_STORAGE_URI)); let candidate = split_table.best_candidate().unwrap(); - assert_eq!(candidate.split_ulid, ulid2); + assert_eq!(candidate.split_id, split_id2); } #[test] @@ -498,15 +514,15 @@ mod tests { }, Default::default(), ); - let ulids = sorted_split_ulids(2); - let ulid1 = ulids[0]; - let ulid2 = ulids[1]; - split_table.report(ulid1, Uri::for_test(TEST_STORAGE_URI)); - split_table.report(ulid2, Uri::for_test(TEST_STORAGE_URI)); - let num_bytes_opt = split_table.touch(ulid1, &Uri::for_test("s3://test1/")); + let split_ids = sorted_split_ids(2); + let split_id1 = split_ids[0].clone(); + let split_id2 = split_ids[1].clone(); + split_table.report(split_id1.clone(), Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id2, Uri::for_test(TEST_STORAGE_URI)); + let num_bytes_opt = split_table.touch(split_id1.clone(), &Uri::for_test("s3://test1/")); assert!(num_bytes_opt.is_none()); let candidate = split_table.best_candidate().unwrap(); - assert_eq!(candidate.split_ulid, ulid1); + assert_eq!(candidate.split_id, split_id1); } #[test] @@ -520,25 +536,25 @@ mod tests { }, Default::default(), ); - let ulid1 = Ulid::new(); - split_table.report(ulid1, Uri::for_test(TEST_STORAGE_URI)); + let split_id1 = new_test_split_id(); + split_table.report(split_id1.clone(), Uri::for_test(TEST_STORAGE_URI)); assert_eq!(split_table.num_bytes(), 0); - let download = split_table.start_download(ulid1); + let download = split_table.start_download(&split_id1); assert!(download.is_some()); - assert!(split_table.start_download(ulid1).is_none()); - split_table.register_as_downloaded(ulid1, 10_000_000); + assert!(split_table.start_download(&split_id1).is_none()); + split_table.register_as_downloaded(split_id1.clone(), 10_000_000); assert_eq!(split_table.num_bytes(), 10_000_000); assert_eq!( - split_table.touch(ulid1, &Uri::for_test(TEST_STORAGE_URI)), + split_table.touch(split_id1, &Uri::for_test(TEST_STORAGE_URI)), Some(10_000_000) ); - let ulid2 = Ulid::new(); - split_table.report(ulid2, Uri::for_test("s3://test`/")); - let download = split_table.start_download(ulid2); + let split_id2 = new_test_split_id(); + split_table.report(split_id2.clone(), Uri::for_test("s3://test`/")); + let download = split_table.start_download(&split_id2); assert!(download.is_some()); - assert!(split_table.start_download(ulid2).is_none()); + assert!(split_table.start_download(&split_id2).is_none()); assert_eq!(split_table.num_bytes(), 10_000_000); - split_table.register_as_downloaded(ulid2, 3_000_000); + split_table.register_as_downloaded(split_id2, 3_000_000); assert_eq!(split_table.num_bytes(), 13_000_000); } @@ -553,31 +569,34 @@ mod tests { }, Default::default(), ); - let mut split_ulids: Vec = std::iter::repeat_with(Ulid::new).take(6).collect(); - split_ulids.sort(); + let split_ids = sorted_split_ids(6); let splits = [ - (split_ulids[0], 10_000), - (split_ulids[1], 20_000), - (split_ulids[2], 300_000), - (split_ulids[3], 400_000), - (split_ulids[4], 100_000), - (split_ulids[5], 300_000), + (split_ids[0].clone(), 10_000), + (split_ids[1].clone(), 20_000), + (split_ids[2].clone(), 300_000), + (split_ids[3].clone(), 400_000), + (split_ids[4].clone(), 100_000), + (split_ids[5].clone(), 300_000), ]; - for (split_ulid, num_bytes) in splits { - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); - split_table.register_as_downloaded(split_ulid, num_bytes); + for (split_id, num_bytes) in &splits { + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); + split_table.register_as_downloaded(split_id.clone(), *num_bytes); } - let new_ulid = Ulid::new(); - split_table.report(new_ulid, Uri::for_test(TEST_STORAGE_URI)); + let new_split_id = new_test_split_id(); + split_table.report(new_split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); let DownloadOpportunity { splits_to_delete, split_to_download, } = split_table.find_download_opportunity().unwrap(); assert_eq!( &splits_to_delete[..], - &[splits[0].0, splits[1].0, splits[2].0][..] + &[ + splits[0].0.clone(), + splits[1].0.clone(), + splits[2].0.clone() + ][..] ); - assert_eq!(split_to_download.split_ulid, new_ulid); + assert_eq!(split_to_download.split_id, new_split_id); } #[test] @@ -591,28 +610,30 @@ mod tests { }, Default::default(), ); - let mut split_ulids: Vec = std::iter::repeat_with(Ulid::new).take(6).collect(); - split_ulids.sort(); + let split_ids = sorted_split_ids(6); let splits = [ - (split_ulids[0], 10_000), - (split_ulids[1], 20_000), - (split_ulids[2], 300_000), - (split_ulids[3], 400_000), - (split_ulids[4], 100_000), - (split_ulids[5], 300_000), + (split_ids[0].clone(), 10_000), + (split_ids[1].clone(), 20_000), + (split_ids[2].clone(), 300_000), + (split_ids[3].clone(), 400_000), + (split_ids[4].clone(), 100_000), + (split_ids[5].clone(), 300_000), ]; - for (split_ulid, num_bytes) in splits { - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); - split_table.register_as_downloaded(split_ulid, num_bytes); + for (split_id, num_bytes) in &splits { + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); + split_table.register_as_downloaded(split_id.clone(), *num_bytes); } - let new_ulid = Ulid::new(); - split_table.report(new_ulid, Uri::for_test(TEST_STORAGE_URI)); + let new_split_id = new_test_split_id(); + split_table.report(new_split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); let DownloadOpportunity { splits_to_delete, split_to_download, } = split_table.find_download_opportunity().unwrap(); - assert_eq!(&splits_to_delete[..], &[splits[0].0, splits[1].0]); - assert_eq!(split_to_download.split_ulid, new_ulid); + assert_eq!( + &splits_to_delete[..], + &[splits[0].0.clone(), splits[1].0.clone()] + ); + assert_eq!(split_to_download.split_id, new_split_id); } #[test] @@ -626,23 +647,23 @@ mod tests { }, Default::default(), ); - let split_ulid = Ulid::new(); - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); - let candidate = split_table.start_download(split_ulid).unwrap(); + let split_id = new_test_split_id(); + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); + let candidate = split_table.start_download(&split_id).unwrap(); // This report should be cancelled as we have a download currently running. - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); - assert!(split_table.start_download(split_ulid).is_none()); + assert!(split_table.start_download(&split_id).is_none()); std::mem::drop(candidate); // Still not possible to start a download. - assert!(split_table.start_download(split_ulid).is_none()); + assert!(split_table.start_download(&split_id).is_none()); // This report should be considered as our candidate (and its alive token has been dropped) - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); + split_table.report(split_id.clone(), Uri::for_test(TEST_STORAGE_URI)); - let candidate2 = split_table.start_download(split_ulid).unwrap(); - assert_eq!(candidate2.split_ulid, split_ulid); + let candidate2 = split_table.start_download(&split_id).unwrap(); + assert_eq!(candidate2.split_id, split_id); } #[test] @@ -657,8 +678,8 @@ mod tests { Default::default(), ); for i in 1..2_000 { - let split_ulid = Ulid::new(); - split_table.report(split_ulid, Uri::for_test(TEST_STORAGE_URI)); + let split_id = new_test_split_id(); + split_table.report(split_id, Uri::for_test(TEST_STORAGE_URI)); assert_eq!( split_table.candidate_splits.len(), i.min(super::MAX_NUM_CANDIDATES) @@ -679,16 +700,18 @@ mod tests { Default::default(), ); for i in (0u128..=super::MAX_NUM_CANDIDATES as u128).rev() { - let split_ulid = Ulid(i); + // ULID strings preserve the ordering of the underlying u128, so the lexicographic + // ordering of these split ids matches the numeric ordering of `i`. + let split_id = SplitId::from(Ulid(i).to_string()); let candidate_split = CandidateSplit { storage_uri: Uri::for_test(TEST_STORAGE_URI), - split_ulid, + split_id: split_id.clone(), living_token: Arc::new(()), }; let split_info = SplitInfo { split_key: SplitKey { last_accessed: 0u64, - split_ulid, + split_id, }, status: Status::Candidate(candidate_split), }; From 24e2d16a2d2a4ac6efe16896e35aeb0f64c87dec Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 30 Jun 2026 17:09:10 +0200 Subject: [PATCH 2/4] Added support for s3 prefixing. This is only done in the split id generation phase and ruled by a QW_RANDOM_SPLIT_PREFIX env variable. --- quickwit/quickwit-proto/src/types/split_id.rs | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-proto/src/types/split_id.rs b/quickwit/quickwit-proto/src/types/split_id.rs index 43ec4b547dc..3c1fdd5199f 100644 --- a/quickwit/quickwit-proto/src/types/split_id.rs +++ b/quickwit/quickwit-proto/src/types/split_id.rs @@ -42,8 +42,37 @@ pub struct SplitId(Arc); impl SplitId { /// Generates a new split id. + /// + /// When the `QW_RANDOM_SPLIT_PREFIX` environment variable is set to `true`, + /// the ID is prefixed with 4 random characters taken from the tail of the + /// ULID. This spreads S3 key prefixes across partitions and avoids hot-spots + /// when writes concentrate on the same time window. + /// + /// Layout with prefix enabled: `<4 random chars>_<22 remaining ULID chars>` + /// (27 chars total vs the usual 26). pub fn new() -> Self { - SplitId::from(ulid::Ulid::new().to_string()) + static RANDOM_PREFIX_ENABLED: std::sync::LazyLock = std::sync::LazyLock::new(|| { + quickwit_common::get_bool_from_env("QW_RANDOM_SPLIT_PREFIX", false) + }); + Self::from_ulid(ulid::Ulid::new(), *RANDOM_PREFIX_ENABLED) + } + + /// Constructs a `SplitId` from a `Ulid`, optionally prepending a random prefix. + /// + /// The prefix is the last 4 characters of the ULID string (pure random bits), + /// separated from the remaining 22 characters by `_`. + fn from_ulid(ulid: ulid::Ulid, random_prefix: bool) -> Self { + let ulid_str = ulid.to_string(); // 26 Crockford base32 chars + if random_prefix { + // The last 4 characters of a ULID are random bits. + // Moving them to a prefix spreads S3 key prefixes while keeping the + // full ULID entropy in the string. + let prefix = &ulid_str[22..]; // chars 22-25: random section + let body = &ulid_str[..22]; // chars 0-21: timestamp + most of random + SplitId::from(format!("{prefix}_{body}")) + } else { + SplitId::from(ulid_str) + } } /// Returns the id as a string slice. @@ -165,4 +194,20 @@ mod tests { let split_id_b = SplitId::from("bbb"); assert!(split_id_a < split_id_b); } + + #[test] + fn test_split_id_from_ulid_without_prefix() { + let ulid = ulid::Ulid::from_string("01HAV29D4XY3D462FS3D8K5Q2H").unwrap(); + let split_id = SplitId::from_ulid(ulid, false); + assert_eq!(split_id.as_str(), "01HAV29D4XY3D462FS3D8K5Q2H"); + } + + #[test] + fn test_split_id_from_ulid_with_prefix() { + // "01HAV29D4XY3D462FS3D8K5Q2H": last 4 chars = "5Q2H", first 22 = "01HAV29D4XY3D462FS3D8K". + let ulid = ulid::Ulid::from_string("01HAV29D4XY3D462FS3D8K5Q2H").unwrap(); + let split_id = SplitId::from_ulid(ulid, true); + assert_eq!(split_id.as_str(), "5Q2H_01HAV29D4XY3D462FS3D8K"); + assert_eq!(split_id.as_str().len(), 27); + } } From 4e55baf50f116a1c95f88f741cbad89ae794c380 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 30 Jun 2026 17:45:22 +0200 Subject: [PATCH 3/4] Adapting the timestamp from split id code to work on both format. --- .../src/split_store/indexing_split_cache.rs | 50 ++++++++++--------- quickwit/quickwit-proto/src/types/split_id.rs | 2 +- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index 12870939b85..f70bd992010 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -66,20 +66,25 @@ async fn num_bytes_in_folder(directory_path: &Path) -> io::Result { Ok(ByteSize(total_bytes)) } -/// The base-32 string length of a ULID. -const ULID_STR_LEN: usize = 26; - -/// Recovers the ULID that terminates a split id, used to derive the split's creation time. +/// Recovers the creation time of a split from its id. /// /// TEMPORARY: the indexing split store is slated for removal once the compactor service lands. -/// Until then we recover a split's creation time from the ULID that terminates its id. A split id -/// may carry a random prefix (added to spread S3 read load), but it always ENDS with a 26-char -/// ULID, so we slice the suffix before parsing. Ids that do not end with a valid ULID fall back to -/// the epoch, which simply makes them the first eviction candidates. +/// +/// Split IDs are either a plain 26-char ULID or the prefixed form produced when +/// `QW_RANDOM_SPLIT_PREFIX` is enabled: `<4 random chars>_<22 body chars>` (27 chars). +/// In the prefixed form the original ULID is reconstructed as `body + prefix` before parsing. +/// IDs that do not parse as a valid ULID fall back to now, treating them as freshly created. fn split_creation_time(split_id: &SplitId) -> SystemTime { - let split_id_str = split_id.as_str(); - let ulid_start = split_id_str.len().saturating_sub(ULID_STR_LEN); - if let Ok(ulid) = Ulid::from_string(&split_id_str[ulid_start..]) { + let s = split_id.as_str(); + // Detect the prefixed format by checking for `_` at byte position 4. + let ulid_str: std::borrow::Cow = if s.len() == 27 && s.as_bytes()[4] == b'_' { + let prefix = &s[..4]; // chars 22-25 of the original ULID + let body = &s[5..]; // chars 0-21 of the original ULID + format!("{body}{prefix}").into() + } else { + s.into() + }; + if let Ok(ulid) = Ulid::from_string(&ulid_str) { ulid.datetime() } else { SystemTime::now() @@ -554,24 +559,23 @@ mod tests { #[test] fn test_split_creation_time() { let ulid_str = "01GF5449X7DA53TK9F9W2ZJST2"; - let expected = Ulid::from_string(ulid_str).unwrap().datetime(); + let ulid = Ulid::from_string(ulid_str).unwrap(); + let expected = ulid.datetime(); // A bare ULID split id (no prefix) resolves to that ULID's creation time. assert_eq!(split_creation_time(&SplitId::from(ulid_str)), expected); - // A split id carrying a random prefix still resolves to its trailing ULID's time. - let prefixed_split_id = SplitId::from(format!("deadbeef{ulid_str}")); + // A prefixed split id (QW_RANDOM_SPLIT_PREFIX format) resolves to the same creation time. + let prefixed_split_id = SplitId::from_ulid(ulid, true); assert_eq!(split_creation_time(&prefixed_split_id), expected); - // An id whose trailing 26 chars are not a valid ULID (or that is shorter than a ULID) - // falls back to ~now, so a malformed split is treated as freshly created rather than as - // the first eviction candidate. - let before = SystemTime::now(); - let invalid = split_creation_time(&SplitId::from("this-is-not-a-valid-ulid!!")); - let short = split_creation_time(&SplitId::from("short")); - let after = SystemTime::now(); - assert!(invalid >= before && invalid <= after); - assert!(short >= before && short <= after); + // An id that does not parse as a valid ULID falls back to ~now, so a malformed split is + // treated as freshly created rather than as the first eviction candidate. + let now = SystemTime::now(); + let date_from_invalid = split_creation_time(&SplitId::from("this-is-not-a-valid-ulid!!")); + let date_from_short = split_creation_time(&SplitId::from("short")); + assert!(date_from_invalid >= now); + assert!(date_from_short >= now); } async fn create_fake_split( diff --git a/quickwit/quickwit-proto/src/types/split_id.rs b/quickwit/quickwit-proto/src/types/split_id.rs index 3c1fdd5199f..e3dbd22cc43 100644 --- a/quickwit/quickwit-proto/src/types/split_id.rs +++ b/quickwit/quickwit-proto/src/types/split_id.rs @@ -61,7 +61,7 @@ impl SplitId { /// /// The prefix is the last 4 characters of the ULID string (pure random bits), /// separated from the remaining 22 characters by `_`. - fn from_ulid(ulid: ulid::Ulid, random_prefix: bool) -> Self { + pub fn from_ulid(ulid: ulid::Ulid, random_prefix: bool) -> Self { let ulid_str = ulid.to_string(); // 26 Crockford base32 chars if random_prefix { // The last 4 characters of a ULID are random bits. From fecf26e410ca02235f09e5db4015bc6db00a0d3c Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 1 Jul 2026 14:14:08 +0200 Subject: [PATCH 4/4] Code review --- quickwit/Cargo.lock | 1 - .../quickwit-indexing/src/actors/indexer.rs | 4 +- .../quickwit-indexing/src/actors/packager.rs | 8 +- .../merge_policy/const_write_amplification.rs | 4 +- .../merge_policy/stable_log_merge_policy.rs | 2 +- .../src/models/indexed_split.rs | 8 +- quickwit/quickwit-indexing/src/source/mod.rs | 5 +- .../src/split_store/indexing_split_cache.rs | 126 +++++++++++------- .../src/split_store/indexing_split_store.rs | 8 +- quickwit/quickwit-proto/src/types/split_id.rs | 7 - quickwit/quickwit-query/Cargo.toml | 1 - 11 files changed, 98 insertions(+), 76 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 7d9ac914da6..3e9f2194b69 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9234,7 +9234,6 @@ dependencies = [ "proptest", "quickwit-common", "quickwit-datetime", - "quickwit-proto", "regex", "rustc-hash", "serde", diff --git a/quickwit/quickwit-indexing/src/actors/indexer.rs b/quickwit/quickwit-indexing/src/actors/indexer.rs index 552362a8e71..52941167016 100644 --- a/quickwit/quickwit-indexing/src/actors/indexer.rs +++ b/quickwit/quickwit-indexing/src/actors/indexer.rs @@ -134,7 +134,7 @@ impl IndexerState { io_controls, )?; info!( - split_id=indexed_split.split_id_str(), + split_id=%indexed_split.split_id(), partition_id=%partition_id, "new-split" ); @@ -665,7 +665,7 @@ impl Indexer { return Ok(()); } let num_splits = splits.len() as u64; - let split_ids = splits.iter().map(|split| split.split_id_str()).join(","); + let split_ids: String = splits.iter().map(|split| split.split_id()).join(","); info!( index=%self.indexer_state.pipeline_id.index_uid, source=self.indexer_state.pipeline_id.source_id.as_str(), diff --git a/quickwit/quickwit-indexing/src/actors/packager.rs b/quickwit/quickwit-indexing/src/actors/packager.rs index 5ac42845ab1..df94e76ff93 100644 --- a/quickwit/quickwit-indexing/src/actors/packager.rs +++ b/quickwit/quickwit-indexing/src/actors/packager.rs @@ -126,7 +126,7 @@ impl Handler for Packager { let split_ids: Vec = batch .splits .iter() - .map(|split| split.split_id_str().to_string()) + .map(|split| split.split_id().to_string()) .collect_vec(); debug!( split_ids=?split_ids, @@ -272,12 +272,12 @@ fn create_packaged_split( tag_fields: &[NamedField], ctx: &ActorContext, ) -> anyhow::Result { - debug!(split_id = split.split_id_str(), "create-packaged-split"); + debug!(split_id = %split.split_id(), "create-packaged-split"); let split_files = list_split_files(segment_metas, &split.split_scratch_directory)?; // Extracts tag values from inverted indexes only when a field cardinality is less // than `MAX_VALUES_PER_TAG_FIELD`. - debug!(split_id = split.split_id_str(), tag_fields =? tag_fields, "extract-tags-values"); + debug!(split_id = %split.split_id(), tag_fields =? tag_fields, "extract-tags-values"); let index_reader = split .index .reader_builder() @@ -307,7 +307,7 @@ fn create_packaged_split( ctx.record_progress(); - debug!(split_id = split.split_id_str(), "build-hotcache"); + debug!(split_id = %split.split_id(), "build-hotcache"); let mut hotcache_bytes = Vec::new(); build_hotcache(split.split_scratch_directory.path(), &mut hotcache_bytes)?; ctx.record_progress(); diff --git a/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs b/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs index 0fc1dcc813b..2ac0fa64f24 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/const_write_amplification.rs @@ -123,7 +123,7 @@ impl ConstWriteAmplificationMergePolicy { splits.sort_by(|left, right| { left.create_timestamp .cmp(&right.create_timestamp) - .then_with(|| left.split_id().cmp(&right.split_id())) + .then_with(|| left.split_id().cmp(right.split_id())) }); let mut merge_operations = Vec::new(); while let Some(merge_op) = @@ -194,7 +194,7 @@ impl MergePolicy for ConstWriteAmplificationMergePolicy { left.create_timestamp .cmp(&right.create_timestamp) .reverse() - .then_with(|| left.split_id().cmp(&right.split_id())) + .then_with(|| left.split_id().cmp(right.split_id())) }); let mut merge_operations = Vec::new(); while merge_operations.len() < self.config.max_finalize_merge_operations { diff --git a/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs b/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs index 878965c500a..43f2e352001 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/stable_log_merge_policy.rs @@ -173,7 +173,7 @@ fn cmp_splits_by_reverse_time_end(left: &SplitMetadata, right: &SplitMetadata) - .reverse() .then_with(|| left.num_docs.cmp(&right.num_docs)) .then_with(|| { - left.split_id().cmp(&right.split_id()) //< for determinism. + left.split_id().cmp(right.split_id()) //< for determinism. }) } diff --git a/quickwit/quickwit-indexing/src/models/indexed_split.rs b/quickwit/quickwit-indexing/src/models/indexed_split.rs index 8b30bada3fb..6d96d223dcf 100644 --- a/quickwit/quickwit-indexing/src/models/indexed_split.rs +++ b/quickwit/quickwit-indexing/src/models/indexed_split.rs @@ -44,8 +44,8 @@ pub struct IndexedSplit { } impl IndexedSplit { - pub fn split_id_str(&self) -> &str { - &self.split_attrs.split_id.as_str() + pub fn split_id(&self) -> &SplitId { + &self.split_attrs.split_id } } @@ -144,8 +144,8 @@ impl IndexedSplitBuilder { self.split_scratch_directory.path() } - pub fn split_id_str(&self) -> &str { - self.split_attrs.split_id.as_str() + pub fn split_id(&self) -> &SplitId { + &self.split_attrs.split_id } } diff --git a/quickwit/quickwit-indexing/src/source/mod.rs b/quickwit/quickwit-indexing/src/source/mod.rs index 90c8f9610e3..94ee76de711 100644 --- a/quickwit/quickwit-indexing/src/source/mod.rs +++ b/quickwit/quickwit-indexing/src/source/mod.rs @@ -762,10 +762,9 @@ mod test_setup_helper { use quickwit_metastore::checkpoint::{IndexCheckpointDelta, PartitionId}; use quickwit_metastore::{CreateIndexRequestExt, SplitMetadata, StageSplitsRequestExt}; use quickwit_proto::metastore::{CreateIndexRequest, PublishSplitsRequest, StageSplitsRequest}; - use quickwit_proto::types::Position; + use quickwit_proto::types::{Position, SplitId}; use super::*; - use crate::new_split_id; pub async fn setup_index( metastore: MetastoreServiceClient, @@ -790,7 +789,7 @@ mod test_setup_helper { if partition_deltas.is_empty() { return index_uid; } - let split_id = new_split_id(); + let split_id = SplitId::new(); let split_metadata = SplitMetadata::for_test(split_id.clone()); let stage_splits_request = StageSplitsRequest::try_from_split_metadata(index_uid.clone(), &split_metadata) diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs index f70bd992010..ebb92977740 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_cache.rs @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; -use std::collections::btree_map::Entry; +use std::collections::{BTreeMap, HashMap}; use std::io; use std::path::{Path, PathBuf}; use std::time::{Duration, SystemTime}; @@ -102,11 +101,12 @@ struct SplitFolder { num_bytes: ByteSize, } +/// Orders `SplitFolder`s for eviction: oldest `created_at` first, ties broken by +/// insertion order (`insertion_seq`) rather than by `split_id`. #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] -struct SplitFolderKey { - // Creation time, recovered once from the trailing ULID of `split_id` at construction time +struct SplitFolderOrderKey { created_at: SystemTime, - split_id: SplitId, + insertion_seq: u64, } impl SplitFolder { @@ -175,11 +175,17 @@ struct InnerSplitCache { struct SplitFolderRegistry { /// Splits owned by the local split store, which reside in the split_store_folder. /// - /// `SplitFolder`s are ordered by creation time (see their `Ord` impl), so iteration order + /// Entries are ordered by `SplitFolderOrderKey` (see its `Ord` impl), so iteration order /// matches creation order and the oldest split is the set's first element. We evict the oldest /// split first. Note this is not an LRU strategy because we do not care about the last access /// time, but we only consider the creation time. - split_folders: BTreeMap, + split_folders: BTreeMap, + /// Index from `split_id` to the key used in `split_folders`, so that a split can be + /// removed by id without recomputing `created_at` (which is not deterministic for split ids + /// that do not parse as a ULID). + key_by_split_id: HashMap, + /// Monotonically increasing counter used as the tiebreaker component of `SplitFolderOrderKey`. + next_insertion_seq: u64, /// The split store quota shared among all indexing split stores. split_store_quota: SplitStoreQuota, } @@ -188,6 +194,8 @@ impl SplitFolderRegistry { pub fn with_quota(split_store_quota: SplitStoreQuota) -> SplitFolderRegistry { SplitFolderRegistry { split_folders: BTreeMap::default(), + key_by_split_id: HashMap::default(), + next_insertion_seq: 0, split_store_quota, } } @@ -195,60 +203,52 @@ impl SplitFolderRegistry { /// Returns an iterator over the split folders sorted by creation time. #[cfg(any(test, feature = "testsuite"))] fn iter(&self) -> impl Iterator + '_ { - self.split_folders - .iter() - .map(|(key, num_bytes)| SplitFolder { - created_at: key.created_at, - split_id: key.split_id.clone(), - num_bytes: *num_bytes, - }) + self.split_folders.values().cloned() } /// Returns whether the element was inserted or was already present fn insert(&mut self, split_folder: SplitFolder) -> bool { - let split_folder_key = SplitFolderKey { + if self.key_by_split_id.contains_key(&split_folder.split_id) { + return false; + } + let split_folder_key = SplitFolderOrderKey { created_at: split_folder.created_at, - split_id: split_folder.split_id.clone(), + insertion_seq: self.next_insertion_seq, }; - if let Entry::Vacant(entry) = self.split_folders.entry(split_folder_key) { - entry.insert(split_folder.num_bytes); - self.split_store_quota.add_split(split_folder.num_bytes); - true - } else { - false - } + self.next_insertion_seq = self.next_insertion_seq.wrapping_add(1); + self.key_by_split_id + .insert(split_folder.split_id.clone(), split_folder_key.clone()); + self.split_store_quota.add_split(split_folder.num_bytes); + self.split_folders.insert(split_folder_key, split_folder); + true } /// Returns the split folder if it was indeed present in the registry. fn remove(&mut self, split_id: &SplitId) -> Option { - // A `SplitFolder` carries its own ordering key, so to find one by split id we build a - // lower-bound probe — `num_bytes` is the last and only unknown component of the key — and - // take the first entry that actually matches the id. `created_at` is derived - // deterministically from the id (see `split_creation_time`). - let created_at = split_creation_time(split_id); - let split_folder_key = SplitFolderKey { - created_at, - split_id: split_id.clone(), - }; - let num_bytes = self.split_folders.remove(&split_folder_key)?; - self.split_store_quota.remove_split(num_bytes); - Some(SplitFolder { - num_bytes, - split_id: split_id.clone(), - created_at, - }) + let split_folder_key = self.key_by_split_id.remove(split_id)?; + let split_folder = self + .split_folders + .remove(&split_folder_key) + .expect("key_by_split_id and split_folders must stay in sync"); + self.split_store_quota.remove_split(split_folder.num_bytes); + Some(split_folder) } - /// Returns the oldest split (oldest in the sense of the creation time). - fn oldest_split(&self) -> Option { - let (key, _num_bytes) = self.split_folders.first_key_value()?; - Some(key.clone()) + /// Returns the creation time of the oldest split in the registry. + fn oldest_split_folder_key(&self) -> Option { + self.split_folders.keys().next().cloned() } /// Removes the oldest split. fn pop_oldest(&mut self) -> Option { - let oldest_split_folder = self.oldest_split()?; - self.remove(&oldest_split_folder.split_id) + let oldest_key = self.oldest_split_folder_key()?; + let split_folder = self + .split_folders + .remove(&oldest_key) + .expect("key was just read from split_folders"); + self.key_by_split_id.remove(&split_folder.split_id); + self.split_store_quota.remove_split(split_folder.num_bytes); + Some(split_folder) } fn quota(&self) -> &SplitStoreQuota { @@ -360,8 +360,8 @@ impl InnerSplitCache { /// Removes all splits that have a creation date older than `limit`. async fn remove_splits_older_than_limit(&mut self, limit: SystemTime) -> io::Result<()> { - while let Some(oldest_split) = self.split_registry.oldest_split() { - if oldest_split.created_at >= limit { + while let Some(oldest_split_folder_key) = self.split_registry.oldest_split_folder_key() { + if oldest_split_folder_key.created_at >= limit { break; } self.evict_one_split().await?; @@ -924,4 +924,36 @@ mod tests { let quota = split_store.inspect_quota().await; assert_eq!(quota.used_num_bytes(), ByteSize(15)); } + + #[tokio::test] + async fn test_store_and_fetch_non_ulid_split_id() { + // `split_creation_time` falls back to `SystemTime::now()` for split ids that do not + // parse as a ULID. Before the fix, the registry recomputed the key at removal time by + // calling `split_creation_time` again, so a split whose id does not parse as a ULID could + // never be found again: the two `SystemTime::now()` calls (at insertion and at removal) + // essentially never return the same value. + let temp_dir_in = tempfile::tempdir().unwrap(); + let split_id: SplitId = SplitId::from("this-is-not-a-valid-ulid!!"); + let cache_dir = tempfile::tempdir().unwrap(); + let quota = SplitStoreQuota::default(); + let local_store = IndexingSplitCache::open(cache_dir.path().to_path_buf(), quota) + .await + .unwrap(); + + let split_dir = temp_dir_in.path().join("scratch"); + tokio::fs::create_dir(&split_dir).await.unwrap(); + assert!( + local_store + .move_into_cache(split_id.clone(), &split_dir) + .await + .unwrap() + ); + + let split_path = local_store + .get_cached_split(&split_id, temp_dir_in.path()) + .await + .unwrap() + .unwrap(); + assert!(split_path.try_exists().unwrap()); + } } diff --git a/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs b/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs index a881b5dc693..df157ce8d29 100644 --- a/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs +++ b/quickwit/quickwit-indexing/src/split_store/indexing_split_store.rs @@ -262,7 +262,7 @@ mod tests { let split_id2: SplitId = SplitId::new(); { - let split1_dir = temp_dir.path().join(&split_id1); + let split1_dir = temp_dir.path().join(split_id1.as_str()); fs::create_dir_all(&split1_dir).await?; let split_metadata1 = create_test_split_metadata(&split_id1); fs::write(split1_dir.join("splitfile"), b"1234").await?; @@ -284,7 +284,7 @@ mod tests { ); } { - let split2_dir = temp_dir.path().join(&split_id2); + let split2_dir = temp_dir.path().join(split_id2.as_str()); fs::create_dir_all(&split2_dir).await?; fs::write(split2_dir.join("splitfile"), b"567").await?; let split_metadata2 = create_test_split_metadata(&split_id2); @@ -354,7 +354,7 @@ mod tests { let split_payload2 = SplitPayloadBuilder::get_split_payload(&[], &[], &[5, 5, 5, 5])?; { - let split_path = temp_dir.path().join(&split_id1); + let split_path = temp_dir.path().join(split_id1.as_str()); fs::create_dir_all(&split_path).await?; fs::write(split_path.join("splitdatafile"), b"hello-world").await?; let split_metadata1 = create_test_split_metadata(&split_id1); @@ -380,7 +380,7 @@ mod tests { ); } { - let split_path = temp_dir.path().join(&split_id2); + let split_path = temp_dir.path().join(split_id2.as_str()); fs::create_dir_all(&split_path).await?; fs::write(split_path.join("splitdatafile2"), b"hello-world2").await?; let split_metadata2 = create_test_split_metadata(&split_id2); diff --git a/quickwit/quickwit-proto/src/types/split_id.rs b/quickwit/quickwit-proto/src/types/split_id.rs index e3dbd22cc43..af8942d17fb 100644 --- a/quickwit/quickwit-proto/src/types/split_id.rs +++ b/quickwit/quickwit-proto/src/types/split_id.rs @@ -15,7 +15,6 @@ use std::borrow::Borrow; use std::convert::Infallible; use std::fmt; -use std::path::Path; use std::str::FromStr; use std::sync::Arc; @@ -81,12 +80,6 @@ impl SplitId { } } -impl AsRef for SplitId { - fn as_ref(&self) -> &Path { - Path::new(self.as_str()) - } -} - // `Debug` delegates to the inner string so a split id renders identically to the // `String` it replaced (e.g. `"01HAV29D4XY3D462FS3D8K5Q2H"`, not // `SplitId("01HAV29D4XY3D462FS3D8K5Q2H")`). diff --git a/quickwit/quickwit-query/Cargo.toml b/quickwit/quickwit-query/Cargo.toml index de9b3dd07ed..d992f75a63c 100644 --- a/quickwit/quickwit-query/Cargo.toml +++ b/quickwit/quickwit-query/Cargo.toml @@ -29,7 +29,6 @@ rustc-hash = { workspace = true } quickwit-common = { workspace = true } quickwit-datetime = { workspace = true } -quickwit-proto = { workspace = true } [dev-dependencies] criterion = { workspace = true }