Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add class FragmentID. #4607

Merged
merged 4 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion format_spec/timestamped_name.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ _Note_: The presence of `[]` is indicative of an optional parameter.
| :-: | :-: | :-: |
| 1 - 2 | 1.4 - 1.5 | `__uuid_t1[_t2]` |
| 3 - 4 | 1.6 - 1.7 | `__t1_t2_uuid` |
| 5+ | 2.0+ | `__t1_t2_uuid_[v]` |
| 5+ | 2.0+ | `__t1_t2_uuid[_v]` |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment about inconsistent/missing unit test below.

8 changes: 4 additions & 4 deletions test/src/unit-average-cell-size.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2022 TileDB Inc.
* @copyright Copyright (c) 2022-2024 TileDB Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -38,6 +38,7 @@
#include "tiledb/sm/cpp_api/tiledb"
#include "tiledb/sm/cpp_api/tiledb_experimental"
#include "tiledb/sm/enums/encryption_type.h"
#include "tiledb/sm/fragment/fragment_identifier.h"
#include "tiledb/sm/misc/constants.h"
#include "tiledb/sm/misc/utils.h"

Expand Down Expand Up @@ -166,9 +167,8 @@ struct CPPAverageCellSizeFx {
REQUIRE(query.submit() == Query::Status::COMPLETE);

auto uri = sm::URI(query.fragment_uri(0));
std::pair<uint64_t, uint64_t> timestamps;
REQUIRE(sm::utils::parse::get_timestamp_range(uri, &timestamps).ok());
return {uri, timestamps};
sm::FragmentID fragment_id{uri};
return {uri, fragment_id.timestamp_range()};
}

/**
Expand Down
15 changes: 6 additions & 9 deletions test/src/unit-capi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2017-2023 TileDB Inc.
* @copyright Copyright (c) 2017-2024 TileDB Inc.
* @copyright Copyright (c) 2016 MIT and Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -53,10 +53,10 @@
#include "tiledb/sm/cpp_api/tiledb"
#include "tiledb/sm/enums/encryption_type.h"
#include "tiledb/sm/enums/serialization_type.h"
#include "tiledb/sm/fragment/fragment_identifier.h"
#include "tiledb/sm/global_state/unit_test_config.h"
#include "tiledb/sm/serialization/array.h"
#include "tiledb/sm/serialization/fragments.h"
#include "tiledb/storage_format/uri/parse_uri.h"

#include <chrono>
#include <climits>
Expand Down Expand Up @@ -142,13 +142,10 @@ void ArrayFx::remove_temp_dir(const std::string& path) {

int ArrayFx::get_fragment_timestamps(const char* path, void* data) {
auto data_vec = (std::vector<uint64_t>*)data;
std::pair<uint64_t, uint64_t> timestamp_range;
if (tiledb::sm::utils::parse::ends_with(
path, tiledb::sm::constants::write_file_suffix)) {
auto uri = tiledb::sm::URI(path);
if (tiledb::sm::utils::parse::get_timestamp_range(uri, &timestamp_range)
.ok())
data_vec->push_back(timestamp_range.first);
if (utils::parse::ends_with(path, constants::write_file_suffix)) {
FragmentID fragment_id{path};
auto timestamp_range{fragment_id.timestamp_range()};
data_vec->push_back(timestamp_range.first);
}

return 1;
Expand Down
3 changes: 2 additions & 1 deletion tiledb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# The MIT License
#
# Copyright (c) 2017-2023 TileDB, Inc.
# Copyright (c) 2017-2024 TileDB, Inc.
# Copyright (c) 2016 MIT and Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -221,6 +221,7 @@ set(TILEDB_CORE_SOURCES
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/webp_filter.cc
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/noop_filter.cc
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/filter/positive_delta_filter.cc
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_identifier.cc
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_info.cc
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/fragment/fragment_metadata.cc
${TILEDB_CORE_INCLUDE_DIR}/tiledb/sm/global_state/global_state.cc
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# The MIT License
#
# Copyright (c) 2021 TileDB, Inc.
# Copyright (c) 2021-2024 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -35,6 +35,7 @@ add_subdirectory(consolidator)
add_subdirectory(crypto)
add_subdirectory(filesystem)
add_subdirectory(filter)
add_subdirectory(fragment)
add_subdirectory(group)
add_subdirectory(metadata)
add_subdirectory(misc)
Expand Down
4 changes: 2 additions & 2 deletions tiledb/sm/array/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# The MIT License
#
# Copyright (c) 2022-2023 TileDB, Inc.
# Copyright (c) 2022-2024 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -35,8 +35,8 @@ commence(object_library array)
this_target_object_libraries(
array_schema
baseline
fragment
generic_tile_io
uri_format
vfs
)
if(TILEDB_STATS)
Expand Down
76 changes: 29 additions & 47 deletions tiledb/sm/array/array_directory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -535,28 +535,24 @@ URI ArrayDirectory::get_commits_dir(uint32_t write_version) const {
}

URI ArrayDirectory::get_commit_uri(const URI& fragment_uri) const {
auto name = fragment_uri.remove_trailing_slash().last_path_part();
auto fragment_version = utils::parse::get_fragment_version(name);

if (fragment_version < 12) {
FragmentID fragment_id{fragment_uri};
if (fragment_id.array_format_version() < 12) {
return URI(fragment_uri.to_string() + constants::ok_file_suffix);
}

auto temp_uri =
uri_.join_path(constants::array_commits_dir_name).join_path(name);
auto temp_uri = uri_.join_path(constants::array_commits_dir_name)
.join_path(fragment_id.name());
return URI(temp_uri.to_string() + constants::write_file_suffix);
}

URI ArrayDirectory::get_vacuum_uri(const URI& fragment_uri) const {
auto name = fragment_uri.remove_trailing_slash().last_path_part();
auto fragment_version = utils::parse::get_fragment_version(name);

if (fragment_version < 12) {
FragmentID fragment_id{fragment_uri};
if (fragment_id.array_format_version() < 12) {
return URI(fragment_uri.to_string() + constants::vacuum_file_suffix);
}

auto temp_uri =
uri_.join_path(constants::array_commits_dir_name).join_path(name);
auto temp_uri = uri_.join_path(constants::array_commits_dir_name)
.join_path(fragment_id.name());
return URI(temp_uri.to_string() + constants::vacuum_file_suffix);
}

Expand Down Expand Up @@ -678,11 +674,8 @@ ArrayDirectory::load_commits_dir_uris_v12_or_higher(
stdx::string::ends_with(
commits_dir_uris[i].to_string(), constants::update_file_suffix)) {
// Get the start and end timestamp for this delete/update
std::pair<uint64_t, uint64_t> timestamp_range;
RETURN_NOT_OK_TUPLE(
utils::parse::get_timestamp_range(
commits_dir_uris[i], &timestamp_range),
nullopt);
FragmentID fragment_id{commits_dir_uris[i]};
auto timestamp_range{fragment_id.timestamp_range()};

// Add the delete tile location if it overlaps the open start/end times
if (timestamps_overlap(timestamp_range, false)) {
Expand Down Expand Up @@ -772,12 +765,8 @@ ArrayDirectory::load_consolidated_commit_uris(
auto pos = ss.tellg();

// Get the start and end timestamp for this delete
std::pair<uint64_t, uint64_t> delete_timestamp_range;
RETURN_NOT_OK_TUPLE(
utils::parse::get_timestamp_range(
URI(condition_marker), &delete_timestamp_range),
nullopt,
nullopt);
FragmentID fragment_id{URI(condition_marker)};
auto delete_timestamp_range{fragment_id.timestamp_range()};

// Add the delete tile location if it overlaps the open start/end
// times
Expand Down Expand Up @@ -1028,9 +1017,8 @@ ArrayDirectory::compute_uris_to_vacuum(
auto& uri = uris[i];

// Get the start and end timestamp for this fragment
std::pair<uint64_t, uint64_t> fragment_timestamp_range;
RETURN_NOT_OK(
utils::parse::get_timestamp_range(uri, &fragment_timestamp_range));
FragmentID fragment_id{uri};
auto fragment_timestamp_range{fragment_id.timestamp_range()};
if (is_vacuum_file(uri)) {
vac_file_bitmap[i] = 1;
if (timestamps_overlap(
Expand Down Expand Up @@ -1164,8 +1152,8 @@ ArrayDirectory::compute_filtered_uris(
}

// Get the start and end timestamp for this fragment
RETURN_NOT_OK(utils::parse::get_timestamp_range(
uri, &fragment_timestamp_ranges[i]));
FragmentID fragment_id{uri};
fragment_timestamp_ranges[i] = fragment_id.timestamp_range();
if (timestamps_overlap(
fragment_timestamp_ranges[i],
!full_overlap_only &&
Expand Down Expand Up @@ -1238,16 +1226,13 @@ URI ArrayDirectory::select_latest_array_schema_uri() {
uint64_t latest_ts = 0;

for (auto& uri : array_schema_uris_) {
auto name = uri.remove_trailing_slash().last_path_part();

FragmentID fragment_id{uri};
// Skip the old schema URI name since it doesn't have timestamps
if (name == constants::array_schema_filename) {
if (fragment_id.name() == constants::array_schema_filename) {
continue;
}

std::pair<uint64_t, uint64_t> ts_range;
throw_if_not_ok(utils::parse::get_timestamp_range(uri, &ts_range));

auto ts_range{fragment_id.timestamp_range()};
if (ts_range.second > latest_ts && ts_range.second <= timestamp_end_) {
latest_uri = uri;
latest_ts = ts_range.second;
Expand All @@ -1270,7 +1255,8 @@ Status ArrayDirectory::is_fragment(
const std::unordered_set<std::string>& consolidated_uris_set,
int* is_fragment) const {
// If the URI name has a suffix, then it is not a fragment
auto name = uri.remove_trailing_slash().last_path_part();
FragmentID fragment_id{uri};
auto name = fragment_id.name();
if (name.find_first_of('.') != std::string::npos) {
*is_fragment = 0;
return Status::Ok();
Expand Down Expand Up @@ -1299,10 +1285,9 @@ Status ArrayDirectory::is_fragment(
return Status::Ok();
}

// If the format version is >= 5, then the above suffices to check if
// If the array format version is >= 5, then the above suffices to check if
// the URI is indeed a fragment
auto fragment_version = utils::parse::get_fragment_version(name);
if (fragment_version >= 5) {
if (fragment_id.array_format_version() >= 5) {
*is_fragment = false;
return Status::Ok();
}
Expand All @@ -1317,15 +1302,12 @@ Status ArrayDirectory::is_fragment(

bool ArrayDirectory::consolidation_with_timestamps_supported(
const URI& uri) const {
// Get the fragment version from the uri
auto name = uri.remove_trailing_slash().last_path_part();
auto fragment_version = utils::parse::get_fragment_version(name);

// get_fragment_version returns UINT32_MAX for versions <= 2 so we should
// explicitly exclude this case when checking if consolidation with timestamps
// is supported on a fragment
// FragmentID::array_format_version() returns UINT32_MAX for versions <= 2
// so we should explicitly exclude this case when checking if consolidation
// with timestamps is supported on a fragment
FragmentID fragment_id{uri};
return mode_ == ArrayDirectoryMode::READ &&
fragment_version >=
fragment_id.array_format_version() >=
constants::consolidation_with_timestamps_min_version;
}

Expand Down
12 changes: 4 additions & 8 deletions tiledb/sm/array/array_directory.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -39,9 +39,9 @@
#include "tiledb/sm/array_schema/array_schema.h"
#include "tiledb/sm/filesystem/uri.h"
#include "tiledb/sm/filesystem/vfs.h"
#include "tiledb/sm/fragment/fragment_identifier.h"
#include "tiledb/sm/stats/stats.h"
#include "tiledb/sm/storage_manager/context_resources.h"
#include "tiledb/storage_format/uri/parse_uri.h"

#include <functional>
#include <unordered_map>
Expand Down Expand Up @@ -221,12 +221,8 @@ class ArrayDirectory {
: uri_(uri)
, condition_marker_(condition_marker)
, offset_(offset) {
std::pair<uint64_t, uint64_t> timestamps;
if (!utils::parse::get_timestamp_range(URI(condition_marker), &timestamps)
.ok()) {
throw std::logic_error("Error parsing uri.");
}

FragmentID fragment_id{condition_marker};
auto timestamps{fragment_id.timestamp_range()};
timestamp_ = timestamps.first;
}

Expand Down
4 changes: 2 additions & 2 deletions tiledb/sm/array_schema/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# The MIT License
#
# Copyright (c) 2021-2023 TileDB, Inc.
# Copyright (c) 2021-2024 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -71,7 +71,7 @@ conclude(object_library)
commence(object_library array_schema)
this_target_sources(array_schema.cc dimension_label.cc)
this_target_object_libraries(
attribute domain enumeration time uri_format vfs)
attribute domain enumeration fragment time uri_format vfs)
conclude(object_library)

# This is linked outside the object_library scope because ContextResources
Expand Down
8 changes: 4 additions & 4 deletions tiledb/sm/array_schema/array_schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
* @copyright Copyright (c) 2016 MIT and Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -49,12 +49,12 @@
#include "tiledb/sm/enums/layout.h"
#include "tiledb/sm/filter/compression_filter.h"
#include "tiledb/sm/filter/webp_filter.h"
#include "tiledb/sm/fragment/fragment_identifier.h"
#include "tiledb/sm/misc/hilbert.h"
#include "tiledb/sm/misc/integral_type_casts.h"
#include "tiledb/sm/misc/tdb_time.h"
#include "tiledb/sm/tile/generic_tile_io.h"
#include "tiledb/storage_format/uri/generate_uri.h"
#include "tiledb/storage_format/uri/parse_uri.h"
#include "tiledb/type/apply_with_type.h"

#include <algorithm>
Expand Down Expand Up @@ -1386,8 +1386,8 @@ ArraySchema ArraySchema::deserialize(
}

// Populate timestamp range
std::pair<uint64_t, uint64_t> timestamp_range;
throw_if_not_ok(utils::parse::get_timestamp_range(uri, &timestamp_range));
FragmentID fragment_id{uri};
auto timestamp_range{fragment_id.timestamp_range()};

// Set schema name
std::string name = uri.last_path_part();
Expand Down
Loading
Loading