Skip to content

Commit

Permalink
feat(encryption): add key manager (#1706)
Browse files Browse the repository at this point in the history
This patch implements KeyProvider to manage the encryption key obtained from KMS. At the
first launch of the Replica Server, the KeyProvider generates the encryption key, IV and
key version from KMS and stores them in a file in a shared directory. After that, the
process posts them to KMS to decrypt the key. The decrypted key is used to encrypt and
decrypt data in the Replica Server.

A new config [pegasus.server]hadoop_kms_url has been introduced to provide the KMS URLs.

```diff
[pegasus.server]
+ hadoop_kms_url =
```
  • Loading branch information
Samunroyu authored Feb 1, 2024
1 parent 40c00db commit 8fd6a6e
Show file tree
Hide file tree
Showing 73 changed files with 928 additions and 13 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/lint_and_test_cpp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ jobs:
- pegasus_unit_test
- recovery_test
- restore_test
- security_test
- throttle_test
needs: build_Release
runs-on: ubuntu-latest
Expand Down Expand Up @@ -235,6 +236,7 @@ jobs:
- pegasus_unit_test
- recovery_test
- restore_test
- security_test
# TODO(yingchun): Disable it because we find it's too flaky, we will re-enable it after
# it has been optimized.
# - throttle_test
Expand Down Expand Up @@ -319,6 +321,7 @@ jobs:
# - pegasus_unit_test
# - recovery_test
# - restore_test
# - security_test
# - throttle_test
# needs: build_UBSAN
# runs-on: ubuntu-latest
Expand Down
3 changes: 3 additions & 0 deletions src/replica/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ set(MY_PROJ_SRC
set(MY_SRC_SEARCH_MODE "GLOB")

set(MY_PROJ_LIBS
absl::strings
dsn_replication_common
dsn.failure_detector
dsn.block_service
Expand All @@ -66,9 +67,11 @@ set(MY_PROJ_LIBS
dsn_nfs
dsn_dist_cmd
dsn_http
curl
dsn_runtime
dsn_aio
dsn_meta_server
dsn_security
rocksdb
lz4
zstd
Expand Down
38 changes: 38 additions & 0 deletions src/replica/kms_key_provider.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <string>

#include "replica/kms_key_provider.h"
#include "utils/errors.h"

namespace dsn {
namespace security {

// Delegates to the wrapped KMS client: asks it to decrypt the encryption key described by
// 'info' and returns the client's status unchanged. 'decrypted_key' is handed straight to the
// client as its output argument.
dsn::error_s kms_key_provider::DecryptEncryptionKey(const dsn::replication::kms_info &info,
                                                    std::string *decrypted_key)
{
    auto status = _client.DecryptEncryptionKey(info, decrypted_key);
    return status;
}

// Delegates to the wrapped KMS client: asks it to generate an encryption key, with 'info' passed
// through as the client's output argument, and returns the client's status unchanged.
dsn::error_s kms_key_provider::GenerateEncryptionKey(dsn::replication::kms_info *info)
{
    auto status = _client.GenerateEncryptionKey(info);
    return status;
}

} // namespace security
} // namespace dsn
56 changes: 56 additions & 0 deletions src/replica/kms_key_provider.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <string>
#include <utility>
#include <vector>

#include "security/kms_client.h"
#include "utils/errors.h"

namespace dsn {
namespace replication {
struct kms_info;
} // namespace replication

namespace security {
// Wrapper around kms_client for talking to KMS (a.k.a Key Management Service).
//
// It is used to:
//  - generate the key material stored in 'kms_info' (EEK, IV and KV, i.e. the encrypted
//    encryption key, the initialization vector and the key version) from KMS, and
//  - retrieve the DEK (the decrypted key) from KMS for a given 'kms_info'.
class kms_key_provider
{
public:
    // 'kms_url' is the list of KMS URLs and 'cluster_key_name' is the key name; both are
    // forwarded to the underlying kms_client.
    kms_key_provider(const std::vector<std::string> &kms_url, std::string cluster_key_name)
        : _client(kms_url, std::move(cluster_key_name))
    {
    }

    // Nothing to release explicitly: '_client' cleans up after itself.
    ~kms_key_provider() = default;

    // Decrypt the encryption key in 'kms_info' via KMS. The 'decrypted_key' will be a hex string.
    dsn::error_s DecryptEncryptionKey(const dsn::replication::kms_info &info,
                                      std::string *decrypted_key);

    // Generate an encryption key from KMS. The result is written to 'info'.
    dsn::error_s GenerateEncryptionKey(dsn::replication::kms_info *info);

private:
    // Client that performs the actual requests against KMS.
    kms_client _client;
};
} // namespace security
} // namespace dsn
87 changes: 84 additions & 3 deletions src/replica/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* THE SOFTWARE.
*/

#include <absl/strings/str_split.h>
#include <boost/algorithm/string/replace.hpp>
// IWYU pragma: no_include <ext/alloc_traits.h>
#include <fmt/core.h>
Expand Down Expand Up @@ -54,23 +55,26 @@
#include "mutation_log.h"
#include "nfs/nfs_node.h"
#include "nfs_types.h"
#include "ranger/access_type.h"
#include "replica.h"
#include "replica/duplication/replica_follower.h"
#include "replica/kms_key_provider.h"
#include "replica/replica_context.h"
#include "replica/replica_stub.h"
#include "replica/replication_app_base.h"
#include "replica_disk_migrator.h"
#include "replica_stub.h"
#include "runtime/api_layer1.h"
#include "ranger/access_type.h"
#include "runtime/rpc/rpc_message.h"
#include "runtime/rpc/serialization.h"
#include "security/access_controller.h"
#include "runtime/task/async_calls.h"
#include "security/access_controller.h"
#include "split/replica_split_manager.h"
#include "utils/command_manager.h"
#include "utils/env.h"
#include "utils/errors.h"
#include "utils/filesystem.h"
#include "utils/fmt_logging.h"
#include "utils/load_dump_object.h"
#include "utils/ports.h"
#include "utils/process_utils.h"
#include "utils/rand.h"
Expand Down Expand Up @@ -214,7 +218,15 @@ METRIC_DEFINE_gauge_int64(server,
dsn::metric_unit::kBytes,
"The max size of copied files among all splitting replicas");

DSN_DECLARE_bool(encrypt_data_at_rest);
DSN_DECLARE_string(server_key);

namespace dsn {
DSN_DECLARE_string(cluster_name);

namespace security {
DSN_DECLARE_bool(enable_acl);
}
namespace replication {
DSN_DEFINE_bool(replication,
deny_client_on_start,
Expand Down Expand Up @@ -283,12 +295,39 @@ DSN_DEFINE_int32(
"if tcmalloc reserved but not-used memory exceed this percentage of application allocated "
"memory, replica server will release the exceeding memory back to operating system");

// KMS endpoints used when data-at-rest encryption is enabled. replica_stub::initialize() splits
// the value on ',' (skipping empty pieces) and passes the resulting list to kms_key_provider.
DSN_DEFINE_string(
pegasus.server,
hadoop_kms_url,
"",
"Provide the comma-separated list of URLs from which to retrieve the "
"file system's server key. Example format: 'hostname1:1234/kms,hostname2:1234/kms'.");

DSN_DECLARE_bool(duplication_enabled);
DSN_DECLARE_int32(fd_beacon_interval_seconds);
DSN_DECLARE_int32(fd_check_interval_seconds);
DSN_DECLARE_int32(fd_grace_seconds);
DSN_DECLARE_int32(fd_lease_seconds);
DSN_DECLARE_int32(gc_interval_ms);
DSN_DECLARE_string(data_dirs);
// Group validator: data-at-rest encryption is only accepted together with ACL. Validation fails
// and 'message' is filled in when encrypt_data_at_rest is on while [security] enable_acl is off.
DSN_DEFINE_group_validator(encrypt_data_at_rest_pre_check, [](std::string &message) -> bool {
    const bool encryption_without_acl =
        FLAGS_encrypt_data_at_rest && !dsn::security::FLAGS_enable_acl;
    if (!encryption_without_acl) {
        return true;
    }

    message = "[pegasus.server] encrypt_data_at_rest should be enabled only if "
              "[security] enable_acl is enabled.";
    return false;
});

// Group validator: when data-at-rest encryption is enabled, a KMS URL must be configured.
// Validation fails and 'message' is filled in when encrypt_data_at_rest is on while
// hadoop_kms_url is empty. The check is compiled out when MOCK_TEST is defined.
DSN_DEFINE_group_validator(encrypt_data_at_rest_with_kms_url, [](std::string &message) -> bool {
#ifndef MOCK_TEST
    if (FLAGS_encrypt_data_at_rest && utils::is_empty(FLAGS_hadoop_kms_url)) {
        // hadoop_kms_url is defined in the [pegasus.server] section, so the message must name
        // that section for the operator to find the right setting.
        message = fmt::format("[pegasus.server] hadoop_kms_url should not be empty when "
                              "[pegasus.server] encrypt_data_at_rest is enabled.");
        return false;
    }
#endif
    return true;
});

bool replica_stub::s_not_exit_on_log_failure = false;

Expand Down Expand Up @@ -380,9 +419,51 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
}
}

const auto &kms_path =
utils::filesystem::path_combine(_options.data_dirs[0], kms_info::kKmsInfo);
// FLAGS_data_dirs may still be empty while the configuration is being loaded, so this check
// cannot be done in a group validator; use LOG_FATAL here instead.
if (!FLAGS_encrypt_data_at_rest && utils::filesystem::path_exists(kms_path)) {
    // The kms-info file is only written when encryption is active, so finding it while
    // encryption is switched off means the operator is trying to turn encryption back off.
    LOG_FATAL("The kms_info file exists at ({}), but [pegasus.server] "
              "encrypt_data_at_rest is disabled. "
              "Encryption in Pegasus is irreversible after its initial activation.",
              kms_path);
}

dsn::replication::kms_info kms_info;
if (FLAGS_encrypt_data_at_rest && !utils::is_empty(FLAGS_hadoop_kms_url)) {
_key_provider.reset(new dsn::security::kms_key_provider(
::absl::StrSplit(FLAGS_hadoop_kms_url, ",", ::absl::SkipEmpty()), FLAGS_cluster_name));
const auto &ec = dsn::utils::load_rjobj_from_file(
kms_path, dsn::utils::FileDataType::kNonSensitive, &kms_info);
if (ec != dsn::ERR_PATH_NOT_FOUND && ec != dsn::ERR_OK) {
CHECK_EQ_MSG(dsn::ERR_OK, ec, "Can't load kms key from kms-info file");
}
// Upon the first launch, the encryption key should be empty. The process will then retrieve
// EEK, IV, and KV from KMS.
// After the first launch, the encryption key, obtained from the kms-info file, should not
// be empty. The process will then acquire the DEK from KMS.
if (ec == dsn::ERR_PATH_NOT_FOUND) {
LOG_WARNING("It's normal to encounter a temporary inability to open the kms-info file "
"during the first process launch.");
CHECK_OK(_key_provider->GenerateEncryptionKey(&kms_info),
"Generate encryption key from kms failed");
}
CHECK_OK(_key_provider->DecryptEncryptionKey(kms_info, &_server_key),
"Get decryption key failed from {}",
kms_path);
FLAGS_server_key = _server_key.c_str();
}

// Initialize the file system manager.
_fs_manager.initialize(_options.data_dirs, _options.data_dir_tags);

if (_key_provider && !utils::filesystem::path_exists(kms_path)) {
const auto &err = dsn::utils::dump_rjobj_to_file(
kms_info, dsn::utils::FileDataType::kNonSensitive, kms_path);
CHECK_EQ_MSG(dsn::ERR_OK, err, "Can't store kms key to kms-info file");
}

// Check slog is not exist.
auto full_slog_path = fmt::format("{}/replica/slog/", _options.slog_dir);
if (utils::filesystem::directory_exists(full_slog_path)) {
Expand Down
7 changes: 7 additions & 0 deletions src/replica/replica_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ namespace dsn {
class command_deregister;
class message_ex;
class nfs_node;
namespace security {
class kms_key_provider;
} // namespace security

namespace service {
class copy_request;
Expand Down Expand Up @@ -459,6 +462,7 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter

std::unique_ptr<duplication_sync_timer> _duplication_sync_timer;
std::unique_ptr<replica_backup_server> _backup_server;
std::unique_ptr<dsn::security::kms_key_provider> _key_provider;

// command_handlers
std::vector<std::unique_ptr<command_deregister>> _cmds;
Expand Down Expand Up @@ -490,6 +494,9 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
// replica count executing emergency checkpoint concurrently
std::atomic_int _manual_emergency_checkpointing_count;

// replica decrypted key for rocksdb
std::string _server_key;

bool _is_running;

std::unique_ptr<dsn::security::access_controller> _access_controller;
Expand Down
1 change: 1 addition & 0 deletions src/replica/replication_app_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ namespace dsn {
namespace replication {

const std::string replica_init_info::kInitInfo = ".init-info";
// Name of the JSON file in which kms_info is persisted.
const std::string kms_info::kKmsInfo = ".kms-info";

std::string replica_init_info::to_string()
{
Expand Down
18 changes: 18 additions & 0 deletions src/replica/replication_app_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ class replica_app_info
error_code store(const std::string &fname);
};

// Key material obtained from KMS (EEK, IV and KV), stored and loaded as a JSON file.
// To get the decrypted key, these three values should be POSTed to KMS.
struct kms_info
{
    // Name of the JSON file.
    static const std::string kKmsInfo;

    std::string encrypted_key;         // EEK: encrypted encryption key
    std::string initialization_vector; // IV
    std::string key_version;           // KV
    DEFINE_JSON_SERIALIZATION(encrypted_key, initialization_vector, key_version)

    // Every argument defaults to an empty string, so a default-constructed kms_info carries no
    // key material.
    kms_info(const std::string &eek = "",
             const std::string &iv = "",
             const std::string &version = "")
        : encrypted_key(eek), initialization_vector(iv), key_version(version)
    {
    }
};

/// The store engine interface of Pegasus.
/// Inherited by pegasus::pegasus_server_impl
/// Inherited by apps::rrdb_service
Expand Down
5 changes: 5 additions & 0 deletions src/replica/storage/simple_kv/test/case-000.ini
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ server_list = localhost:34601
[pegasus.server]
encrypt_data_at_rest = false

[security]
enable_acl =
super_users =
meta_acl_rpc_allow_list =

[replication.app]
app_name = simple_kv.instance0
app_type = simple_kv
Expand Down
5 changes: 5 additions & 0 deletions src/replica/storage/simple_kv/test/case-001.ini
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ server_list = localhost:34601
[pegasus.server]
encrypt_data_at_rest = false

[security]
enable_acl =
super_users =
meta_acl_rpc_allow_list =

[replication.app]
app_name = simple_kv.instance0
app_type = simple_kv
Expand Down
5 changes: 5 additions & 0 deletions src/replica/storage/simple_kv/test/case-002.ini
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ server_list = localhost:34601
[pegasus.server]
encrypt_data_at_rest = false

[security]
enable_acl =
super_users =
meta_acl_rpc_allow_list =

[replication.app]
app_name = simple_kv.instance0
app_type = simple_kv
Expand Down
5 changes: 5 additions & 0 deletions src/replica/storage/simple_kv/test/case-003.ini
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ server_list = localhost:34601
[pegasus.server]
encrypt_data_at_rest = false

[security]
enable_acl =
super_users =
meta_acl_rpc_allow_list =

[replication.app]
app_name = simple_kv.instance0
app_type = simple_kv
Expand Down
Loading

0 comments on commit 8fd6a6e

Please sign in to comment.