diff --git a/documentation/sphinx/source/bulkdump.rst b/documentation/sphinx/source/bulkdump.rst
index 42bb98c40d8..aadc0e5d07a 100644
--- a/documentation/sphinx/source/bulkdump.rst
+++ b/documentation/sphinx/source/bulkdump.rst
@@ -69,7 +69,7 @@ ManagementAPI provides following interfaces to do the operations:
 1. Submit a job: submitBulkDumpJob(BulkDumpState job); // For generating the input job metadata, see the point 4.
 2. Clear a job: clearBulkDumpJob();
 3. Enable the feature: setBulkDumpMode(int mode); // Set mode = 1 to enable; Set mode = 0 to disable.
-4. BulkDump job metadata is generated by newBulkDumpTaskLocalSST(KeyRange range, std::string remoteRoot); // Will include more APIs to generate the metadata as the funcationality expands.
+4. BulkDump job metadata is generated by newBulkDumpJobLocalSST(KeyRange range, std::string remoteRoot); // Will include more APIs to generate the metadata as the functionality expands.

 Mechanisms
 ==========
diff --git a/fdbcli/BulkDumpCommand.actor.cpp b/fdbcli/BulkDumpCommand.actor.cpp
index 1328cc34754..deaf30fb8c9 100644
--- a/fdbcli/BulkDumpCommand.actor.cpp
+++ b/fdbcli/BulkDumpCommand.actor.cpp
@@ -21,7 +21,7 @@
 #include
 #include
 #include "fdbcli/fdbcli.actor.h"
-#include "fdbclient/BulkDumping.h"
+#include "fdbclient/BulkLoadAndDump.h"
 #include "fdbclient/IClientApi.h"
 #include "fdbclient/ManagementAPI.actor.h"
 #include "flow/Arena.h"
@@ -108,7 +108,7 @@ ACTOR Future<UID> bulkDumpCommandActor(Reference clust
         }
         std::string remoteRoot = tokens[4].toString();
         KeyRange range = Standalone(KeyRangeRef(rangeBegin, rangeEnd));
-        state BulkDumpState bulkDumpJob = newBulkDumpTaskLocalSST(range, remoteRoot);
+        state BulkDumpState bulkDumpJob = newBulkDumpJobLocalSST(range, remoteRoot);
         wait(submitBulkDumpJob(cx, bulkDumpJob));
         return bulkDumpJob.getJobId();
diff --git a/fdbcli/BulkLoadCommand.actor.cpp b/fdbcli/BulkLoadCommand.actor.cpp
index 5b01837163a..cad9c734cd3 100644
--- a/fdbcli/BulkLoadCommand.actor.cpp
+++ b/fdbcli/BulkLoadCommand.actor.cpp
@@ -25,38 +25,39 @@
 #include "fdbclient/IClientApi.h"
 #include "fdbclient/ManagementAPI.actor.h"
-#include "fdbclient/BulkLoading.h"
+#include "fdbclient/BulkLoadAndDump.h"
 #include "flow/Arena.h"
 #include "flow/FastRef.h"
+#include "flow/IRandom.h"
 #include "flow/ThreadHelper.actor.h"
 #include "flow/actorcompiler.h" // This must be the last #include.
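The bulkdump.rst excerpt above names the four ManagementAPI entry points but does not show them in sequence. A minimal sketch of the intended call order inside an actor, using only signatures visible in this PR (the database handle `cx` and the "/tmp/bulkdump" root are illustrative assumptions, not code from the PR):

    // Enable the feature, build the job metadata, then submit it.
    wait(success(setBulkDumpMode(cx, 1))); // mode = 1 enables bulk dump
    state BulkDumpState job = newBulkDumpJobLocalSST(normalKeys, "/tmp/bulkdump"); // root path is hypothetical
    wait(submitBulkDumpJob(cx, job));
    fmt::println("Submitted bulk dump job {}", job.getJobId().toString());
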
 namespace fdb_cli {

-ACTOR Future<Void> getBulkLoadStateByRange(Database cx,
-                                           KeyRange rangeToRead,
-                                           size_t countLimit,
-                                           Optional<BulkLoadPhase> phase) {
+ACTOR Future<Void> getBulkLoadTaskStateByRange(Database cx,
+                                               KeyRange rangeToRead,
+                                               size_t countLimit,
+                                               Optional<BulkLoadTaskPhase> phase) {
     try {
-        std::vector<BulkLoadState> res = wait(getValidBulkLoadTasksWithinRange(cx, rangeToRead, countLimit, phase));
+        std::vector<BulkLoadTaskState> res = wait(getValidBulkLoadTasksWithinRange(cx, rangeToRead, countLimit, phase));
         int64_t finishCount = 0;
         int64_t unfinishedCount = 0;
-        for (const auto& bulkLoadState : res) {
-            if (bulkLoadState.phase == BulkLoadPhase::Complete) {
-                fmt::println("[Complete]: {}", bulkLoadState.toString());
+        for (const auto& bulkLoadTaskState : res) {
+            if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Complete) {
+                fmt::println("[Complete]: {}", bulkLoadTaskState.toString());
                 ++finishCount;
-            } else if (bulkLoadState.phase == BulkLoadPhase::Running) {
-                fmt::println("[Running]: {}", bulkLoadState.toString());
+            } else if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Running) {
+                fmt::println("[Running]: {}", bulkLoadTaskState.toString());
                 ++unfinishedCount;
-            } else if (bulkLoadState.phase == BulkLoadPhase::Triggered) {
-                fmt::println("[Triggered]: {}", bulkLoadState.toString());
+            } else if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Triggered) {
+                fmt::println("[Triggered]: {}", bulkLoadTaskState.toString());
                 ++unfinishedCount;
-            } else if (bulkLoadState.phase == BulkLoadPhase::Submitted) {
-                fmt::println("[Submitted] {}", bulkLoadState.toString());
+            } else if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Submitted) {
+                fmt::println("[Submitted]: {}", bulkLoadTaskState.toString());
                 ++unfinishedCount;
-            } else if (bulkLoadState.phase == BulkLoadPhase::Acknowledged) {
-                fmt::println("[Acknowledge] {}", bulkLoadState.toString());
+            } else if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Acknowledged) {
+                fmt::println("[Acknowledged]: {}", bulkLoadTaskState.toString());
                 ++finishCount;
             } else {
                 UNREACHABLE();
@@ -128,7 +129,7 @@ ACTOR Future<UID> bulkLoadCommandActor(Reference clust
         std::string byteSampleFile = tokens[6].toString(); // TODO(BulkLoad): reject if the input bytes sampling file is
                                                            // not same as the configuration as FDB cluster
         KeyRange range = Standalone(KeyRangeRef(rangeBegin, rangeEnd));
-        state BulkLoadState bulkLoadTask = newBulkLoadTaskLocalSST(range, folder, dataFile, byteSampleFile);
+        state BulkLoadTaskState bulkLoadTask = newBulkLoadTaskLocalSST(UID(), range, folder, dataFile, byteSampleFile);
         wait(submitBulkLoadTask(cx, bulkLoadTask));
         return bulkLoadTask.getTaskId();
@@ -148,25 +149,25 @@ ACTOR Future<UID> bulkLoadCommandActor(Reference clust
         }
         KeyRange range = Standalone(KeyRangeRef(rangeBegin, rangeEnd));
         std::string inputPhase = tokens[4].toString();
-        Optional<BulkLoadPhase> phase;
+        Optional<BulkLoadTaskPhase> phase;
         if (inputPhase == "all") {
-            phase = Optional<BulkLoadPhase>();
+            phase = Optional<BulkLoadTaskPhase>();
         } else if (inputPhase == "submitted") {
-            phase = BulkLoadPhase::Submitted;
+            phase = BulkLoadTaskPhase::Submitted;
         } else if (inputPhase == "triggered") {
-            phase = BulkLoadPhase::Triggered;
+            phase = BulkLoadTaskPhase::Triggered;
         } else if (inputPhase == "running") {
-            phase = BulkLoadPhase::Running;
+            phase = BulkLoadTaskPhase::Running;
         } else if (inputPhase == "complete") {
-            phase = BulkLoadPhase::Complete;
+            phase = BulkLoadTaskPhase::Complete;
         } else if (inputPhase == "acknowledged") {
-            phase = BulkLoadPhase::Acknowledged;
+            phase = BulkLoadTaskPhase::Acknowledged;
         } else {
             printUsage(tokens[0]);
             return UID();
         }
         int countLimit = std::stoi(tokens[5].toString());
-        wait(getBulkLoadStateByRange(cx, range, countLimit, phase));
+        wait(getBulkLoadTaskStateByRange(cx, range, countLimit, phase));
         return UID();
     } else {
diff --git a/fdbclient/BulkDumping.cpp b/fdbclient/BulkDumping.cpp
deleted file mode 100644
index 8bedf5c6a17..00000000000
--- a/fdbclient/BulkDumping.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * BulkDumping.cpp
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fdbclient/BulkDumping.h"
-
-BulkDumpState newBulkDumpTaskLocalSST(const KeyRange& range, const std::string& remoteRoot) {
-    return BulkDumpState(
-        range, BulkDumpFileType::SST, BulkDumpTransportMethod::CP, BulkDumpExportMethod::File, remoteRoot);
-}
diff --git a/fdbclient/BulkLoadAndDump.cpp b/fdbclient/BulkLoadAndDump.cpp
new file mode 100644
index 00000000000..bbdf96a700c
--- /dev/null
+++ b/fdbclient/BulkLoadAndDump.cpp
@@ -0,0 +1,110 @@
+/*
+ * BulkLoadAndDump.cpp
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "fdbclient/BulkLoadAndDump.h"
+#include "flow/Platform.h"
+
+std::string stringRemovePrefix(std::string str, const std::string& prefix) {
+    if (str.compare(0, prefix.length(), prefix) == 0) {
+        str.erase(0, prefix.length());
+    } else {
+        return "";
+    }
+    return str;
+}
+
+// The inverse of StringRef.toFullHexStringPlain()
+Key getKeyFromHexString(const std::string& rawString) {
+    if (rawString.empty()) {
+        return Key();
+    }
+    std::vector<uint8_t> byteList;
+    ASSERT((rawString.size() + 1) % 3 == 0);
+    for (size_t i = 0; i < rawString.size(); i += 3) {
+        std::string byteString = rawString.substr(i, 2);
+        uint8_t byte = static_cast<uint8_t>(std::stoul(byteString, nullptr, 16));
+        byteList.push_back(byte);
+        ASSERT(i + 2 >= rawString.size() || rawString[i + 2] == ' ');
+    }
+    return Standalone(StringRef(byteList.data(), byteList.size()));
+}
+
+std::string generateBulkLoadJobManifestFileName(const UID& jobId) {
+    return jobId.toString() + "-job-manifest.txt";
+}
+
+std::pair<BulkLoadFileSet, BulkLoadFileSet> generateBulkLoadFileSetting(Version version,
+                                                                        const std::string& relativeFolder,
+                                                                        const std::string& rootLocal,
+                                                                        const std::string& rootRemote) {
+    // Generate file names based on data version
+    const std::string manifestFileName = std::to_string(version) + "-manifest.txt";
+    const std::string dataFileName = std::to_string(version) + "-data.sst";
+    const std::string byteSampleFileName = std::to_string(version) + "-sample.sst";
+    BulkLoadFileSet fileSetLocal(rootLocal, relativeFolder, manifestFileName, dataFileName, byteSampleFileName);
+    BulkLoadFileSet fileSetRemote(rootRemote, relativeFolder, manifestFileName, dataFileName, byteSampleFileName);
+    return std::make_pair(fileSetLocal, fileSetRemote);
+}
+
+std::string generateBulkLoadJobRoot(const std::string& root, const UID& jobId) {
+    return joinPath(root, jobId.toString());
+}
+
+std::string generateBulkLoadJobManifestFileContent(const std::map<Key, BulkLoadManifest>& manifests) {
+    std::string root = "";
+    std::string manifestList;
+    for (const auto& [beginKey, manifest] : manifests) {
+        if (root.empty()) {
+            root = manifest.fileSet.rootPath;
+        } else {
+            ASSERT(manifest.fileSet.rootPath == root);
+        }
+        manifestList =
+            manifestList +
+            BulkDumpJobManifestEntry(manifest.getBeginKey(),
+                                     manifest.getEndKey(),
+                                     joinPath(manifest.fileSet.relativePath, manifest.fileSet.manifestFileName),
+                                     manifest.version,
+                                     manifest.bytes)
+                .toString() +
+            "\n";
+    }
+    std::string head = BulkDumpJobManifestHeader(manifests.size(), root).toString() + "\n";
+    return head + manifestList;
+}
+
+BulkLoadTaskState newBulkLoadTaskLocalSST(UID jobID,
+                                          KeyRange range,
+                                          std::string folder,
+                                          std::string dataFile,
+                                          std::string bytesSampleFile) {
+    std::unordered_set<std::string> dataFiles;
+    dataFiles.insert(dataFile);
+    return BulkLoadTaskState(
+        range, BulkLoadFileType::SST, BulkLoadTransportMethod::CP, folder, dataFiles, bytesSampleFile, jobID);
+}
+
+BulkLoadJobState newBulkLoadJobLocalSST(const UID& jobId, const KeyRange& range, const std::string& remoteRoot) {
+    return BulkLoadJobState(jobId, remoteRoot, range, BulkLoadTransportMethod::CP);
+}
+
+BulkDumpState newBulkDumpJobLocalSST(const KeyRange& range, const std::string& remoteRoot) {
+    return BulkDumpState(range, BulkLoadFileType::SST, BulkLoadTransportMethod::CP, remoteRoot);
+}
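The helpers above are plain string manipulation, so their behavior is easy to pin down with an example. A sketch with invented version, folder, and root values (not from the PR):

    // File names are derived from the data version on both the local and remote side.
    auto fileSets = generateBulkLoadFileSetting(100, "5", "/tmp/job", "blobroot/job");
    // fileSets.first  -> rootPath "/tmp/job",     relativePath "5",
    //                    files "100-manifest.txt", "100-data.sst", "100-sample.sst"
    // fileSets.second -> rootPath "blobroot/job", same relative path and file names
    Key k = getKeyFromHexString("66 6f 6f"); // inverts toFullHexStringPlain(); yields "foo"
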
diff --git a/fdbclient/BulkLoading.cpp b/fdbclient/BulkLoading.cpp
deleted file mode 100644
index 7de239afe51..00000000000
--- a/fdbclient/BulkLoading.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * BulkLoading.cpp
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "fdbclient/BulkLoading.h"
-
-BulkLoadState newBulkLoadTaskLocalSST(KeyRange range,
-                                      std::string folder,
-                                      std::string dataFile,
-                                      std::string bytesSampleFile) {
-    std::unordered_set<std::string> dataFiles;
-    dataFiles.insert(dataFile);
-    return BulkLoadState(range,
-                         BulkLoadType::SST,
-                         BulkLoadTransportMethod::CP,
-                         BulkLoadInjectMethod::File,
-                         folder,
-                         dataFiles,
-                         bytesSampleFile);
-}
diff --git a/fdbclient/ManagementAPI.actor.cpp b/fdbclient/ManagementAPI.actor.cpp
index 29ca57289f5..1bddcb2c36a 100644
--- a/fdbclient/ManagementAPI.actor.cpp
+++ b/fdbclient/ManagementAPI.actor.cpp
@@ -23,7 +23,7 @@
 #include
 #include
-#include "fdbclient/BulkDumping.h"
+#include "fdbclient/BulkLoadAndDump.h"
 #include "fdbclient/GenericManagementAPI.actor.h"
 #include "fdbclient/RangeLock.h"
 #include "flow/Error.h"
@@ -2829,16 +2829,15 @@ ACTOR Future<int> setBulkLoadMode(Database cx, int mode) {
     }
 }

-ACTOR Future<std::vector<BulkLoadState>> getValidBulkLoadTasksWithinRange(
-    Database cx,
-    KeyRange rangeToRead,
-    size_t limit = 10,
-    Optional<BulkLoadPhase> phase = Optional<BulkLoadPhase>()) {
+ACTOR Future<std::vector<BulkLoadTaskState>> getValidBulkLoadTasksWithinRange(Database cx,
+                                                                              KeyRange rangeToRead,
+                                                                              size_t limit,
+                                                                              Optional<BulkLoadTaskPhase> phase) {
     state Transaction tr(cx);
     state Key readBegin = rangeToRead.begin;
     state Key readEnd = rangeToRead.end;
     state RangeResult rangeResult;
-    state std::vector<BulkLoadState> res;
+    state std::vector<BulkLoadTaskState> res;
     while (readBegin < readEnd) {
         state int retryCount = 0;
         loop {
@@ -2848,7 +2847,7 @@ ACTOR Future<std::vector<BulkLoadState>> getValidBulkLoadTasksWithinRange(
                 tr.setOption(FDBTransactionOptions::LOCK_AWARE);
                 wait(store(rangeResult,
                            krmGetRanges(&tr,
-                                        bulkLoadPrefix,
+                                        bulkLoadTaskPrefix,
                                         KeyRangeRef(readBegin, readEnd),
                                         CLIENT_KNOBS->KRM_GET_RANGE_LIMIT,
                                         CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES)));
@@ -2865,14 +2864,14 @@ ACTOR Future<std::vector<BulkLoadState>> getValidBulkLoadTasksWithinRange(
                 if (rangeResult[i].value.empty()) {
                     continue;
                 }
-                BulkLoadState bulkLoadState = decodeBulkLoadState(rangeResult[i].value);
+                BulkLoadTaskState bulkLoadTaskState = decodeBulkLoadTaskState(rangeResult[i].value);
                 KeyRange range = Standalone(KeyRangeRef(rangeResult[i].key, rangeResult[i + 1].key));
-                if (range != bulkLoadState.getRange()) {
-                    ASSERT(bulkLoadState.getRange().contains(range));
+                if (range != bulkLoadTaskState.getRange()) {
+                    ASSERT(bulkLoadTaskState.getRange().contains(range));
                     continue;
                 }
-                if (!phase.present() || phase.get() == bulkLoadState.phase) {
-                    res.push_back(bulkLoadState);
+                if (!phase.present() || phase.get() == bulkLoadTaskState.phase) {
+                    res.push_back(bulkLoadTaskState);
                 }
                 if (res.size() >= limit) {
                     return res;
@@ -2885,31 +2884,36 @@
 }

 // Submit bulkload task and overwrite any existing task and lock range
-ACTOR Future<Void> submitBulkLoadTask(Database cx, BulkLoadState bulkLoadTask) {
+ACTOR Future<Void> setBulkLoadSubmissionTransaction(Transaction* tr, BulkLoadTaskState bulkLoadTask) {
+    tr->setOption(FDBTransactionOptions::LOCK_AWARE);
+    tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+    if (bulkLoadTask.phase != BulkLoadTaskPhase::Submitted) {
+        TraceEvent(g_network->isSimulated() ? SevError : SevWarnAlways, "SubmitBulkLoadTaskError")
+            .setMaxEventLength(-1)
+            .setMaxFieldLength(-1)
+            .detail("Reason", "WrongPhase")
+            .detail("Task", bulkLoadTask.toString());
+        throw bulkload_task_failed();
+    }
+    if (!normalKeys.contains(bulkLoadTask.getRange())) {
+        TraceEvent(g_network->isSimulated() ? SevError : SevWarnAlways, "SubmitBulkLoadTaskError")
+            .setMaxEventLength(-1)
+            .setMaxFieldLength(-1)
+            .detail("Reason", "RangeOutOfScope")
+            .detail("Task", bulkLoadTask.toString());
+        throw bulkload_task_failed();
+    }
+    wait(turnOffUserWriteTrafficForBulkLoad(tr, bulkLoadTask.getRange()));
+    bulkLoadTask.submitTime = now();
+    wait(krmSetRange(tr, bulkLoadTaskPrefix, bulkLoadTask.getRange(), bulkLoadTaskStateValue(bulkLoadTask)));
+    return Void();
+}
+
+ACTOR Future<Void> submitBulkLoadTask(Database cx, BulkLoadTaskState bulkLoadTask) {
     state Transaction tr(cx);
     loop {
         try {
-            tr.setOption(FDBTransactionOptions::LOCK_AWARE);
-            tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-            if (bulkLoadTask.phase != BulkLoadPhase::Submitted) {
-                TraceEvent(g_network->isSimulated() ? SevError : SevWarnAlways, "SubmitBulkLoadTaskError")
-                    .setMaxEventLength(-1)
-                    .setMaxFieldLength(-1)
-                    .detail("Reason", "WrongPhase")
-                    .detail("Task", bulkLoadTask.toString());
-                throw bulkload_task_failed();
-            }
-            if (!normalKeys.contains(bulkLoadTask.getRange())) {
-                TraceEvent(g_network->isSimulated() ? SevError : SevWarnAlways, "SubmitBulkLoadTaskError")
-                    .setMaxEventLength(-1)
-                    .setMaxFieldLength(-1)
-                    .detail("Reason", "RangeOutOfScope")
-                    .detail("Task", bulkLoadTask.toString());
-                throw bulkload_task_failed();
-            }
-            wait(turnOffUserWriteTrafficForBulkLoad(&tr, bulkLoadTask.getRange()));
-            bulkLoadTask.submitTime = now();
-            wait(krmSetRange(&tr, bulkLoadPrefix, bulkLoadTask.getRange(), bulkLoadStateValue(bulkLoadTask)));
+            wait(setBulkLoadSubmissionTransaction(&tr, bulkLoadTask));
             wait(tr.commit());
             break;
         } catch (Error& e) {
@@ -2921,52 +2925,200 @@
 // Get bulk load task metadata with range and taskId and phase selector
 // Throw error if the task is outdated or the task is not in any input phase at the tr read version
-ACTOR Future<BulkLoadState> getBulkLoadTask(Transaction* tr,
-                                            KeyRange range,
-                                            UID taskId,
-                                            std::vector<BulkLoadPhase> phases) {
-    state BulkLoadState bulkLoadState;
+ACTOR Future<BulkLoadTaskState> getBulkLoadTask(Transaction* tr,
+                                                KeyRange range,
+                                                UID taskId,
+                                                std::vector<BulkLoadTaskPhase> phases) {
+    state BulkLoadTaskState bulkLoadTaskState;
     tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
-    RangeResult result = wait(krmGetRanges(tr, bulkLoadPrefix, range));
+    RangeResult result = wait(krmGetRanges(tr, bulkLoadTaskPrefix, range));
     if (result.size() > 2) {
         throw bulkload_task_outdated();
     } else if (result[0].value.empty()) {
         throw bulkload_task_outdated();
     }
     ASSERT(result.size() == 2);
-    bulkLoadState = decodeBulkLoadState(result[0].value);
-    ASSERT(bulkLoadState.getTaskId().isValid());
-    if (taskId != bulkLoadState.getTaskId()) {
+    bulkLoadTaskState = decodeBulkLoadTaskState(result[0].value);
+    ASSERT(bulkLoadTaskState.getTaskId().isValid());
+    if (taskId != bulkLoadTaskState.getTaskId()) {
         // This task is overwritten by a newer task
         throw bulkload_task_outdated();
     }
     KeyRange currentRange = KeyRangeRef(result[0].key, result[1].key);
-    if (bulkLoadState.getRange() != currentRange) {
+    if (bulkLoadTaskState.getRange() != currentRange) {
         // This task is partially overwritten by a newer task
-        ASSERT(bulkLoadState.getRange().contains(currentRange));
+        ASSERT(bulkLoadTaskState.getRange().contains(currentRange));
         throw bulkload_task_outdated();
     }
-    if (phases.size() > 0 && !bulkLoadState.onAnyPhase(phases)) {
+    if (phases.size() > 0 && !bulkLoadTaskState.onAnyPhase(phases)) {
         throw bulkload_task_outdated();
     }
-    return bulkLoadState;
+    return bulkLoadTaskState;
+}
+
+ACTOR Future<Void> setBulkLoadAcknowledgeTransaction(Transaction* tr, KeyRange range, UID taskId) {
+    state BulkLoadTaskState bulkLoadTaskState;
+    tr->setOption(FDBTransactionOptions::LOCK_AWARE);
+    tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
+    wait(store(bulkLoadTaskState,
+               getBulkLoadTask(tr, range, taskId, { BulkLoadTaskPhase::Complete, BulkLoadTaskPhase::Acknowledged })));
+    bulkLoadTaskState.phase = BulkLoadTaskPhase::Acknowledged;
+    ASSERT(range == bulkLoadTaskState.getRange() && taskId == bulkLoadTaskState.getTaskId());
+    ASSERT(normalKeys.contains(range));
+    wait(krmSetRange(tr, bulkLoadTaskPrefix, bulkLoadTaskState.getRange(), bulkLoadTaskStateValue(bulkLoadTaskState)));
+    wait(turnOnUserWriteTrafficForBulkLoad(tr, bulkLoadTaskState.getRange()));
+    return Void();
 }

 // Update bulkload task to acknowledge state and unlock the range
 ACTOR Future<Void> acknowledgeBulkLoadTask(Database cx, KeyRange range, UID taskId) {
     state Transaction tr(cx);
     loop {
-        state BulkLoadState bulkLoadState;
+        state BulkLoadTaskState bulkLoadTaskState;
         try {
-            tr.setOption(FDBTransactionOptions::LOCK_AWARE);
-            tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
-            wait(store(bulkLoadState,
-                       getBulkLoadTask(&tr, range, taskId, { BulkLoadPhase::Complete, BulkLoadPhase::Acknowledged })));
-            bulkLoadState.phase = BulkLoadPhase::Acknowledged;
-            ASSERT(range == bulkLoadState.getRange() && taskId == bulkLoadState.getTaskId());
-            ASSERT(normalKeys.contains(range));
-            wait(krmSetRange(&tr, bulkLoadPrefix, bulkLoadState.getRange(), bulkLoadStateValue(bulkLoadState)));
-            wait(turnOnUserWriteTrafficForBulkLoad(&tr, bulkLoadState.getRange()));
+            wait(setBulkLoadAcknowledgeTransaction(&tr, range, taskId));
+            wait(tr.commit());
+            break;
+        } catch (Error& e) {
+            wait(tr.onError(e));
+        }
+    }
+    return Void();
+}
+
+ACTOR Future<Optional<BulkLoadJobState>> getOngoingBulkLoadJob(Database cx) {
+    state RangeResult rangeResult;
+    state Transaction tr(cx);
+    loop {
+        try {
+            // At most one job at a time, so looking at the first returned range is sufficient
+            wait(store(rangeResult,
+                       krmGetRanges(&tr,
+                                    bulkLoadJobPrefix,
+                                    normalKeys,
+                                    CLIENT_KNOBS->KRM_GET_RANGE_LIMIT,
+                                    CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES)));
+            for (int i = 0; i < rangeResult.size() - 1; i++) {
+                if (rangeResult[i].value.empty()) {
+                    continue;
+                }
+                BulkLoadJobState job = decodeBulkLoadJobState(rangeResult[i].value);
+                KeyRange jobRange = job.getRange();
+                ASSERT(!jobRange.empty() && jobRange.begin == rangeResult[i].key &&
+                       jobRange.end == rangeResult[i + 1].key);
+                return job;
+            }
+            return Optional<BulkLoadJobState>();
+        } catch (Error& e) {
+            wait(tr.onError(e));
+        }
+    }
+}
+
+ACTOR Future<bool> anyBulkLoadJobAlive(Transaction* tr) {
+    state RangeResult rangeResult;
+    // At most one job at a time, so looking at the first returned range is sufficient
+    wait(store(rangeResult,
+               krmGetRanges(tr,
+                            bulkLoadJobPrefix,
+                            normalKeys,
+                            CLIENT_KNOBS->KRM_GET_RANGE_LIMIT,
+                            CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES)));
+    ASSERT(rangeResult.size() >= 2);
+    ASSERT(rangeResult[0].key == normalKeys.begin);
+    if (rangeResult.size() > 2) {
+        return true;
+    } else if (rangeResult[1].key != normalKeys.end) {
+        return true;
+    } else if (!rangeResult[0].value.empty()) {
+        return true;
+    }
+    return false;
+}
+
+// Defines the rule for updating the bulkdump restore phase.
+// Returns true if inputJob should be updated according to the phase; otherwise, returns false.
+// Throws bulkload_task_outdated if the input job is outdated.
+ACTOR Future<bool> bulkLoadJobMetadataUpdateCheck(Transaction* tr, BulkLoadJobState inputJob) {
+    state RangeResult rangeResult;
+    wait(store(rangeResult,
+               krmGetRanges(tr,
+                            bulkLoadJobPrefix,
+                            inputJob.getRange(),
+                            CLIENT_KNOBS->KRM_GET_RANGE_LIMIT,
+                            CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES)));
+    ASSERT(rangeResult.size() >= 2);
+    if (rangeResult.size() > 2) {
+        throw bulkload_task_outdated();
+    }
+    if (rangeResult[0].value.empty()) {
+        if (inputJob.getPhase() == BulkLoadJobPhase::Submitted) {
+            return true;
+        } else {
+            throw bulkload_task_outdated();
+        }
+    }
+    BulkLoadJobState currentJob = decodeBulkLoadJobState(rangeResult[0].value);
+    ASSERT(currentJob.getPhase() != BulkLoadJobPhase::Invalid && inputJob.getPhase() != BulkLoadJobPhase::Invalid);
+    ASSERT(currentJob.getPhase() == BulkLoadJobPhase::Submitted ||
+           currentJob.getRange() == KeyRangeRef(rangeResult[0].key, rangeResult[1].key));
+    // TODO(BulkDump): has restore jobId, clear new jobId at each time
+    if (currentJob.getJobId() != inputJob.getJobId()) {
+        throw bulkload_task_outdated();
+    } else if (currentJob.getRange() != inputJob.getRange() && currentJob.getPhase() != BulkLoadJobPhase::Submitted) {
+        throw bulkload_task_outdated();
+    }
+    if (currentJob.getPhase() == BulkLoadJobPhase::Complete) {
+        return false;
+    } else if (currentJob.getPhase() == BulkLoadJobPhase::Submitted) {
+        if (inputJob.getPhase() == BulkLoadJobPhase::Submitted) {
+            return false;
+        }
+    } else if (currentJob.getPhase() == BulkLoadJobPhase::Triggered) {
+        if (inputJob.getPhase() == BulkLoadJobPhase::Submitted || inputJob.getPhase() == BulkLoadJobPhase::Triggered) {
+            return false;
+        }
+    }
+    return true;
+}
+
+ACTOR Future<bool> updateBulkLoadJobMetadata(Transaction* tr, BulkLoadJobState jobState) {
+    bool doUpdate = wait(bulkLoadJobMetadataUpdateCheck(tr, jobState));
+    if (!doUpdate) {
+        return false;
+    }
+    wait(krmSetRange(tr, bulkLoadJobPrefix, jobState.getRange(), bulkLoadJobValue(jobState)));
+    return true;
+}
+
+ACTOR Future<bool> submitBulkLoadJob(Database cx, BulkLoadJobState jobState) {
+    ASSERT(jobState.getPhase() == BulkLoadJobPhase::Submitted);
+    state Transaction tr(cx);
+    loop {
+        try {
+            // There is at most one bulkdump restore job at a time globally
+            bool anyAlive = wait(anyBulkLoadJobAlive(&tr));
+            if (anyAlive) {
+                return false;
+            }
+            bool doUpdate = wait(updateBulkLoadJobMetadata(&tr, jobState));
+            if (!doUpdate) {
+                return false;
+            }
+            wait(tr.commit());
+            break;
+        } catch (Error& e) {
+            wait(tr.onError(e));
+        }
+    }
+    return true;
+}
+
+ACTOR Future<Void> clearBulkLoadJob(Database cx) {
+    state Transaction tr(cx);
+    loop {
+        try {
+            tr.clear(bulkLoadJobKeys);
             wait(tr.commit());
             break;
         } catch (Error& e) {
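The job-level helpers above compose into a small lifecycle: submit, poll, clear. A sketch of the intended call order inside an actor, using only functions added in this PR (the jobId, root path, and polling policy are illustrative assumptions):

    state BulkLoadJobState job =
        newBulkLoadJobLocalSST(deterministicRandom()->randomUniqueID(), normalKeys, "/tmp/bulkdump");
    bool submitted = wait(submitBulkLoadJob(cx, job)); // returns false if another job is already alive
    if (submitted) {
        Optional<BulkLoadJobState> ongoing = wait(getOngoingBulkLoadJob(cx));
        if (ongoing.present() && ongoing.get().getPhase() == BulkLoadJobPhase::Complete) {
            wait(clearBulkLoadJob(cx)); // drop the job metadata once complete
        }
    }
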
diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp
index 5b5d87069f0..5024b5c4a7b 100644
--- a/fdbclient/SystemData.cpp
+++ b/fdbclient/SystemData.cpp
@@ -1196,18 +1196,31 @@ const UID dataDistributionModeLock = UID(6345, 3425);
 // Bulk loading keys
 const KeyRef bulkLoadModeKey = "\xff/bulkLoadMode"_sr;
-const KeyRangeRef bulkLoadKeys = KeyRangeRef("\xff/bulkLoad/"_sr, "\xff/bulkLoad0"_sr);
-const KeyRef bulkLoadPrefix = bulkLoadKeys.begin;
+const KeyRangeRef bulkLoadTaskKeys = KeyRangeRef("\xff/bulkLoadTask/"_sr, "\xff/bulkLoadTask0"_sr);
+const KeyRef bulkLoadTaskPrefix = bulkLoadTaskKeys.begin;
+const KeyRangeRef bulkLoadJobKeys = KeyRangeRef("\xff/bulkLoadJob/"_sr, "\xff/bulkLoadJob0"_sr);
+const KeyRef bulkLoadJobPrefix = bulkLoadJobKeys.begin;

-const Value bulkLoadStateValue(const BulkLoadState& bulkLoadState) {
-    return ObjectWriter::toValue(bulkLoadState, IncludeVersion());
+const Value bulkLoadTaskStateValue(const BulkLoadTaskState& bulkLoadTaskState) {
+    return ObjectWriter::toValue(bulkLoadTaskState, IncludeVersion());
 }

-BulkLoadState decodeBulkLoadState(const ValueRef& value) {
-    BulkLoadState bulkLoadState;
+BulkLoadTaskState decodeBulkLoadTaskState(const ValueRef& value) {
+    BulkLoadTaskState bulkLoadTaskState;
     ObjectReader reader(value.begin(), IncludeVersion());
-    reader.deserialize(bulkLoadState);
-    return bulkLoadState;
+    reader.deserialize(bulkLoadTaskState);
+    return bulkLoadTaskState;
+}
+
+const Value bulkLoadJobValue(const BulkLoadJobState& bulkLoadJobState) {
+    return ObjectWriter::toValue(bulkLoadJobState, IncludeVersion());
+}
+
+BulkLoadJobState decodeBulkLoadJobState(const ValueRef& value) {
+    BulkLoadJobState bulkLoadJobState;
+    ObjectReader reader(value.begin(), IncludeVersion());
+    reader.deserialize(bulkLoadJobState);
+    return bulkLoadJobState;
 }

 // Bulk dumping keys
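The codec pairs added above are symmetric ObjectWriter/ObjectReader round trips, so a quick sanity check can be sketched as follows (the job arguments are invented; newBulkLoadJobLocalSST comes from BulkLoadAndDump.h in this same PR):

    // Encode a job state into the \xff/bulkLoadJob/ value format and decode it back.
    BulkLoadJobState job =
        newBulkLoadJobLocalSST(deterministicRandom()->randomUniqueID(), normalKeys, "/tmp/bulkdump");
    Value v = bulkLoadJobValue(job);
    BulkLoadJobState decoded = decodeBulkLoadJobState(v);
    ASSERT(decoded.getJobId() == job.getJobId() && decoded.getRange() == job.getRange());
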
diff --git a/fdbclient/include/fdbclient/BulkDumping.h b/fdbclient/include/fdbclient/BulkDumping.h
deleted file mode 100644
index cc44f171033..00000000000
--- a/fdbclient/include/fdbclient/BulkDumping.h
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * BulkDump.h
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef FDBCLIENT_BULKDUMPING_H
-#define FDBCLIENT_BULKDUMPING_H
-#include "flow/Trace.h"
-#pragma once
-
-#include "fdbclient/FDBTypes.h"
-#include "fdbrpc/fdbrpc.h"
-
-// Define the configuration of bytes sampling
-// Use for setting manifest file
-struct ByteSampleSetting {
-    constexpr static FileIdentifier file_identifier = 1384500;
-
-    ByteSampleSetting() = default;
-
-    ByteSampleSetting(int version, const std::string& method, int factor, int overhead, double minimalProbability)
-      : version(version), method(method), factor(factor), overhead(overhead), minimalProbability(minimalProbability) {
-        ASSERT(isValid());
-    }
-
-    bool isValid() const {
-        if (method.size() == 0) {
-            return false;
-        }
-        return true;
-    }
-
-    std::string toString() const {
-        return "[ByteSampleVersion]: " + std::to_string(version) + ", [ByteSampleMethod]: " + method +
-               ", [ByteSampleFactor]: " + std::to_string(factor) +
-               ", [ByteSampleOverhead]: " + std::to_string(overhead) +
-               ", [ByteSampleMinimalProbability]: " + std::to_string(minimalProbability);
-    }
-
-    template <class Ar>
-    void serialize(Ar& ar) {
-        serializer(ar, version, method, factor, overhead, minimalProbability);
-    }
-
-    int version = 0;
-    std::string method = "";
-    int factor = 0;
-    int overhead = 0;
-    double minimalProbability = 0.0;
-};
-
-// Definition of bulkdump files metadata
-struct BulkDumpFileSet {
-    constexpr static FileIdentifier file_identifier = 1384501;
-
-    BulkDumpFileSet() = default;
-
-    BulkDumpFileSet(const std::string& rootPath,
-                    const std::string& relativePath,
-                    const std::string& manifestFileName,
-                    const std::string& dataFileName,
-                    const std::string& byteSampleFileName)
-      : rootPath(rootPath), relativePath(relativePath), manifestFileName(manifestFileName), dataFileName(dataFileName),
-        byteSampleFileName(byteSampleFileName) {
-        if (!isValid()) {
-            TraceEvent(SevError, "BulkDumpFileSetInvalid").detail("Content", toString());
-            ASSERT(false);
-        }
-    }
-
-    bool isValid() const {
-        if (rootPath.empty()) {
-            ASSERT(false);
-            return false;
-        }
-        if (relativePath.empty()) {
-            ASSERT(false);
-            return false;
-        }
-        if (manifestFileName.empty()) {
-            ASSERT(false);
-            return false;
-        }
-        if (dataFileName.empty() && !byteSampleFileName.empty()) {
-            ASSERT(false);
-            return false;
-        }
-        return true;
-    }
-
-    std::string toString() const {
-        return "[RootPath]: " + rootPath + ", [RelativePath]: " + relativePath +
-               ", [ManifestFileName]: " + manifestFileName + ", [DataFileName]: " + dataFileName +
-               ", [ByteSampleFileName]: " + byteSampleFileName;
-    }
-
-    template <class Ar>
-    void serialize(Ar& ar) {
-        serializer(ar, rootPath, relativePath, manifestFileName, dataFileName, byteSampleFileName);
-    }
-
-    std::string rootPath = "";
-    std::string relativePath = "";
-    std::string manifestFileName = "";
-    std::string dataFileName = "";
-    std::string byteSampleFileName = "";
-};
-
-struct BulkDumpFileFullPathSet {
-    BulkDumpFileFullPathSet(const BulkDumpFileSet& fileSet) {
-        folder = joinPath(fileSet.rootPath, fileSet.relativePath);
-        dataFilePath = joinPath(folder, fileSet.dataFileName);
-        byteSampleFilePath = joinPath(folder, fileSet.byteSampleFileName);
-        manifestFilePath = joinPath(folder, fileSet.manifestFileName);
-    }
-    std::string folder = "";
-    std::string dataFilePath = "";
-    std::string byteSampleFilePath = "";
-    std::string manifestFilePath = "";
-};
-
-// Define the metadata of bulkdump manifest file
-// The file is uploaded along with the data files
-struct BulkDumpManifest {
-    constexpr static FileIdentifier file_identifier = 1384502;
-
-    BulkDumpManifest() = default;
-
-    BulkDumpManifest(const BulkDumpFileSet& fileSet,
-                     const Key& beginKey,
-                     const Key& endKey,
-                     const Version& version,
-                     const std::string& checksum,
-                     int64_t bytes,
-                     const ByteSampleSetting& byteSampleSetting)
-      : fileSet(fileSet), beginKey(beginKey), endKey(endKey), version(version), checksum(checksum), bytes(bytes),
-        byteSampleSetting(byteSampleSetting) {
-        ASSERT(isValid());
-    }
-
-    bool isValid() const {
-        if (beginKey >= endKey) {
-            return false;
-        }
-        if (!fileSet.isValid()) {
-            return false;
-        }
-        if (!byteSampleSetting.isValid()) {
-            return false;
-        }
-        return true;
-    }
-
-    std::string getBeginKeyString() const { return beginKey.toFullHexStringPlain(); }
-
-    std::string getEndKeyString() const { return endKey.toFullHexStringPlain(); }
-
-    // Generating human readable string to stored in the manifest file
-    std::string toString() const {
-        return fileSet.toString() + ", [BeginKey]: " + getBeginKeyString() + ", [EndKey]: " + getEndKeyString() +
-               ", [Version]: " + std::to_string(version) + ", [Checksum]: " + checksum +
-               ", [Bytes]: " + std::to_string(bytes) + ", " + byteSampleSetting.toString();
-    }
-
-    template <class Ar>
-    void serialize(Ar& ar) {
-        serializer(ar, fileSet, beginKey, endKey, version, checksum, bytes, byteSampleSetting);
-    }
-
-    BulkDumpFileSet fileSet;
-    Key beginKey;
-    Key endKey;
-    Version version;
-    std::string checksum;
-    int64_t bytes;
-    ByteSampleSetting byteSampleSetting;
-};
-
-enum class BulkDumpPhase : uint8_t {
-    Invalid = 0,
-    Submitted = 1,
-    Complete = 2,
-};
-
-enum class BulkDumpFileType : uint8_t {
-    Invalid = 0,
-    SST = 1,
-};
-
-enum class BulkDumpTransportMethod : uint8_t {
-    Invalid = 0,
-    CP = 1,
-};
-
-enum class BulkDumpExportMethod : uint8_t {
-    Invalid = 0,
-    File = 1,
-};
-
-// Definition of bulkdump metadata
-struct BulkDumpState {
-    constexpr static FileIdentifier file_identifier = 1384498;
-
-    BulkDumpState() = default;
-
-    // The only public interface to create a valid task
-    // This constructor is call when users submitting a task, e.g. by newBulkDumpTaskLocalSST()
-    BulkDumpState(KeyRange range,
-                  BulkDumpFileType fileType,
-                  BulkDumpTransportMethod transportMethod,
-                  BulkDumpExportMethod exportMethod,
-                  std::string remoteRoot)
-      : jobId(deterministicRandom()->randomUniqueID()), range(range), fileType(fileType),
-        transportMethod(transportMethod), exportMethod(exportMethod), remoteRoot(remoteRoot),
-        phase(BulkDumpPhase::Submitted) {
-        ASSERT(isValid());
-    }
-
-    bool operator==(const BulkDumpState& rhs) const {
-        return jobId == rhs.jobId && taskId == rhs.taskId && range == rhs.range && remoteRoot == rhs.remoteRoot;
-    }
-
-    std::string toString() const {
-        std::string res = "BulkDumpState: [Range]: " + Traceable<KeyRangeRef>::toString(range) +
-                          ", [FileType]: " + std::to_string(static_cast<uint8_t>(fileType)) +
-                          ", [TransportMethod]: " + std::to_string(static_cast<uint8_t>(transportMethod)) +
-                          ", [ExportMethod]: " + std::to_string(static_cast<uint8_t>(exportMethod)) +
-                          ", [Phase]: " + std::to_string(static_cast<uint8_t>(phase)) +
-                          ", [RemoteRoot]: " + remoteRoot + ", [JobId]: " + jobId.toString();
-        if (taskId.present()) {
-            res = res + ", [TaskId]: " + taskId.get().toString();
-        }
-        if (version.present()) {
-            res = res + ", [Version]: " + std::to_string(version.get());
-        }
-        if (bulkDumpManifest.present()) {
-            res = res + ", [BulkDumpManifest]: " + bulkDumpManifest.get().toString();
-        }
-        return res;
-    }
-
-    KeyRange getRange() const { return range; }
-
-    UID getJobId() const { return jobId; }
-
-    Optional<UID> getTaskId() const { return taskId; }
-
-    std::string getRemoteRoot() const { return remoteRoot; }
-
-    BulkDumpPhase getPhase() const { return phase; }
-
-    BulkDumpTransportMethod getTransportMethod() const { return transportMethod; }
-
-    bool isValid() const {
-        if (!jobId.isValid()) {
-            return false;
-        }
-        if (taskId.present() && !taskId.get().isValid()) {
-            return false;
-        }
-        if (range.empty()) {
-            return false;
-        }
-        if (transportMethod == BulkDumpTransportMethod::Invalid) {
-            return false;
-        } else if (transportMethod != BulkDumpTransportMethod::CP) {
-            throw not_implemented();
-        }
-        if (exportMethod == BulkDumpExportMethod::Invalid) {
-            return false;
-        } else if (exportMethod != BulkDumpExportMethod::File) {
-            throw not_implemented();
-        }
-        if (remoteRoot.empty()) {
-            return false;
-        }
-        return true;
-    }
-
-    // The user job spawns a series of ranges tasks based on shard boundary to cover the user task range.
-    // Those spawned tasks are executed by SSes.
-    // Return metadata of the task.
-    BulkDumpState getRangeTaskState(const KeyRange& taskRange) {
-        ASSERT(range.contains(taskRange));
-        BulkDumpState res = *this; // the task inherits configuration from the job
-        UID newTaskId;
-        // Guarantee to have a brand new taskId for the new spawned task
-        int retryCount = 0;
-        while (true) {
-            newTaskId = deterministicRandom()->randomUniqueID();
-            if (!res.taskId.present() || res.taskId.get() != newTaskId) {
-                break;
-            }
-            retryCount++;
-            if (retryCount > 50) {
-                TraceEvent(SevError, "GetRangeTaskStateRetryTooManyTimes").detail("TaskRange", taskRange);
-                throw bulkdump_task_failed();
-            }
-        }
-        res.taskId = newTaskId;
-        res.range = taskRange;
-        return res;
-    }
-
-    // Generate a metadata with Complete state.
-    BulkDumpState getRangeCompleteState(const KeyRange& completeRange, const BulkDumpManifest& bulkDumpManifest) {
-        ASSERT(range.contains(completeRange));
-        ASSERT(bulkDumpManifest.isValid());
-        ASSERT(taskId.present() && taskId.get().isValid());
-        BulkDumpState res = *this;
-        res.phase = BulkDumpPhase::Complete;
-        res.bulkDumpManifest = bulkDumpManifest;
-        res.range = completeRange;
-        return res;
-    }
-
-    Optional<BulkDumpManifest> getManifest() const { return bulkDumpManifest; }
-
-    template <class Ar>
-    void serialize(Ar& ar) {
-        serializer(ar,
-                   jobId,
-                   range,
-                   fileType,
-                   transportMethod,
-                   exportMethod,
-                   remoteRoot,
-                   phase,
-                   taskId,
-                   version,
-                   bulkDumpManifest);
-    }
-
-private:
-    UID jobId; // The unique identifier of a job. Set by user. Any task spawned by the job shares the same jobId and
-               // configuration.
-
-    // File dump config:
-    KeyRange range; // Dump the key-value within this range "[begin, end)" from data file
-    BulkDumpFileType fileType = BulkDumpFileType::Invalid;
-    BulkDumpTransportMethod transportMethod = BulkDumpTransportMethod::Invalid;
-    BulkDumpExportMethod exportMethod = BulkDumpExportMethod::Invalid;
-    std::string remoteRoot; // remoteRoot is the root string to where the data is set to be uploaded
-
-    // Task dynamics:
-    BulkDumpPhase phase = BulkDumpPhase::Invalid;
-    Optional<UID> taskId; // The unique identifier of a task. Any SS can do a task. If a task is failed, this remaining
-                          // part of the task can be picked up by any SS with a changed taskId.
-    Optional<Version> version;
-    Optional<BulkDumpManifest> bulkDumpManifest; // Resulting remote bulkDumpManifest after the dumping task completes
-};
-
-// User API to create bulkDump task metadata
-// The dumped data is within the input range
-// The data is dumped to the input remoteRoot
-// The remoteRoot can be either a local root or a remote blobstore root string
-BulkDumpState newBulkDumpTaskLocalSST(const KeyRange& range, const std::string& remoteRoot);
-
-#endif
diff --git a/fdbclient/include/fdbclient/BulkLoadAndDump.h b/fdbclient/include/fdbclient/BulkLoadAndDump.h
new file mode 100644
index 00000000000..cf4bb7d557f
--- /dev/null
+++ b/fdbclient/include/fdbclient/BulkLoadAndDump.h
@@ -0,0 +1,800 @@
+/*
+ * BulkLoadAndDump.h
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FDBCLIENT_BULKLOADANDDUMP_H
+#define FDBCLIENT_BULKLOADANDDUMP_H
+#include "flow/Arena.h"
+#pragma once
+
+#include "fdbclient/FDBTypes.h"
+#include "fdbrpc/fdbrpc.h"
+
+std::string stringRemovePrefix(std::string str, const std::string& prefix);
+
+Key getKeyFromHexString(const std::string& rawString);
+
+// For configuring bulk load and dump mechanism
+enum class BulkLoadFileType : uint8_t {
+    Invalid = 0,
+    SST = 1,
+};
+
+enum class BulkLoadTransportMethod : uint8_t {
+    Invalid = 0,
+    CP = 1,
+};
+
+// Define the configuration of bytes sampling
+// Used when setting up the manifest file
+struct BulkLoadByteSampleSetting {
+    constexpr static FileIdentifier file_identifier = 1384500;
+
+    BulkLoadByteSampleSetting() = default;
+
+    BulkLoadByteSampleSetting(int version,
+                              const std::string& method,
+                              int factor,
+                              int overhead,
+                              double minimalProbability)
+      : version(version), method(method), factor(factor), overhead(overhead), minimalProbability(minimalProbability) {
+        ASSERT(isValid());
+    }
+
+    bool isValid() const {
+        if (method.size() == 0) {
+            return false;
+        }
+        return true;
+    }
+
+    std::string toString() const {
+        return "[ByteSampleVersion]: " + std::to_string(version) + ", [ByteSampleMethod]: " + method +
+               ", [ByteSampleFactor]: " + std::to_string(factor) +
+               ", [ByteSampleOverhead]: " + std::to_string(overhead) +
+               ", [ByteSampleMinimalProbability]: " + std::to_string(minimalProbability);
+    }
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, version, method, factor, overhead, minimalProbability);
+    }
+
+    int version = 0;
+    std::string method = "";
+    int factor = 0;
+    int overhead = 0;
+    double minimalProbability = 0.0;
+};
+
+// Definition of bulkload files metadata
+struct BulkLoadFileSet {
+    constexpr static FileIdentifier file_identifier = 1384501;
+
+    BulkLoadFileSet() = default;
+
+    BulkLoadFileSet(const std::string& rootPath,
+                    const std::string& relativePath,
+                    const std::string& manifestFileName,
+                    const std::string& dataFileName,
+                    const std::string& byteSampleFileName)
+      : rootPath(rootPath), relativePath(relativePath), manifestFileName(manifestFileName), dataFileName(dataFileName),
+        byteSampleFileName(byteSampleFileName) {
+        ASSERT(isValid());
+    }
+
+    bool isValid() const {
+        if (rootPath.empty()) {
+            return false;
+        }
+        if (relativePath.empty()) {
+            return false;
+        }
+        if (manifestFileName.empty()) {
+            return false;
+        }
+        if (dataFileName.empty() && !byteSampleFileName.empty()) {
+            return false;
+        }
+        return true;
+    }
+
+    std::string toString() const {
+        return "[RootPath]: " + rootPath + ", [RelativePath]: " + relativePath +
+               ", [ManifestFileName]: " + manifestFileName + ", [DataFileName]: " + dataFileName +
+               ", [ByteSampleFileName]: " + byteSampleFileName;
+    }
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, rootPath, relativePath, manifestFileName, dataFileName, byteSampleFileName);
+    }
+
+    std::string rootPath = "";
+    std::string relativePath = "";
+    std::string manifestFileName = "";
+    std::string dataFileName = "";
+    std::string byteSampleFileName = "";
+};
+
+struct BulkDumpFileFullPathSet {
+    BulkDumpFileFullPathSet(const BulkLoadFileSet& fileSet) {
+        folder = joinPath(fileSet.rootPath, fileSet.relativePath);
+        manifestFilePath = joinPath(folder, fileSet.manifestFileName);
+        dataFilePath = joinPath(folder, fileSet.dataFileName);
+        byteSampleFilePath = joinPath(folder, fileSet.byteSampleFileName);
+    }
+    std::string folder = "";
+    std::string manifestFilePath = "";
+    std::string dataFilePath = "";
+    std::string byteSampleFilePath = "";
+};
+
+// Define the metadata of bulkload manifest file
+// The file is uploaded along with the data files
+struct BulkLoadManifest {
+    constexpr static FileIdentifier file_identifier = 1384502;
+
+    BulkLoadManifest() = default;
+
+    // Used when dumping
+    BulkLoadManifest(const BulkLoadFileSet& fileSet,
+                     const Key& beginKey,
+                     const Key& endKey,
+                     const Version& version,
+                     const std::string& checksum,
+                     int64_t bytes,
+                     const BulkLoadByteSampleSetting& byteSampleSetting)
+      : fileSet(fileSet), beginKey(beginKey), endKey(endKey), version(version), checksum(checksum), bytes(bytes),
+        byteSampleSetting(byteSampleSetting) {
+        ASSERT(isValid());
+    }
+
+    // Used when loading
+    BulkLoadManifest(const std::string& rawString) {
+        std::vector<std::string> parts = splitString(rawString, ", ");
+        ASSERT(parts.size() == 15);
+        std::string rootPath = stringRemovePrefix(parts[0], "[RootPath]: ");
+        std::string relativePath = stringRemovePrefix(parts[1], "[RelativePath]: ");
+        std::string manifestFileName = stringRemovePrefix(parts[2], "[ManifestFileName]: ");
+        std::string dataFileName = stringRemovePrefix(parts[3], "[DataFileName]: ");
+        std::string byteSampleFileName = stringRemovePrefix(parts[4], "[ByteSampleFileName]: ");
+        fileSet = BulkLoadFileSet(rootPath, relativePath, manifestFileName, dataFileName, byteSampleFileName);
+        beginKey = getKeyFromHexString(stringRemovePrefix(parts[5], "[BeginKey]: "));
+        endKey = getKeyFromHexString(stringRemovePrefix(parts[6], "[EndKey]: "));
+        version = std::stoll(stringRemovePrefix(parts[7], "[Version]: "));
+        checksum = stringRemovePrefix(parts[8], "[Checksum]: ");
+        bytes = std::stoull(stringRemovePrefix(parts[9], "[Bytes]: "));
+        int byteSampleVersion = std::stoi(stringRemovePrefix(parts[10], "[ByteSampleVersion]: "));
+        std::string method = stringRemovePrefix(parts[11], "[ByteSampleMethod]: ");
+        int factor = std::stoi(stringRemovePrefix(parts[12], "[ByteSampleFactor]: "));
+        int overhead = std::stoi(stringRemovePrefix(parts[13], "[ByteSampleOverhead]: "));
+        double minimalProbability = std::stod(stringRemovePrefix(parts[14], "[ByteSampleMinimalProbability]: "));
+        byteSampleSetting = BulkLoadByteSampleSetting(byteSampleVersion, method, factor, overhead, minimalProbability);
+    }
+
+    bool isValid() const {
+        if (beginKey >= endKey) {
+            return false;
+        }
+        if (!fileSet.isValid()) {
+            return false;
+        }
+        if (!byteSampleSetting.isValid()) {
+            return false;
+        }
+        return true;
+    }
+
+    bool isEmptyRange() const { return bytes == 0; }
+
+    KeyRange getRange() const { return Standalone(KeyRangeRef(beginKey, endKey)); }
+
+    Key getBeginKey() const { return beginKey; }
+
+    Key getEndKey() const { return endKey; }
+
+    // Generates a human-readable string to be stored in the manifest file
+    std::string toString() const {
+        return fileSet.toString() + ", [BeginKey]: " + beginKey.toFullHexStringPlain() +
+               ", [EndKey]: " + endKey.toFullHexStringPlain() + ", [Version]: " + std::to_string(version) +
+               ", [Checksum]: " + checksum + ", [Bytes]: " + std::to_string(bytes) + ", " +
+               byteSampleSetting.toString();
+    }
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar, fileSet, beginKey, endKey, version, checksum, bytes, byteSampleSetting);
+    }
+
+    BulkLoadFileSet fileSet;
+    Key beginKey;
+    Key endKey;
+    Version version;
+    std::string checksum;
+    int64_t bytes;
+    BulkLoadByteSampleSetting byteSampleSetting;
+};
+
+enum class BulkLoadTaskPhase : uint8_t {
+    Invalid = 0, // Used to distinguish if a BulkLoadTaskState is a valid task
+    Submitted = 1, // Set by users
+    Triggered = 2, // Updated when DD triggers a data move for the task
+    Running = 3, // Updated atomically with updating KeyServer dest servers in startMoveKey
+    Complete = 4, // Updated atomically with updating KeyServer src servers in finishMoveKey
+    Acknowledged = 5, // Updated by users; DD automatically clears metadata with this phase
+};
+
+struct BulkLoadTaskState {
+public:
+    constexpr static FileIdentifier file_identifier = 1384499;
+
+    BulkLoadTaskState() = default;
+
+    // for acknowledging a completed task, where only taskId and range are used
+    BulkLoadTaskState(UID taskId, KeyRange range) : taskId(taskId), range(range), phase(BulkLoadTaskPhase::Invalid) {}
+
+    // for submitting a task
+    BulkLoadTaskState(KeyRange range,
+                      BulkLoadFileType fileType,
+                      BulkLoadTransportMethod transportMethod,
+                      std::string folder,
+                      std::unordered_set<std::string> dataFiles,
+                      Optional<std::string> bytesSampleFile,
+                      UID jobId)
+      : taskId(deterministicRandom()->randomUniqueID()), range(range), fileType(fileType),
+        transportMethod(transportMethod), folder(folder), dataFiles(dataFiles), bytesSampleFile(bytesSampleFile),
+        phase(BulkLoadTaskPhase::Submitted), jobId(jobId) {
+        ASSERT(isValid());
+    }
+
+    bool operator==(const BulkLoadTaskState& rhs) const {
+        return taskId == rhs.taskId && range == rhs.range && dataFiles == rhs.dataFiles && jobId == rhs.jobId;
+    }
+
+    std::string toString() const {
+        std::string res =
+            "BulkLoadTaskState: [Range]: " + Traceable<KeyRangeRef>::toString(range) +
+            ", [Type]: " + std::to_string(static_cast<uint8_t>(fileType)) +
+            ", [TransportMethod]: " + std::to_string(static_cast<uint8_t>(transportMethod)) +
+            ", [Phase]: " + std::to_string(static_cast<uint8_t>(phase)) + ", [Folder]: " + folder +
+            ", [DataFiles]: " + describe(dataFiles) + ", [SubmitTime]: " + std::to_string(submitTime) +
+            ", [TriggerTime]: " + std::to_string(triggerTime) + ", [StartTime]: " + std::to_string(startTime) +
+            ", [CompleteTime]: " + std::to_string(completeTime) + ", [RestartCount]: " + std::to_string(restartCount);
+        if (bytesSampleFile.present()) {
+            res = res + ", [ByteSampleFile]: " + bytesSampleFile.get();
+        }
+        if (dataMoveId.present()) {
+            res = res + ", [DataMoveId]: " + dataMoveId.get().toString();
+        }
+        res = res + ", [JobId]: " + jobId.toString();
+        res = res + ", [TaskId]: " + taskId.toString();
+        return res;
+    }
+
+    KeyRange getRange() const { return range; }
+
+    UID getTaskId() const { return taskId; }
+
+    UID getJobId() const { return jobId; }
+
+    std::string getFolder() const { return folder; }
+
+    BulkLoadTransportMethod getTransportMethod() const { return transportMethod; }
+
+    std::unordered_set<std::string> getDataFiles() const { return dataFiles; }
+
+    Optional<std::string> getBytesSampleFile() const { return bytesSampleFile; }
+
+    bool onAnyPhase(const std::vector<BulkLoadTaskPhase>& inputPhases) const {
+        for (const auto& inputPhase : inputPhases) {
+            if (inputPhase == phase) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    void setDataMoveId(UID id) {
+        if (dataMoveId.present() && dataMoveId.get() != id) {
+            TraceEvent(SevWarn, "DDBulkLoadTaskUpdateDataMoveId")
+                .detail("NewId", id)
+                .detail("BulkLoadTask", this->toString());
+        }
+        dataMoveId = id;
+    }
+
+    inline Optional<UID> getDataMoveId() const { return dataMoveId; }
+
+    inline void clearDataMoveId() { dataMoveId.reset(); }
+
+    bool isValid() const {
+        if (!taskId.isValid()) {
+            return false;
+        }
+        if (range.empty()) {
+            return false;
+        }
+        if (transportMethod == BulkLoadTransportMethod::Invalid) {
+            return false;
+        } else if (transportMethod != BulkLoadTransportMethod::CP) {
+            ASSERT(false);
+        }
+        if (dataFiles.empty()) {
+            return false;
+        }
+        for (const auto& filePath : dataFiles) {
+            if (filePath.substr(0, folder.size()) != folder) {
+                return false;
+            }
+        }
+        if (bytesSampleFile.present()) {
+            if (bytesSampleFile.get().substr(0, folder.size()) != folder) {
+                return false;
+            }
+        }
+        // JobId can be UID(), indicating that no job is specified and the task belongs to the default job
+        // TODO(BulkLoad): do some validation between methods and files
+
+        return true;
+    }
+
+    template <class Ar>
+    void serialize(Ar& ar) {
+        serializer(ar,
+                   range,
+                   fileType,
+                   transportMethod,
+                   phase,
+                   folder,
+                   dataFiles,
+                   bytesSampleFile,
+                   dataMoveId,
+                   taskId,
+                   submitTime,
+                   triggerTime,
+                   startTime,
+                   completeTime,
+                   restartCount,
+                   jobId);
+    }
+
+    // Updated by DD
+    BulkLoadTaskPhase phase = BulkLoadTaskPhase::Invalid;
+    double submitTime = 0;
+    double triggerTime = 0;
+    double startTime = 0;
+    double completeTime = 0;
+    int restartCount = -1;
+
+private:
+    // Set by user
+    UID jobId; // Unique ID of the job. A job can spawn multiple tasks.
+    UID taskId; // Unique ID of the task
+    KeyRange range; // Load the key-value within this range "[begin, end)" from data file
+    // File inject config
+    BulkLoadFileType fileType = BulkLoadFileType::Invalid;
+    BulkLoadTransportMethod transportMethod = BulkLoadTransportMethod::Invalid;
+    // Folder includes all files to be injected
+    std::string folder;
+    // Files to inject
+    std::unordered_set<std::string> dataFiles;
+    Optional<std::string> bytesSampleFile;
+    // bytesSampleFile is Optional. If bytesSampleFile is not provided, the storage server will go through all keys
+    // and conduct byte sampling, which will slow down the bulk loading rate.
+    // TODO(BulkLoad): add file checksum
+
+    // Set by DD
+    Optional<UID> dataMoveId;
+};
+
+// Define the bulkdump job manifest header
+struct BulkDumpJobManifestHeader {
+public:
+    BulkDumpJobManifestHeader() = default;
+
+    // Used when dumping
+    BulkDumpJobManifestHeader(size_t manifestCount, const std::string& rootFolder)
+      : manifestCount(manifestCount), rootFolder(rootFolder) {
+        ASSERT(isValid());
+    }
+
+    // Used when loading
+    BulkDumpJobManifestHeader(const std::string& rawString) {
+        std::vector<std::string> parts = splitString(rawString, ", ");
+        if (parts.size() != 2) {
+            TraceEvent(SevError, "ParseBulkDumpJobManifestHeaderError").detail("RawString", rawString);
+            ASSERT(false);
+        }
+        manifestCount = std::stoull(stringRemovePrefix(parts[0], "[ManifestCount]: "));
+        rootFolder = stringRemovePrefix(parts[1], "[RootFolder]: ");
+        ASSERT(isValid());
+    }
+
+    bool isValid() const { return manifestCount > 0 && !rootFolder.empty(); }
+
+    std::string toString() {
+        ASSERT(isValid());
+        return "[ManifestCount]: " + std::to_string(manifestCount) + ", [RootFolder]: " + rootFolder;
+    }
+
+    std::string getRootFolder() const { return rootFolder; }
+
+private:
+    size_t manifestCount = 0;
+    std::string rootFolder = "";
+};
+
+// Define the bulkdump job manifest entry per range
+struct BulkDumpJobManifestEntry {
+public:
+    // Used when loading
+    BulkDumpJobManifestEntry(const std::string& rawString) {
+        std::vector<std::string> parts = splitString(rawString, ", ");
+        if (parts.size() != 5) {
+            TraceEvent(SevError, "ParseBulkDumpJobManifestEntryError").detail("RawString", rawString);
+            ASSERT(false);
+        }
+        beginKey = getKeyFromHexString(stringRemovePrefix(parts[0], "[BeginKey]: "));
+        endKey = getKeyFromHexString(stringRemovePrefix(parts[1], "[EndKey]: "));
+        relativePath = stringRemovePrefix(parts[2], "[RelativePath]: ");
+        version = std::stoll(stringRemovePrefix(parts[3], "[Version]: "));
+        bytes = std::stoull(stringRemovePrefix(parts[4], "[Bytes]: "));
+        ASSERT(isValid());
+    }
+
+    // Used when dumping
+    BulkDumpJobManifestEntry(const Key& beginKey,
+                             const Key& endKey,
+                             const std::string& relativePath,
+                             Version version,
+                             size_t bytes)
+      : beginKey(beginKey), endKey(endKey), relativePath(relativePath), version(version), bytes(bytes) {
+        ASSERT(isValid());
+    }
+
+    std::string toString() const {
+        ASSERT(isValid());
+        return "[BeginKey]: " + beginKey.toFullHexStringPlain() + ", [EndKey]: " + endKey.toFullHexStringPlain() +
+               ", [RelativePath]: " + relativePath + ", [Version]: " + std::to_string(version) +
+               ", [Bytes]: " + std::to_string(bytes);
+    }
+
+    KeyRange getRange() const { return Standalone(KeyRangeRef(beginKey, endKey)); }
+
+    std::string getRelativePath() const { return relativePath; }
+
+    bool isValid() const { return beginKey < endKey && version != invalidVersion; }
+
+private:
+    Key beginKey;
+    Key endKey;
+    std::string relativePath;
+    Version version;
+    size_t bytes;
+};
+
+enum class BulkLoadJobPhase : uint8_t {
+    Invalid = 0,
+    Submitted = 1,
+    Triggered = 2,
+    Complete = 3,
+};
+
+struct BulkLoadJobState {
+public:
+    constexpr static FileIdentifier file_identifier = 1384496;
+
+    BulkLoadJobState() = default;
+    BulkLoadJobState(const UID& jobId,
+                     const std::string& remoteRoot,
+                     const KeyRange& range,
+                     BulkLoadTransportMethod transportMethod)
+      : jobId(jobId), remoteRoot(remoteRoot), range(range), phase(BulkLoadJobPhase::Submitted),
+        transportMethod(transportMethod) {
+        ASSERT(isValid());
+    }
+
+    std::string toString() const {
+        return "[BulkLoadJobState]: [JobId]: " + jobId.toString() + ", [RemoteRoot]: " + remoteRoot +
+               ", [Range]: " + range.toString() + ", [Phase]: " + std::to_string(static_cast<uint8_t>(phase)) +
+               ", [TransportMethod]: " + std::to_string(static_cast<uint8_t>(transportMethod)) +
+               ", [ManifestPath]: " + manifestPath + ", [DataPath]: " + dataPath +
+               ", [ByteSamplePath]: " + byteSamplePath;
+    }
+
+    std::string getRemoteRoot() const { return remoteRoot; }
+
+    BulkLoadTransportMethod getTransportMethod() const { return transportMethod; }
+
+    UID getJobId() const { return jobId; }
+
+    BulkLoadJobPhase getPhase() const { return phase; }
+
+    KeyRange getRange() const { return range; }
+
+    void markComplete() {
+        ASSERT(phase == BulkLoadJobPhase::Triggered || phase == BulkLoadJobPhase::Complete);
+        phase = BulkLoadJobPhase::Complete;
+        return;
+    }
+
+    bool isValid() const {
+        if (!jobId.isValid()) {
+            return false;
+        }
+        if (range.empty()) {
+            return false;
+        }
+        if (transportMethod == BulkLoadTransportMethod::Invalid) {
+            return false;
+        }
+        if (remoteRoot.empty()) {
+            return false;
+        }
+        return true;
+    }
+
+    bool isValidTask() const {
+        if (!isValid()) {
+            return false;
+        }
+        if (phase == BulkLoadJobPhase::Invalid) {
+            return false;
+        }
+        if (manifestPath.empty()) {
+            return false;
+        }
+        return true;
+    }
+
+    BulkLoadJobState getTaskToTrigger(const BulkLoadManifest& manifest) const {
+        BulkLoadJobState res = *this;
+        const std::string relativePath = joinPath(manifest.fileSet.rootPath, manifest.fileSet.relativePath);
+        res.manifestPath = joinPath(relativePath, manifest.fileSet.manifestFileName);
+        ASSERT(!manifest.fileSet.dataFileName.empty());
+        res.dataPath = joinPath(relativePath, manifest.fileSet.dataFileName);
+        if (!manifest.fileSet.byteSampleFileName.empty()) { // TODO(Bulkdump): check if the bytesampling setting
+            res.byteSamplePath = joinPath(relativePath, manifest.fileSet.byteSampleFileName);
+        }
+        res.range = manifest.getRange();
+        res.phase = BulkLoadJobPhase::Triggered;
+        ASSERT(res.isValidTask());
+        return res;
+    }
+
getEmptyTaskToComplete(const BulkLoadManifest& manifest) const { + BulkLoadJobState res = *this; + const std::string relativePath = joinPath(manifest.fileSet.rootPath, manifest.fileSet.relativePath); + res.manifestPath = joinPath(relativePath, manifest.fileSet.manifestFileName); + ASSERT(manifest.fileSet.dataFileName.empty()); + res.range = manifest.getRange(); + res.phase = BulkLoadJobPhase::Complete; + ASSERT(res.isValidTask()); + return res; + } + + std::string getDataFilePath() const { return dataPath; } + + std::string getBytesSampleFilePath() const { return byteSamplePath; } + + template <class Ar> + void serialize(Ar& ar) { + serializer(ar, jobId, range, transportMethod, remoteRoot, phase, manifestPath, dataPath, byteSamplePath); + } + +private: + UID jobId; + KeyRange range; + BulkLoadTransportMethod transportMethod = BulkLoadTransportMethod::Invalid; + std::string remoteRoot; + BulkLoadJobPhase phase; + std::string manifestPath; + std::string dataPath; + std::string byteSamplePath; +}; + +enum class BulkDumpPhase : uint8_t { + Invalid = 0, + Submitted = 1, + Complete = 2, +}; + +// Definition of bulkdump metadata +struct BulkDumpState { + constexpr static FileIdentifier file_identifier = 1384498; + + BulkDumpState() = default; + + // The only public interface to create a valid job + // This constructor is called when a user submits a job, e.g., via newBulkDumpJobLocalSST() + BulkDumpState(const KeyRange& range, + BulkLoadFileType fileType, + BulkLoadTransportMethod transportMethod, + const std::string& remoteRoot) + : jobId(deterministicRandom()->randomUniqueID()), range(range), fileType(fileType), + transportMethod(transportMethod), remoteRoot(remoteRoot), phase(BulkDumpPhase::Submitted) { + ASSERT(isValid()); + } + + bool operator==(const BulkDumpState& rhs) const { + return jobId == rhs.jobId && taskId == rhs.taskId && range == rhs.range && remoteRoot == rhs.remoteRoot; + } + + std::string toString() const { + std::string res = "BulkDumpState: [Range]: " + Traceable<KeyRangeRef>::toString(range) + + ", [FileType]: " + std::to_string(static_cast<uint8_t>(fileType)) + + ", [TransportMethod]: " + std::to_string(static_cast<uint8_t>(transportMethod)) + + ", [Phase]: " + std::to_string(static_cast<uint8_t>(phase)) + + ", [RemoteRoot]: " + remoteRoot + ", [JobId]: " + jobId.toString(); + if (taskId.present()) { + res = res + ", [TaskId]: " + taskId.get().toString(); + } + if (version.present()) { + res = res + ", [Version]: " + std::to_string(version.get()); + } + if (manifest.present()) { + res = res + ", [BulkLoadManifest]: " + manifest.get().toString(); + } + return res; + } + + KeyRange getRange() const { return range; } + + UID getJobId() const { return jobId; } + + Optional<UID> getTaskId() const { return taskId; } + + std::string getRemoteRoot() const { return remoteRoot; } + + BulkDumpPhase getPhase() const { return phase; } + + BulkLoadTransportMethod getTransportMethod() const { return transportMethod; } + + bool isValid() const { + if (!jobId.isValid()) { + return false; + } + if (taskId.present() && !taskId.get().isValid()) { + return false; + } + if (range.empty()) { + return false; + } + if (transportMethod == BulkLoadTransportMethod::Invalid) { + return false; + } else if (transportMethod != BulkLoadTransportMethod::CP) { + ASSERT(false); + } + if (remoteRoot.empty()) { + return false; + } + return true; + } + + // The user job spawns a series of range tasks based on shard boundaries to cover the job's range. + // Those spawned tasks are executed by storage servers (SSes). + // Returns the metadata of the spawned task.
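// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch only, not part of this patch. It shows
// how a job-level BulkDumpState is expected to fan out into per-shard task
// metadata via getRangeTaskState() below; the range, shard boundaries, and
// remoteRoot are made up for the example.
namespace {
void exampleSpawnRangeTasks() {
	// Assumed: a submitted job covering ["a", "z") dumped to a local root.
	BulkDumpState job = newBulkDumpJobLocalSST(KeyRangeRef("a"_sr, "z"_sr), "/tmp/bulkdump");
	// DD would derive the task ranges from the current shard boundaries.
	std::vector<KeyRange> taskRanges = { KeyRangeRef("a"_sr, "m"_sr), KeyRangeRef("m"_sr, "z"_sr) };
	for (const auto& shardRange : taskRanges) {
		BulkDumpState task = job.getRangeTaskState(shardRange);
		// The task inherits the job configuration but gets a fresh taskId.
		ASSERT(task.getJobId() == job.getJobId());
		ASSERT(task.getTaskId().present());
	}
}
} // namespace
// ---------------------------------------------------------------------------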
+ BulkDumpState getRangeTaskState(const KeyRange& taskRange) { + ASSERT(range.contains(taskRange)); + BulkDumpState res = *this; // the task inherits configuration from the job + UID newTaskId; + // Guarantee a brand-new taskId for the newly spawned task + int retryCount = 0; + while (true) { + newTaskId = deterministicRandom()->randomUniqueID(); + if (!res.taskId.present() || res.taskId.get() != newTaskId) { + break; + } + retryCount++; + if (retryCount > 50) { + TraceEvent(SevError, "GetRangeTaskStateRetryTooManyTimes").detail("TaskRange", taskRange); + throw bulkdump_task_failed(); + } + } + res.taskId = newTaskId; + res.range = taskRange; + return res; + } + + // Generate a metadata with Complete state. + BulkDumpState getRangeCompleteState(const KeyRange& completeRange, const BulkLoadManifest& manifest) { + ASSERT(range.contains(completeRange)); + ASSERT(manifest.isValid()); + ASSERT(taskId.present() && taskId.get().isValid()); + BulkDumpState res = *this; + res.phase = BulkDumpPhase::Complete; + res.manifest = manifest; + res.range = completeRange; + return res; + } + + Optional<BulkLoadManifest> getManifest() const { return manifest; } + + template <class Ar> + void serialize(Ar& ar) { + serializer(ar, jobId, range, fileType, transportMethod, remoteRoot, phase, taskId, version, manifest); + } + +private: + UID jobId; // The unique identifier of a job. Set by user. Any task spawned by the job shares the same jobId and + // configuration. + + // File dump config: + KeyRange range; // Dump the key-values within this range "[begin, end)" to the data file + BulkLoadFileType fileType = BulkLoadFileType::Invalid; + BulkLoadTransportMethod transportMethod = BulkLoadTransportMethod::Invalid; + std::string remoteRoot; // remoteRoot is the root path to which the data will be uploaded + + // Task dynamics: + BulkDumpPhase phase = BulkDumpPhase::Invalid; + Optional<UID> taskId; // The unique identifier of a task. Any SS can execute a task. If a task fails, the remaining + // part of the task can be picked up by any SS under a new taskId. + Optional<Version> version; + Optional<BulkLoadManifest> manifest; // Resulting remote manifest after the dumping task completes +}; + +// Return two file settings: first: LocalFilePaths; second: RemoteFilePaths. +// The local file paths: +// <rootLocal>/<relativeFolder>/<dumpVersion>-manifest.txt (must have) +// <rootLocal>/<relativeFolder>/<dumpVersion>-data.sst (omitted for an empty range) +// <rootLocal>/<relativeFolder>/<dumpVersion>-sample.sst (omitted if the data size is too small to have a sample) +// The remote file paths: +// <rootRemote>/<relativeFolder>/<dumpVersion>-manifest.txt (must have) +// <rootRemote>/<relativeFolder>/<dumpVersion>-data.sst (omitted for an empty range) +// <rootRemote>/<relativeFolder>/<dumpVersion>-sample.sst (omitted if the data size is too small to have a sample) +std::pair<BulkLoadFileSet, BulkLoadFileSet> generateBulkLoadFileSetting(Version dumpVersion, + const std::string& relativeFolder, + const std::string& rootLocal, + const std::string& rootRemote); + +// Define bulkload job root folder +std::string generateBulkLoadJobRoot(const std::string& root, const UID& jobId); + +// Define bulkload job manifest file name +std::string generateBulkLoadJobManifestFileName(const UID& jobId); + +// Define bulkload job manifest file content based on all BulkLoadManifests of the job. +// Each row is a range, sorted by beginKey; no two ranges overlap. +// Columns (matching BulkDumpJobManifestEntry): beginKey, endKey, relativePath, version, bytes. +// The version should always be valid; bytes can be 0 in case of an empty range.
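// ---------------------------------------------------------------------------
// Editor's note: illustrative job manifest content under the header/entry
// formats defined above (all values are made up; <jobId>/<taskId> stand in
// for real UID strings). The first line is the BulkDumpJobManifestHeader;
// each following line is one BulkDumpJobManifestEntry with hex-encoded keys:
//
//   [ManifestCount]: 2, [RootFolder]: /remote/bulkdump/<jobId>
//   [BeginKey]: 61, [EndKey]: 6d, [RelativePath]: <taskId1>/100-manifest.txt, [Version]: 100, [Bytes]: 4096
//   [BeginKey]: 6d, [EndKey]: 7a, [RelativePath]: <taskId2>/100-manifest.txt, [Version]: 100, [Bytes]: 0
// ---------------------------------------------------------------------------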
+std::string generateBulkLoadJobManifestFileContent(const std::map<Key, BulkLoadManifest>& manifests); + +// User API to create bulkload task metadata +// This is used for testing +BulkLoadTaskState newBulkLoadTaskLocalSST(UID jobId, + KeyRange range, + std::string folder, + std::string dataFile, + std::string bytesSampleFile); + +// User API to create bulkload job metadata +// The restored data is within the input range and is read from the remoteRoot +// The remoteRoot can be either a local folder or a remote blobstore folder string +// JobId is the job ID of the bulkdump job +// All data of the bulkdump job is uploaded to the folder <remoteRoot>/<jobId> +BulkLoadJobState newBulkLoadJobLocalSST(const UID& jobId, const KeyRange& range, const std::string& remoteRoot); + +// User API to create bulkDump job metadata +// The dumped data is within the input range +// The data is dumped to the input remoteRoot +// The remoteRoot can be either a local root or a remote blobstore root string +BulkDumpState newBulkDumpJobLocalSST(const KeyRange& range, const std::string& remoteRoot); + +#endif diff --git a/fdbclient/include/fdbclient/BulkLoading.h b/fdbclient/include/fdbclient/BulkLoading.h deleted file mode 100644 index f2a7ac67e43..00000000000 --- a/fdbclient/include/fdbclient/BulkLoading.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * BulkLoading.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FDBCLIENT_BULKLOADING_H -#define FDBCLIENT_BULKLOADING_H -#pragma once - -#include "fdbclient/FDBTypes.h" -#include "fdbrpc/fdbrpc.h" - -enum class BulkLoadPhase : uint8_t { - Invalid = 0, // Used to distinguish if a BulkLoadState is a valid task - Submitted = 1, // Set by users - Triggered = 2, // Update when DD trigger a data move for the task - Running = 3, // Update atomically with updating KeyServer dest servers in startMoveKey - Complete = 4, // Update atomically with updating KeyServer src servers in finishMoveKey - Acknowledged = 5, // Updated by users; DD automatically clear metadata with this phase -}; - -enum class BulkLoadType : uint8_t { - Invalid = 0, - SST = 1, -}; - -enum class BulkLoadTransportMethod : uint8_t { - Invalid = 0, - CP = 1, // Local file copy. Used when the data file is in the local file system for any storage server. Used for - // simulation test and local cluster test.
-}; - -enum class BulkLoadInjectMethod : uint8_t { - Invalid = 0, - File = 1, -}; - -struct BulkLoadState { - constexpr static FileIdentifier file_identifier = 1384499; - - BulkLoadState() = default; - - // for acknowledging a completed task, where only taskId and range are used - BulkLoadState(UID taskId, KeyRange range) : taskId(taskId), range(range), phase(BulkLoadPhase::Invalid) {} - - // for submitting a task - BulkLoadState(KeyRange range, - BulkLoadType loadType, - BulkLoadTransportMethod transportMethod, - BulkLoadInjectMethod injectMethod, - std::string folder, - std::unordered_set dataFiles, - Optional bytesSampleFile) - : taskId(deterministicRandom()->randomUniqueID()), range(range), loadType(loadType), - transportMethod(transportMethod), injectMethod(injectMethod), folder(folder), dataFiles(dataFiles), - bytesSampleFile(bytesSampleFile), phase(BulkLoadPhase::Submitted) { - ASSERT(isValid()); - } - - bool operator==(const BulkLoadState& rhs) const { - return taskId == rhs.taskId && range == rhs.range && dataFiles == rhs.dataFiles; - } - - std::string toString() const { - std::string res = - "BulkLoadState: [Range]: " + Traceable::toString(range) + - ", [Type]: " + std::to_string(static_cast(loadType)) + - ", [TransportMethod]: " + std::to_string(static_cast(transportMethod)) + - ", [InjectMethod]: " + std::to_string(static_cast(injectMethod)) + - ", [Phase]: " + std::to_string(static_cast(phase)) + ", [Folder]: " + folder + - ", [DataFiles]: " + describe(dataFiles) + ", [SubmitTime]: " + std::to_string(submitTime) + - ", [TriggerTime]: " + std::to_string(triggerTime) + ", [StartTime]: " + std::to_string(startTime) + - ", [CompleteTime]: " + std::to_string(completeTime) + ", [RestartCount]: " + std::to_string(restartCount); - if (bytesSampleFile.present()) { - res = res + ", [ByteSampleFile]: " + bytesSampleFile.get(); - } - if (dataMoveId.present()) { - res = res + ", [DataMoveId]: " + dataMoveId.get().toString(); - } - res = res + ", [TaskId]: " + taskId.toString(); - return res; - } - - KeyRange getRange() const { return range; } - - UID getTaskId() const { return taskId; } - - std::string getFolder() const { return folder; } - - BulkLoadTransportMethod getTransportMethod() const { return transportMethod; } - - std::unordered_set getDataFiles() const { return dataFiles; } - - Optional getBytesSampleFile() const { return bytesSampleFile; } - - bool onAnyPhase(const std::vector& inputPhases) const { - for (const auto& inputPhase : inputPhases) { - if (inputPhase == phase) { - return true; - } - } - return false; - } - - void setDataMoveId(UID id) { - if (dataMoveId.present() && dataMoveId.get() != id) { - TraceEvent(SevWarn, "DDBulkLoadTaskUpdateDataMoveId") - .detail("NewId", id) - .detail("BulkLoadTask", this->toString()); - } - dataMoveId = id; - } - - inline Optional getDataMoveId() const { return dataMoveId; } - - inline void clearDataMoveId() { dataMoveId.reset(); } - - bool isValid() const { - if (!taskId.isValid()) { - return false; - } - if (range.empty()) { - return false; - } - if (transportMethod == BulkLoadTransportMethod::Invalid) { - return false; - } else if (transportMethod != BulkLoadTransportMethod::CP) { - throw not_implemented(); - } - if (injectMethod == BulkLoadInjectMethod::Invalid) { - return false; - } else if (injectMethod != BulkLoadInjectMethod::File) { - throw not_implemented(); - } - if (dataFiles.empty()) { - return false; - } - for (const auto& filePath : dataFiles) { - if (filePath.substr(0, folder.size()) != folder) { - return false; - } - } - 
if (bytesSampleFile.present()) { - if (bytesSampleFile.get().substr(0, folder.size()) != folder) { - return false; - } - } - // TODO(BulkLoad): do some validation between methods and files - - return true; - } - - template - void serialize(Ar& ar) { - serializer(ar, - range, - loadType, - transportMethod, - injectMethod, - phase, - folder, - dataFiles, - bytesSampleFile, - dataMoveId, - taskId, - submitTime, - triggerTime, - startTime, - completeTime, - restartCount); - } - - // Updated by DD - BulkLoadPhase phase = BulkLoadPhase::Invalid; - double submitTime = 0; - double triggerTime = 0; - double startTime = 0; - double completeTime = 0; - int restartCount = -1; - -private: - // Set by user - UID taskId; // Unique ID of the task - KeyRange range; // Load the key-value within this range "[begin, end)" from data file - // File inject config - BulkLoadType loadType = BulkLoadType::Invalid; - BulkLoadTransportMethod transportMethod = BulkLoadTransportMethod::Invalid; - BulkLoadInjectMethod injectMethod = BulkLoadInjectMethod::Invalid; - // Folder includes all files to be injected - std::string folder; - // Files to inject - std::unordered_set dataFiles; - Optional bytesSampleFile; - // bytesSampleFile is Optional. If bytesSampleFile is not provided, storage server will go through all keys and - // conduct byte sampling, which will slow down the bulk loading rate. - // TODO(BulkLoad): add file checksum - - // Set by DD - Optional dataMoveId; -}; - -BulkLoadState newBulkLoadTaskLocalSST(KeyRange range, - std::string folder, - std::string dataFile, - std::string bytesSampleFile); - -#endif diff --git a/fdbclient/include/fdbclient/ManagementAPI.actor.h b/fdbclient/include/fdbclient/ManagementAPI.actor.h index 7cb41ddc298..2fa1d76930e 100644 --- a/fdbclient/include/fdbclient/ManagementAPI.actor.h +++ b/fdbclient/include/fdbclient/ManagementAPI.actor.h @@ -34,6 +34,7 @@ standard API and some knowledge of the contents of the system key space. 
#include #include +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/GenericManagementAPI.actor.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/RangeLock.h" @@ -174,22 +175,45 @@ ACTOR Future cancelAuditStorage(Reference cluster ACTOR Future setBulkLoadMode(Database cx, int mode); // Get valid bulk load task state within the input range -ACTOR Future<std::vector<BulkLoadState>> getValidBulkLoadTasksWithinRange(Database cx, - KeyRange rangeToRead, - size_t limit, - Optional<BulkLoadPhase> phase); +ACTOR Future<std::vector<BulkLoadTaskState>> getValidBulkLoadTasksWithinRange( + Database cx, + KeyRange rangeToRead, + size_t limit = 10, + Optional<BulkLoadTaskPhase> phase = Optional<BulkLoadTaskPhase>()); -// Submit a bulk load task -ACTOR Future<Void> submitBulkLoadTask(Database cx, BulkLoadState bulkLoadTask); +// Submit a bulkload task +ACTOR Future<Void> submitBulkLoadTask(Database cx, BulkLoadTaskState bulkLoadTask); + +// Create a bulkload task submission transaction without committing it +// Used by ManagementAPI and bulkdumpRestore at DD +ACTOR Future<Void> setBulkLoadSubmissionTransaction(Transaction* tr, BulkLoadTaskState bulkLoadTask); // Acknowledge a bulk load task if it has been completed ACTOR Future<Void> acknowledgeBulkLoadTask(Database cx, KeyRange range, UID taskId); -// Get bulk load task for the input range and taskId -ACTOR Future<BulkLoadState> getBulkLoadTask(Transaction* tr, - KeyRange range, - UID taskId, - std::vector<BulkLoadPhase> phases); +// Create a bulkload task acknowledgment transaction without committing it +// Used by ManagementAPI and bulkdumpRestore at DD +ACTOR Future<Void> setBulkLoadAcknowledgeTransaction(Transaction* tr, KeyRange range, UID taskId); + +// Get bulkload task for the input range and taskId +ACTOR Future<BulkLoadTaskState> getBulkLoadTask(Transaction* tr, + KeyRange range, + UID taskId, + std::vector<BulkLoadTaskPhase> phases); + +// Submit a bulkload job: load the job's data from a remote folder using the bulkloading mechanism +// There is at most one bulkload job at a time +ACTOR Future<Void> submitBulkLoadJob(Database cx, BulkLoadJobState jobState); + +// Create a transaction for updating bulkload job metadata +// Returns true if it performed the update; otherwise returns false +ACTOR Future<bool> updateBulkLoadJobMetadata(Transaction* tr, BulkLoadJobState jobState); + +// TODO(BulkLoad): Cancel or clear a bulkload job +ACTOR Future<Void> clearBulkLoadJob(Database cx); + +// Get the Id of the ongoing bulkload job +ACTOR Future<Optional<UID>> getOngoingBulkLoadJob(Database cx); // Set bulk dump mode. When the mode is on, DD will periodically check if there is any bulkdump task to do by scanning // the metadata. diff --git a/fdbclient/include/fdbclient/StorageCheckpoint.h b/fdbclient/include/fdbclient/StorageCheckpoint.h index 573f5dd4c22..2b459979db9 100644 --- a/fdbclient/include/fdbclient/StorageCheckpoint.h +++ b/fdbclient/include/fdbclient/StorageCheckpoint.h @@ -22,7 +22,7 @@ #define FDBCLIENT_STORAGCHECKPOINT_H #pragma once -#include "fdbclient/BulkLoading.h" +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/FDBTypes.h" const std::string checkpointBytesSampleFileName = "metadata_bytes.sst"; @@ -174,7 +174,7 @@ struct DataMoveMetaData { std::set checkpoints; int16_t phase; // DataMoveMetaData::Phase.
int8_t mode; - Optional bulkLoadState; // set if the data move is a bulk load data move + Optional bulkLoadTaskState; // set if the data move is a bulk load data move DataMoveMetaData() = default; DataMoveMetaData(UID id, Version version, KeyRange range) : id(id), version(version), priority(0), mode(0) { @@ -194,15 +194,15 @@ struct DataMoveMetaData { ", [Phase]: " + std::to_string(static_cast(phase)) + ", [Source Servers]: " + describe(src) + ", [Destination Servers]: " + describe(dest) + ", [Checkpoints]: " + describe(checkpoints); - if (bulkLoadState.present()) { - res = res + ", [BulkLoadState]: " + bulkLoadState.get().toString(); + if (bulkLoadTaskState.present()) { + res = res + ", [BulkLoadTaskState]: " + bulkLoadTaskState.get().toString(); } return res; } template void serialize(Ar& ar) { - serializer(ar, id, version, ranges, priority, src, dest, checkpoints, phase, mode, bulkLoadState); + serializer(ar, id, version, ranges, priority, src, dest, checkpoints, phase, mode, bulkLoadTaskState); } }; diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index 669767787b5..c97187e76c3 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -23,7 +23,7 @@ #pragma once #include "fdbclient/Audit.h" -#include "fdbclient/BulkDumping.h" +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/StorageCheckpoint.h" #include "fdbclient/StorageServerShard.h" diff --git a/fdbclient/include/fdbclient/SystemData.h b/fdbclient/include/fdbclient/SystemData.h index 7ee910f7afe..022ee600e94 100644 --- a/fdbclient/include/fdbclient/SystemData.h +++ b/fdbclient/include/fdbclient/SystemData.h @@ -25,8 +25,7 @@ // Functions and constants documenting the organization of the reserved keyspace in the database beginning with "\xFF" #include "fdbclient/AccumulativeChecksum.h" -#include "fdbclient/BulkLoading.h" -#include "fdbclient/BulkDumping.h" +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/BlobWorkerInterface.h" // TODO move the functions that depend on this out of here and into BlobWorkerInterface.h to remove this dependency #include "fdbclient/FDBTypes.h" #include "fdbclient/RangeLock.h" @@ -524,10 +523,14 @@ extern const KeyRef dataDistributionModeKey; extern const UID dataDistributionModeLock; extern const KeyRef bulkLoadModeKey; -extern const KeyRangeRef bulkLoadKeys; -extern const KeyRef bulkLoadPrefix; -const Value bulkLoadStateValue(const BulkLoadState& bulkLoadState); -BulkLoadState decodeBulkLoadState(const ValueRef& value); +extern const KeyRangeRef bulkLoadTaskKeys; +extern const KeyRef bulkLoadTaskPrefix; +extern const KeyRangeRef bulkLoadJobKeys; +extern const KeyRef bulkLoadJobPrefix; +const Value bulkLoadTaskStateValue(const BulkLoadTaskState& bulkLoadTaskState); +BulkLoadTaskState decodeBulkLoadTaskState(const ValueRef& value); +const Value bulkLoadJobValue(const BulkLoadJobState& bulkLoadJobState); +BulkLoadJobState decodeBulkLoadJobState(const ValueRef& value); extern const KeyRef bulkDumpModeKey; extern const KeyRangeRef bulkDumpKeys; diff --git a/fdbserver/BulkDumpUtil.actor.cpp b/fdbserver/BulkDumpUtil.actor.cpp deleted file mode 100644 index 5d012142d56..00000000000 --- a/fdbserver/BulkDumpUtil.actor.cpp +++ /dev/null @@ -1,373 +0,0 @@ -/* - * BulkDumpUtils.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. 
and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbclient/BulkDumping.h" -#include "fdbclient/FDBTypes.h" -#include "fdbclient/KeyRangeMap.h" -#include "fdbrpc/FlowTransport.h" -#include "fdbserver/BulkDumpUtil.actor.h" -#include "fdbserver/Knobs.h" -#include "fdbserver/RocksDBCheckpointUtils.actor.h" -#include "fdbserver/StorageMetrics.actor.h" -#include "flow/Buggify.h" -#include "flow/Error.h" -#include "flow/Optional.h" -#include "flow/Platform.h" -#include "flow/Trace.h" -#include "flow/actorcompiler.h" // has to be last include -#include "flow/flow.h" -#include - -SSBulkDumpTask getSSBulkDumpTask(const std::map>& locations, - const BulkDumpState& bulkDumpState) { - StorageServerInterface targetServer; - std::vector checksumServers; - int dcid = 0; - for (const auto& [_, dcServers] : locations) { - if (dcid == 0) { - const int idx = deterministicRandom()->randomInt(0, dcServers.size()); - targetServer = dcServers[idx]; - } - for (int i = 0; i < dcServers.size(); i++) { - if (dcServers[i].id() == targetServer.id()) { - ASSERT_WE_THINK(dcid == 0); - } else { - checksumServers.push_back(dcServers[i].id()); - } - } - dcid++; - } - return SSBulkDumpTask(targetServer, checksumServers, bulkDumpState); -} - -std::string generateBulkDumpManifestFileName(Version version) { - return std::to_string(version) + "-manifest.txt"; -} - -std::string generateBulkDumpDataFileName(Version version) { - return std::to_string(version) + "-data.sst"; -} - -std::string generateBulkDumpByteSampleFileName(Version version) { - return std::to_string(version) + "-sample.sst"; -} - -std::string getJobManifestFileName(const UID& jobId) { - return jobId.toString() + "-job-manifest.txt"; -} - -std::string getBulkDumpTaskFolder(const UID& taskId) { - return taskId.toString(); -} - -std::string getBulkDumpJobRoot(const std::string& root, const UID& jobId) { - return joinPath(root, jobId.toString()); -} - -std::pair getLocalRemoteFileSetSetting(Version dumpVersion, - const std::string& relativeFolder, - const std::string& rootLocal, - const std::string& rootRemote) { - // Generate file names based on data version - const std::string manifestFileName = generateBulkDumpManifestFileName(dumpVersion); - const std::string dataFileName = generateBulkDumpDataFileName(dumpVersion); - const std::string byteSampleFileName = generateBulkDumpByteSampleFileName(dumpVersion); - BulkDumpFileSet fileSetLocal(rootLocal, relativeFolder, manifestFileName, dataFileName, byteSampleFileName); - BulkDumpFileSet fileSetRemote(rootRemote, relativeFolder, manifestFileName, dataFileName, byteSampleFileName); - return std::make_pair(fileSetLocal, fileSetRemote); -} - -// Generate SST file given the input sortedKVS to the input filePath -void writeKVSToSSTFile(std::string filePath, const std::map& sortedKVS, UID logId) { - const std::string absFilePath = abspath(filePath); - // Check file - if (fileExists(absFilePath)) { - TraceEvent(SevWarn, "SSBulkDumpRetriableError", logId) - 
.detail("Reason", "exist old File when writeKVSToSSTFile") - .detail("DataFilePathLocal", absFilePath); - ASSERT_WE_THINK(false); - throw retry(); - } - // Dump data to file - std::unique_ptr sstWriter = newRocksDBSstFileWriter(); - sstWriter->open(absFilePath); - for (const auto& [key, value] : sortedKVS) { - sstWriter->write(key, value); // assuming sorted - } - if (!sstWriter->finish()) { - // Unexpected: having data but failed to finish - TraceEvent(SevWarn, "SSBulkDumpRetriableError", logId) - .detail("Reason", "failed to finish data sst writer when writeKVSToSSTFile") - .detail("DataFilePath", absFilePath); - ASSERT_WE_THINK(false); - throw retry(); - } - return; -} - -void writeStringToFile(const std::string& path, const std::string& content) { - return writeFile(abspath(path), content); -} - -void clearFileFolder(const std::string& folderPath) { - platform::eraseDirectoryRecursive(abspath(folderPath)); - return; -} - -void resetFileFolder(const std::string& folderPath, const UID& logId) { - clearFileFolder(abspath(folderPath)); - platform::createDirectory(abspath(folderPath)); - return; -} - -void bulkDumpFileCopy(std::string fromFile, std::string toFile, size_t fileBytesMax, UID logId) { - const std::string content = readFileBytes(abspath(fromFile), fileBytesMax); - writeStringToFile(toFile, content); - TraceEvent(SevInfo, "SSBulkDumpSSTFileCopied", logId) - .detail("FromFile", abspath(fromFile)) - .detail("ToFile", abspath(toFile)) - .detail("ContentSize", content.size()); - return; -} - -// Generate key-value data, byte sampling data, and manifest file given a range at a version with a certain bytes -// Return BulkDumpManifest metadata (equivalent to content of the manifest file) -// TODO(BulkDump): can cause slow tasks, do the task in a separate thread in the future. -BulkDumpManifest dumpDataFileToLocalDirectory(UID logId, - const std::map& sortedData, - const std::map& sortedSample, - const BulkDumpFileSet& localFileSetConfig, - const BulkDumpFileSet& remoteFileSetConfig, - const ByteSampleSetting& byteSampleSetting, - Version dumpVersion, - const KeyRange& dumpRange, - int64_t dumpBytes) { - BulkDumpFileFullPathSet localFiles(localFileSetConfig); - - // Step 1: Clean up local folder - resetFileFolder((abspath(localFiles.folder)), logId); - - // Step 2: Dump data to file - bool containDataFile = false; - if (sortedData.size() > 0) { - writeKVSToSSTFile(abspath(localFiles.dataFilePath), sortedData, logId); - containDataFile = true; - } else { - ASSERT(sortedSample.empty()); - containDataFile = false; - } - - // Step 3: Dump sample to file - bool containByteSampleFile = false; - if (sortedSample.size() > 0) { - writeKVSToSSTFile(abspath(localFiles.byteSampleFilePath), sortedSample, logId); - ASSERT(containDataFile); - containByteSampleFile = true; - } else { - containByteSampleFile = false; - } - - // Step 4: Generate manifest file - if (fileExists(abspath(localFiles.manifestFilePath))) { - TraceEvent(SevWarn, "SSBulkDumpRetriableError", logId) - .detail("Reason", "exist old manifestFile") - .detail("ManifestFilePathLocal", abspath(localFiles.manifestFilePath)); - ASSERT_WE_THINK(false); - throw retry(); - } - BulkDumpFileSet fileSetRemote(remoteFileSetConfig.rootPath, - remoteFileSetConfig.relativePath, - remoteFileSetConfig.manifestFileName, - containDataFile ? remoteFileSetConfig.dataFileName : "", - containByteSampleFile ? 
remoteFileSetConfig.byteSampleFileName : ""); - BulkDumpManifest manifest( - fileSetRemote, dumpRange.begin, dumpRange.end, dumpVersion, "", dumpBytes, byteSampleSetting); - writeStringToFile(abspath(localFiles.manifestFilePath), manifest.toString()); - return manifest; -} - -// Validate the invariant of filenames. Source is the file stored locally. Destination is the file going to move to. -bool validateSourceDestinationFileSets(const BulkDumpFileSet& source, const BulkDumpFileSet& destination) { - // Manifest file must be present - if (source.manifestFileName.empty() || destination.manifestFileName.empty()) { - return false; - } - // Source data file and destination data file must present at same time - // If data file not present, byte sampling file must not present - if (source.dataFileName.empty() && (!destination.dataFileName.empty() || !source.byteSampleFileName.empty())) { - return false; - } - if (destination.dataFileName.empty() && (!source.dataFileName.empty() || !source.byteSampleFileName.empty())) { - return false; - } - // Data file path and byte sampling file path must have the same basename between source and destination - if (!source.dataFileName.empty() && source.dataFileName != destination.dataFileName) { - return false; - } - if (!source.byteSampleFileName.empty() && source.byteSampleFileName != destination.byteSampleFileName) { - return false; - } - return true; -} - -// Copy files between local file folders, used to mock blobstore in the test. -void bulkDumpTransportCP_impl(BulkDumpFileSet sourceFileSet, - BulkDumpFileSet destinationFileSet, - size_t fileBytesMax, - UID logId) { - BulkDumpFileFullPathSet localFiles(sourceFileSet); - BulkDumpFileFullPathSet remoteFiles(destinationFileSet); - - // Clear remote existing folder - resetFileFolder(abspath(remoteFiles.folder), logId); - // Copy bulk dump files to the remote folder - bulkDumpFileCopy(abspath(localFiles.manifestFilePath), abspath(remoteFiles.manifestFilePath), fileBytesMax, logId); - if (sourceFileSet.dataFileName.size() > 0) { - bulkDumpFileCopy(abspath(localFiles.dataFilePath), abspath(remoteFiles.dataFilePath), fileBytesMax, logId); - } - if (sourceFileSet.byteSampleFileName.size() > 0) { - ASSERT(sourceFileSet.dataFileName.size() > 0); - bulkDumpFileCopy( - abspath(localFiles.byteSampleFilePath), abspath(remoteFiles.byteSampleFilePath), fileBytesMax, logId); - } - return; -} - -ACTOR Future uploadBulkDumpFileSet(BulkDumpTransportMethod transportMethod, - BulkDumpFileSet sourceFileSet, - BulkDumpFileSet destinationFileSet, - UID logId) { - // Upload to blobstore or mock file copy - if (transportMethod != BulkDumpTransportMethod::CP) { - TraceEvent(SevWarnAlways, "SSBulkDumpUploadFilesError", logId) - .detail("Reason", "Transport method is not implemented") - .detail("TransportMethod", transportMethod); - ASSERT_WE_THINK(false); - throw bulkdump_task_failed(); - } - if (!validateSourceDestinationFileSets(sourceFileSet, destinationFileSet)) { - TraceEvent(SevWarnAlways, "SSBulkDumpUploadFilesError", logId) - .detail("SourceFileSet", sourceFileSet.toString()) - .detail("DestinationFileSet", destinationFileSet.toString()); - ASSERT_WE_THINK(false); - throw bulkdump_task_failed(); - } - bulkDumpTransportCP_impl(sourceFileSet, destinationFileSet, SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX, logId); - return Void(); -} - -void generateBulkDumpJobManifestFile(const std::string& workFolder, - const std::string& localJobManifestFilePath, - const std::string& content, - const UID& logId) { - resetFileFolder(workFolder, 
logId); - writeStringToFile(localJobManifestFilePath, content); - TraceEvent(SevInfo, "UploadBulkDumpJobManifestWriteLocal", logId) - .detail("LocalJobManifestFilePath", localJobManifestFilePath) - .detail("Content", content); - return; -} - -void uploadBulkDumpJobManifestFile(BulkDumpTransportMethod transportMethod, - const std::string& localJobManifestFilePath, - const std::string& remoteJobManifestFilePath, - UID logId) { - if (transportMethod != BulkDumpTransportMethod::CP) { - TraceEvent(SevWarnAlways, "UploadBulkDumpJobManifestFileError", logId) - .detail("Reason", "Transport method is not implemented") - .detail("TransportMethod", transportMethod); - ASSERT_WE_THINK(false); - throw bulkdump_task_failed(); - } - TraceEvent(SevWarn, "UploadBulkDumpJobManifestWriteLocal", logId) - .detail("RemoteJobManifestFilePath", remoteJobManifestFilePath); - bulkDumpFileCopy(abspath(localJobManifestFilePath), - abspath(remoteJobManifestFilePath), - SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX, - logId); - // TODO(BulkDump): check uploaded file exist - return; -} - -ACTOR Future persistCompleteBulkDumpRange(Database cx, BulkDumpState bulkDumpState) { - state Transaction tr(cx); - state Key beginKey = bulkDumpState.getRange().begin; - state Key endKey = bulkDumpState.getRange().end; - state KeyRange rangeToPersist; - state RangeResult result; - loop { - try { - tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - rangeToPersist = Standalone(KeyRangeRef(beginKey, endKey)); - wait(store(result, krmGetRanges(&tr, bulkDumpPrefix, rangeToPersist))); - bool anyNew = false; - for (int i = 0; i < result.size() - 1; i++) { - if (result[i].value.empty()) { - throw bulkdump_task_outdated(); - } - BulkDumpState currentBulkDumpState = decodeBulkDumpState(result[i].value); - if (currentBulkDumpState.getJobId() != bulkDumpState.getJobId()) { - throw bulkdump_task_outdated(); - } - ASSERT(bulkDumpState.getTaskId().present()); - if (currentBulkDumpState.getTaskId().present() && - currentBulkDumpState.getTaskId().get() != bulkDumpState.getTaskId().get()) { - throw bulkdump_task_outdated(); - } - if (!anyNew && currentBulkDumpState.getPhase() == BulkDumpPhase::Submitted) { - anyNew = true; - } - } - if (!anyNew) { - throw bulkdump_task_outdated(); - } - wait(krmSetRange(&tr, bulkDumpPrefix, bulkDumpState.getRange(), bulkDumpStateValue(bulkDumpState))); - wait(tr.commit()); - beginKey = result[result.size() - 1].key; - if (beginKey >= endKey) { - break; - } else { - tr.reset(); - } - } catch (Error& e) { - wait(tr.onError(e)); - } - } - return Void(); -} - -std::string generateJobManifestFileContent(const std::map& manifests) { - std::string root = ""; - std::string manifestList; - for (const auto& [beginKey, manifest] : manifests) { - if (root.empty()) { - root = manifest.fileSet.rootPath; - } else { - ASSERT(manifest.fileSet.rootPath == root); - } - manifestList = manifestList + manifest.getBeginKeyString() + ", " + manifest.getEndKeyString() + ", " + - std::to_string(manifest.version) + ", " + std::to_string(manifest.bytes) + ", " + - joinPath(manifest.fileSet.relativePath, manifest.fileSet.manifestFileName) + "\n"; - } - std::string head = "Manifest count: " + std::to_string(manifests.size()) + ", Root: " + root + "\n"; - return head + manifestList; -} diff --git a/fdbserver/BulkLoadAndDumpUtil.actor.cpp b/fdbserver/BulkLoadAndDumpUtil.actor.cpp new file mode 100644 index 00000000000..d6e60881604 --- 
/dev/null +++ b/fdbserver/BulkLoadAndDumpUtil.actor.cpp @@ -0,0 +1,544 @@ +/* + * BulkLoadAndDumpUtils.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbclient/FDBTypes.h" +#include "fdbclient/NativeAPI.actor.h" +#include "fdbclient/ClientKnobs.h" +#include "fdbserver/BulkLoadAndDumpUtil.actor.h" +#include "fdbserver/RocksDBCheckpointUtils.actor.h" +#include "fdbserver/StorageMetrics.actor.h" +#include +#include "flow/Error.h" +#include "flow/IRandom.h" +#include "flow/Platform.h" +#include "flow/Trace.h" +#include "flow/actorcompiler.h" // has to be last include + +void doFileCopy(std::string fromFile, std::string toFile, size_t fileBytesMax) { + std::string content = readFileBytes(fromFile, fileBytesMax); + writeFile(toFile, content); + return; +} + +ACTOR Future> getBulkLoadTaskStateFromDataMove(Database cx, UID dataMoveId, UID logId) { + loop { + state Transaction tr(cx); + try { + Optional val = wait(tr.get(dataMoveKeyFor(dataMoveId))); + if (!val.present()) { + TraceEvent(SevWarn, "SSBulkLoadDataMoveIdNotExist", logId).detail("DataMoveID", dataMoveId); + return Optional(); + } + DataMoveMetaData dataMoveMetaData = decodeDataMoveValue(val.get()); + return dataMoveMetaData.bulkLoadTaskState; + } catch (Error& e) { + wait(tr.onError(e)); + } + } +} + +SSBulkDumpTask getSSBulkDumpTask(const std::map>& locations, + const BulkDumpState& bulkDumpState) { + StorageServerInterface targetServer; + std::vector checksumServers; + int dcid = 0; + for (const auto& [_, dcServers] : locations) { + if (dcid == 0) { + const int idx = deterministicRandom()->randomInt(0, dcServers.size()); + targetServer = dcServers[idx]; + } + for (int i = 0; i < dcServers.size(); i++) { + if (dcServers[i].id() == targetServer.id()) { + ASSERT_WE_THINK(dcid == 0); + } else { + checksumServers.push_back(dcServers[i].id()); + } + } + dcid++; + } + return SSBulkDumpTask(targetServer, checksumServers, bulkDumpState); +} + +ACTOR Future> getBytesSamplingFromSSTFiles(std::string folderToGenerate, + std::unordered_set dataFiles, + UID logId) { + loop { + try { + // TODO(BulkLoad): generate filename at first + std::string bytesSampleFile = abspath( + joinPath(folderToGenerate, deterministicRandom()->randomUniqueID().toString() + "-byteSample.sst")); + std::unique_ptr sstWriter = newRocksDBSstFileWriter(); + sstWriter->open(bytesSampleFile); + bool anySampled = false; + for (const auto& filePath : dataFiles) { + std::unique_ptr reader = newRocksDBSstFileReader(); + reader->open(filePath); + while (reader->hasNext()) { + KeyValue kv = reader->next(); + ByteSampleInfo sampleInfo = isKeyValueInSample(kv); + if (sampleInfo.inSample) { + sstWriter->write(kv.key, kv.value); // TODO(BulkLoad): validate if kvs are sorted + anySampled = true; + } + } + } + // It is possible that no key is sampled + // This can happen when the data to sample is small + // 
In this case, no SST sample byte file is generated + if (anySampled) { + ASSERT(sstWriter->finish()); + return bytesSampleFile; + } else { + return Optional(); + } + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + TraceEvent(SevWarn, "SSBulkLoadTaskSamplingError", logId).errorUnsuppressed(e); + wait(delay(5.0)); + } + } +} + +// Generate SST file given the input sortedKVS to the input filePath +void writeKVSToSSTFile(std::string filePath, const std::map& sortedKVS, UID logId) { + const std::string absFilePath = abspath(filePath); + // Check file + if (fileExists(absFilePath)) { + TraceEvent(SevWarn, "SSBulkDumpRetriableError", logId) + .detail("Reason", "exist old File when writeKVSToSSTFile") + .detail("DataFilePathLocal", absFilePath); + ASSERT_WE_THINK(false); + throw retry(); + } + // Dump data to file + std::unique_ptr sstWriter = newRocksDBSstFileWriter(); + sstWriter->open(absFilePath); + for (const auto& [key, value] : sortedKVS) { + sstWriter->write(key, value); // assuming sorted + } + if (!sstWriter->finish()) { + // Unexpected: having data but failed to finish + TraceEvent(SevWarn, "SSBulkDumpRetriableError", logId) + .detail("Reason", "failed to finish data sst writer when writeKVSToSSTFile") + .detail("DataFilePath", absFilePath); + ASSERT_WE_THINK(false); + throw retry(); + } + return; +} + +// Generate key-value data, byte sampling data, and manifest file given a range at a version with a certain bytes +// Return BulkLoadManifest metadata (equivalent to content of the manifest file) +// TODO(BulkDump): can cause slow tasks, do the task in a separate thread in the future. +BulkLoadManifest dumpDataFileToLocal(UID logId, + const std::map& sortedData, + const std::map& sortedSample, + const BulkLoadFileSet& localFileSetConfig, + const BulkLoadFileSet& remoteFileSetConfig, + const BulkLoadByteSampleSetting& byteSampleSetting, + Version dumpVersion, + const KeyRange& dumpRange, + int64_t dumpBytes) { + BulkDumpFileFullPathSet localFiles(localFileSetConfig); + + // Step 1: Clean up local folder + platform::eraseDirectoryRecursive(abspath(localFiles.folder)); + platform::createDirectory(abspath(localFiles.folder)); + + // Step 2: Dump data to file + bool containDataFile = false; + if (sortedData.size() > 0) { + writeKVSToSSTFile(abspath(localFiles.dataFilePath), sortedData, logId); + containDataFile = true; + } else { + ASSERT(sortedSample.empty()); + containDataFile = false; + } + + // Step 3: Dump sample to file + bool containByteSampleFile = false; + if (sortedSample.size() > 0) { + writeKVSToSSTFile(abspath(localFiles.byteSampleFilePath), sortedSample, logId); + ASSERT(containDataFile); + containByteSampleFile = true; + } else { + containByteSampleFile = false; + } + + // Step 4: Generate manifest file + if (fileExists(abspath(localFiles.manifestFilePath))) { + TraceEvent(SevWarn, "SSBulkDumpRetriableError", logId) + .detail("Reason", "exist old manifestFile") + .detail("ManifestFilePathLocal", abspath(localFiles.manifestFilePath)); + ASSERT_WE_THINK(false); + throw retry(); + } + BulkLoadFileSet fileSetRemote(remoteFileSetConfig.rootPath, + remoteFileSetConfig.relativePath, + remoteFileSetConfig.manifestFileName, + containDataFile ? remoteFileSetConfig.dataFileName : "", + containByteSampleFile ? 
remoteFileSetConfig.byteSampleFileName : ""); + BulkLoadManifest manifest( + fileSetRemote, dumpRange.begin, dumpRange.end, dumpVersion, "", dumpBytes, byteSampleSetting); + writeFile(abspath(localFiles.manifestFilePath), manifest.toString()); + return manifest; +} + +ACTOR Future persistCompleteBulkDumpRange(Database cx, BulkDumpState bulkDumpState) { + state Transaction tr(cx); + state Key beginKey = bulkDumpState.getRange().begin; + state Key endKey = bulkDumpState.getRange().end; + state KeyRange rangeToPersist; + state RangeResult result; + loop { + try { + tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + rangeToPersist = Standalone(KeyRangeRef(beginKey, endKey)); + wait(store(result, krmGetRanges(&tr, bulkDumpPrefix, rangeToPersist))); + bool anyNew = false; + for (int i = 0; i < result.size() - 1; i++) { + if (result[i].value.empty()) { + throw bulkdump_task_outdated(); + } + BulkDumpState currentBulkDumpState = decodeBulkDumpState(result[i].value); + if (currentBulkDumpState.getJobId() != bulkDumpState.getJobId()) { + throw bulkdump_task_outdated(); + } + ASSERT(bulkDumpState.getTaskId().present()); + if (currentBulkDumpState.getTaskId().present() && + currentBulkDumpState.getTaskId().get() != bulkDumpState.getTaskId().get()) { + throw bulkdump_task_outdated(); + } + if (!anyNew && currentBulkDumpState.getPhase() == BulkDumpPhase::Submitted) { + anyNew = true; + } + } + if (!anyNew) { + throw bulkdump_task_outdated(); + } + wait(krmSetRange(&tr, bulkDumpPrefix, bulkDumpState.getRange(), bulkDumpStateValue(bulkDumpState))); + wait(tr.commit()); + beginKey = result[result.size() - 1].key; + if (beginKey >= endKey) { + break; + } else { + tr.reset(); + } + } catch (Error& e) { + wait(tr.onError(e)); + } + } + return Void(); +} + +ACTOR Future uploadSingleFile(BulkLoadTransportMethod transportMethod, + std::string fromLocalPath, + std::string toRemotePath, + UID logId) { + state int retryCount = 0; + loop { + try { + if (transportMethod == BulkLoadTransportMethod::CP) { + TraceEvent(SevInfo, "UploadSingleFile", logId) + .detail("FromLocalPath", fromLocalPath) + .detail("ToRemotePath", toRemotePath); + doFileCopy(abspath(fromLocalPath), abspath(toRemotePath), SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX); + wait(delay(0.1)); + } else { + TraceEvent(SevError, "UploadSingleFileError", logId) + .detail("Reason", "Transport method is not implemented") + .detail("TransportMethod", transportMethod) + .detail("FromLocalPath", fromLocalPath) + .detail("ToRemotePath", toRemotePath); + UNREACHABLE(); + } + // TODO(BulkDump): check uploaded file exist + break; + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + retryCount++; + if (retryCount > 10) { + TraceEvent(SevWarnAlways, "UploadSingleFileError", logId) + .errorUnsuppressed(e) + .detail("TransportMethod", transportMethod) + .detail("FromLocalPath", fromLocalPath) + .detail("ToRemotePath", toRemotePath); + throw e; + } + wait(delay(5.0)); + } + } + return Void(); +} + +ACTOR Future downloadSingleFile(BulkLoadTransportMethod transportMethod, + std::string fromRemotePath, + std::string toLocalPath, + UID logId) { + state int retryCount = 0; + loop { + try { + if (transportMethod == BulkLoadTransportMethod::CP) { + TraceEvent(SevInfo, "DownloadSingleFile", logId) + .detail("FromRemotePath", fromRemotePath) + .detail("ToLocalPath", toLocalPath); + doFileCopy(abspath(fromRemotePath), 
abspath(toLocalPath), SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX); + wait(delay(0.1)); + } else { + TraceEvent(SevError, "DownloadSingleFileError", logId) + .detail("Reason", "Transport method is not implemented") + .detail("TransportMethod", transportMethod) + .detail("FromRemotePath", fromRemotePath) + .detail("ToLocalPath", toLocalPath); + UNREACHABLE(); + } + if (!fileExists(abspath(toLocalPath))) { + throw retry(); + } + break; + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + retryCount++; + if (retryCount > 10) { + TraceEvent(SevWarnAlways, "DownloadSingleFileError", logId) + .errorUnsuppressed(e) + .detail("TransportMethod", transportMethod) + .detail("FromRemotePath", fromRemotePath) + .detail("ToLocalPath", toLocalPath); + throw e; + } + wait(delay(5.0)); + } + } + return Void(); +} + +SSBulkLoadFileSet bulkLoadTransportCP_impl(std::string dir, + BulkLoadTaskState bulkLoadTaskState, + size_t fileBytesMax, + UID logId) { + ASSERT(bulkLoadTaskState.getTransportMethod() == BulkLoadTransportMethod::CP); + std::string toFile; + std::string fromFile; + + SSBulkLoadFileSet fileSet; + fileSet.folder = abspath(joinPath(dir, bulkLoadTaskState.getFolder())); + // Clear existing folder + platform::eraseDirectoryRecursive(fileSet.folder); + ASSERT(platform::createDirectory(fileSet.folder)); + + // Move bulk load files to loading folder + for (const auto& filePath : bulkLoadTaskState.getDataFiles()) { + fromFile = abspath(filePath); + toFile = abspath(joinPath(fileSet.folder, basename(fromFile))); + if (fileSet.dataFileList.find(toFile) != fileSet.dataFileList.end()) { + ASSERT_WE_THINK(false); + throw retry(); + } + doFileCopy(fromFile, toFile, fileBytesMax); + fileSet.dataFileList.insert(toFile); + TraceEvent(SevInfo, "SSBulkLoadSSTFileCopied", logId) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) + .detail("FromFile", fromFile) + .detail("ToFile", toFile); + } + if (bulkLoadTaskState.getBytesSampleFile().present()) { + fromFile = abspath(bulkLoadTaskState.getBytesSampleFile().get()); + if (fileExists(fromFile)) { + toFile = abspath(joinPath(fileSet.folder, basename(fromFile))); + doFileCopy(fromFile, toFile, fileBytesMax); + fileSet.bytesSampleFile = toFile; + TraceEvent(SevInfo, "SSBulkLoadSSTFileCopied", logId) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) + .detail("FromFile", fromFile) + .detail("ToFile", toFile); + } + } + return fileSet; +} + +// Copy files between local file folders, used to mock blobstore in the test. +void doFileSetCopy(BulkLoadFileSet fromFileSet, BulkLoadFileSet toFileSet, size_t fileBytesMax, UID logId) { + BulkDumpFileFullPathSet localFiles(fromFileSet); + BulkDumpFileFullPathSet remoteFiles(toFileSet); + + // Clear remote existing folder + platform::eraseDirectoryRecursive(abspath(remoteFiles.folder)); + platform::createDirectory(abspath(remoteFiles.folder)); + // Copy bulk dump files to the remote folder + doFileCopy(abspath(localFiles.manifestFilePath), abspath(remoteFiles.manifestFilePath), fileBytesMax); + if (fromFileSet.dataFileName.size() > 0) { + doFileCopy(abspath(localFiles.dataFilePath), abspath(remoteFiles.dataFilePath), fileBytesMax); + } + if (fromFileSet.byteSampleFileName.size() > 0) { + ASSERT(fromFileSet.dataFileName.size() > 0); + doFileCopy(abspath(localFiles.byteSampleFilePath), abspath(remoteFiles.byteSampleFilePath), fileBytesMax); + } + return; +} + +// Validate the invariant of filenames. Source is the file stored locally. Destination is the file going to move to. 
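// ---------------------------------------------------------------------------
// Editor's note: illustrative summary of the invariant checked below, not
// part of this patch. FileSet triples are (manifest, data, byteSample),
// with hypothetical names:
//   ("m.txt", "d.sst", "s.sst") -> ("m.txt", "d.sst", "s.sst")   valid
//   ("m.txt", "",      ""     ) -> ("m.txt", "",      ""     )   valid (empty range)
//   ("m.txt", "d.sst", ""     ) -> ("m.txt", "",      ""     )   invalid: data file must
//                                                                exist on both sides
//   ("m.txt", "",      "s.sst") -> anything                      invalid: a byte sample
//                                                                requires a data file
// File basenames must also match between source and destination.
// ---------------------------------------------------------------------------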
+bool validateSourceDestinationFileSets(const BulkLoadFileSet& source, const BulkLoadFileSet& destination) { + // Manifest file must be present + if (source.manifestFileName.empty() || destination.manifestFileName.empty()) { + return false; + } + // Source data file and destination data file must be present at the same time + // If the data file is not present, the byte sampling file must not be present + if (source.dataFileName.empty() && (!destination.dataFileName.empty() || !source.byteSampleFileName.empty())) { + return false; + } + if (destination.dataFileName.empty() && (!source.dataFileName.empty() || !source.byteSampleFileName.empty())) { + return false; + } + // Data file path and byte sampling file path must have the same basename between source and destination + if (!source.dataFileName.empty() && source.dataFileName != destination.dataFileName) { + return false; + } + if (!source.byteSampleFileName.empty() && source.byteSampleFileName != destination.byteSampleFileName) { + return false; + } + return true; +} + +ACTOR Future<SSBulkLoadFileSet> downloadBulkLoadFileSet(BulkLoadTransportMethod transportMethod, + std::string dir, + BulkLoadTaskState bulkLoadTaskState, + size_t fileBytesMax, + UID logId) { + if (transportMethod != BulkLoadTransportMethod::CP) { + TraceEvent(SevWarnAlways, "SSDownloadBulkLoadFileSetError", logId) + .detail("Reason", "Transport method is not implemented") + .detail("TransportMethod", transportMethod); + ASSERT_WE_THINK(false); + throw bulkdump_task_failed(); + } + wait(delay(0.1)); + SSBulkLoadFileSet res = bulkLoadTransportCP_impl(dir, bulkLoadTaskState, fileBytesMax, logId); + return res; +} + +ACTOR Future<Void> uploadBulkLoadFileSet(BulkLoadTransportMethod transportMethod, + BulkLoadFileSet sourceFileSet, + BulkLoadFileSet destinationFileSet, + UID logId) { + if (transportMethod != BulkLoadTransportMethod::CP) { + TraceEvent(SevWarnAlways, "SSBulkDumpuploadSingleFilesError", logId) + .detail("Reason", "Transport method is not implemented") + .detail("TransportMethod", transportMethod); + ASSERT_WE_THINK(false); + throw bulkdump_task_failed(); + } + if (!validateSourceDestinationFileSets(sourceFileSet, destinationFileSet)) { + TraceEvent(SevWarnAlways, "SSBulkDumpuploadSingleFilesError", logId) + .detail("SourceFileSet", sourceFileSet.toString()) + .detail("DestinationFileSet", destinationFileSet.toString()); + ASSERT_WE_THINK(false); + throw bulkdump_task_failed(); + } + wait(delay(0.1)); + doFileSetCopy(sourceFileSet, destinationFileSet, SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX, logId); + return Void(); +} + +// Get manifests within the input range +ACTOR Future<std::vector<BulkLoadManifest>> getBulkLoadManifestMetadataFromFiles( + std::string localJobManifestFilePath, + KeyRange range, + std::string manifestLocalTempFolder, + BulkLoadTransportMethod transportMethod, + UID logId) { + ASSERT(fileExists(abspath(localJobManifestFilePath))); + state std::vector<BulkLoadManifest> res; + const std::string jobManifestRawString = + readFileBytes(abspath(localJobManifestFilePath), SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX); + state std::vector<std::string> lines = splitString(jobManifestRawString, "\n"); + state BulkDumpJobManifestHeader header(lines[0]); + state size_t lineIdx = 1; // skip the first line which is the header + while (lineIdx < lines.size()) { + if (lines[lineIdx].empty()) { + ASSERT(lineIdx == lines.size() - 1); + break; + } + BulkDumpJobManifestEntry manifestEntry(lines[lineIdx]); + KeyRange overlappingRange = range & manifestEntry.getRange(); + if (overlappingRange.empty()) { + // Ignore the manifest entry if no overlapping range + lineIdx = lineIdx
+ 1; + continue; + } + state std::string remoteManifestFilePath = + joinPath(header.getRootFolder(), manifestEntry.getRelativePath()); // wrong + platform::eraseDirectoryRecursive(abspath(manifestLocalTempFolder)); + platform::createDirectory(abspath(manifestLocalTempFolder)); + state std::string localManifestFilePath = joinPath(manifestLocalTempFolder, basename(remoteManifestFilePath)); + // Download the manifest file + wait(downloadSingleFile(transportMethod, remoteManifestFilePath, localManifestFilePath, logId)); + const std::string manifestRawString = + readFileBytes(abspath(localManifestFilePath), SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX); + ASSERT(!manifestRawString.empty()); + BulkLoadManifest manifest(manifestRawString); + res.push_back(manifest); + wait(delay(1.0)); + lineIdx = lineIdx + 1; + } + platform::eraseDirectoryRecursive(abspath(manifestLocalTempFolder)); + return res; +} + +// Download job manifest file +// Each job has one manifest file including manifest paths of all tasks +ACTOR Future downloadBulkLoadJobManifestFile(BulkLoadTransportMethod transportMethod, + std::string localJobManifestFilePath, + std::string remoteJobManifestFilePath, + UID logId) { + wait(downloadSingleFile(transportMethod, remoteJobManifestFilePath, localJobManifestFilePath, logId)); + return Void(); +} + +// Upload job manifest file +// Each job has one manifest file including manifest paths of all tasks +ACTOR Future uploadBulkLoadJobManifestFile(BulkLoadTransportMethod transportMethod, + std::string localJobManifestFilePath, + std::string remoteJobManifestFilePath, + std::map manifests, + UID logId) { + // Step 1: Generate manifest file content + std::string content = generateBulkLoadJobManifestFileContent(manifests); + ASSERT(!content.empty()); + + // Step 2: Generate the manifest file locally + writeFile(abspath(localJobManifestFilePath), content); + TraceEvent(SevInfo, "UploadBulkLoadJobManifestFile", logId) + .detail("LocalJobManifestFilePath", localJobManifestFilePath) + .detail("Content", content); + + // Step 3: Upload the manifest file + wait(uploadSingleFile(transportMethod, localJobManifestFilePath, remoteJobManifestFilePath, logId)); + return Void(); +} diff --git a/fdbserver/BulkLoadUtil.actor.cpp b/fdbserver/BulkLoadUtil.actor.cpp deleted file mode 100644 index c91602f3066..00000000000 --- a/fdbserver/BulkLoadUtil.actor.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * BulkLoadUtils.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "fdbclient/FDBTypes.h" -#include "fdbclient/NativeAPI.actor.h" -#include "fdbclient/ClientKnobs.h" -#include "fdbserver/BulkLoadUtil.actor.h" -#include "fdbserver/RocksDBCheckpointUtils.actor.h" -#include "fdbserver/StorageMetrics.actor.h" -#include -#include "flow/actorcompiler.h" // has to be last include - -std::string generateRandomBulkLoadDataFileName() { - return deterministicRandom()->randomUniqueID().toString() + "-data.sst"; -} - -std::string generateRandomBulkLoadBytesSampleFileName() { - return deterministicRandom()->randomUniqueID().toString() + "-bytesample.sst"; -} - -ACTOR Future> getBulkLoadStateFromDataMove(Database cx, UID dataMoveId, UID logId) { - loop { - state Transaction tr(cx); - try { - Optional val = wait(tr.get(dataMoveKeyFor(dataMoveId))); - if (!val.present()) { - TraceEvent(SevWarn, "SSBulkLoadDataMoveIdNotExist", logId).detail("DataMoveID", dataMoveId); - return Optional(); - } - DataMoveMetaData dataMoveMetaData = decodeDataMoveValue(val.get()); - return dataMoveMetaData.bulkLoadState; - } catch (Error& e) { - wait(tr.onError(e)); - } - } -} - -void bulkLoadFileCopy(std::string fromFile, std::string toFile, size_t fileBytesMax) { - std::string content = readFileBytes(fromFile, fileBytesMax); - writeFile(toFile, content); - // TODO(BulkLoad): Do file checksum for toFile - return; -} - -ACTOR Future bulkLoadTransportCP_impl(std::string dir, - BulkLoadState bulkLoadState, - size_t fileBytesMax, - UID logId) { - ASSERT(bulkLoadState.getTransportMethod() == BulkLoadTransportMethod::CP); - loop { - state std::string toFile; - state std::string fromFile; - state SSBulkLoadFileSet fileSet; - try { - fileSet.folder = abspath(joinPath(dir, bulkLoadState.getFolder())); - - // Clear existing folder - platform::eraseDirectoryRecursive(fileSet.folder); - if (!platform::createDirectory(fileSet.folder)) { - throw retry(); - } - - // Move bulk load files to loading folder - for (const auto& filePath : bulkLoadState.getDataFiles()) { - fromFile = abspath(filePath); - toFile = abspath(joinPath(fileSet.folder, generateRandomBulkLoadDataFileName())); - if (fileSet.dataFileList.find(toFile) != fileSet.dataFileList.end()) { - ASSERT_WE_THINK(false); - throw retry(); - } - bulkLoadFileCopy(fromFile, toFile, fileBytesMax); - fileSet.dataFileList.insert(toFile); - TraceEvent(SevInfo, "SSBulkLoadSSTFileCopied", logId) - .detail("BulkLoadTask", bulkLoadState.toString()) - .detail("FromFile", fromFile) - .detail("ToFile", toFile); - } - if (bulkLoadState.getBytesSampleFile().present()) { - fromFile = abspath(bulkLoadState.getBytesSampleFile().get()); - if (fileExists(fromFile)) { - toFile = abspath(joinPath(fileSet.folder, generateRandomBulkLoadBytesSampleFileName())); - bulkLoadFileCopy(fromFile, toFile, fileBytesMax); - fileSet.bytesSampleFile = toFile; - TraceEvent(SevInfo, "SSBulkLoadSSTFileCopied", logId) - .detail("BulkLoadTask", bulkLoadState.toString()) - .detail("FromFile", fromFile) - .detail("ToFile", toFile); - } - } - return fileSet; - - } catch (Error& e) { - if (e.code() == error_code_actor_cancelled) { - throw e; - } - TraceEvent(SevInfo, "SSBulkLoadTaskFetchSSTFileCopyError", logId) - .errorUnsuppressed(e) - .detail("BulkLoadTask", bulkLoadState.toString()) - .detail("FromFile", fromFile) - .detail("ToFile", toFile); - wait(delay(5.0)); - } - } -} - -ACTOR Future> getBytesSamplingFromSSTFiles(std::string folderToGenerate, - std::unordered_set dataFiles, - UID logId) { - loop { - try { - std::string bytesSampleFile = - abspath(joinPath(folderToGenerate, 
generateRandomBulkLoadBytesSampleFileName())); - std::unique_ptr sstWriter = newRocksDBSstFileWriter(); - sstWriter->open(bytesSampleFile); - bool anySampled = false; - for (const auto& filePath : dataFiles) { - std::unique_ptr reader = newRocksDBSstFileReader(); - reader->open(filePath); - while (reader->hasNext()) { - KeyValue kv = reader->next(); - ByteSampleInfo sampleInfo = isKeyValueInSample(kv); - if (sampleInfo.inSample) { - sstWriter->write(kv.key, kv.value); // TODO(BulkLoad): validate if kvs are sorted - anySampled = true; - } - } - } - // It is possible that no key is sampled - // This can happen when the data to sample is small - // In this case, no SST sample byte file is generated - if (anySampled) { - ASSERT(sstWriter->finish()); - return bytesSampleFile; - } else { - return Optional(); - } - } catch (Error& e) { - if (e.code() == error_code_actor_cancelled) { - throw e; - } - TraceEvent(SevWarn, "SSBulkLoadTaskSamplingError", logId).errorUnsuppressed(e); - wait(delay(5.0)); - } - } -} - -void checkContent(std::unordered_set dataFiles, UID logId) { - for (const auto& filePath : dataFiles) { - std::unique_ptr reader = newRocksDBSstFileReader(); - reader->open(filePath); - while (reader->hasNext()) { - KeyValue kv = reader->next(); - TraceEvent("CheckContent", logId).detail("Key", kv.key).detail("Value", kv.value); - } - } - return; -} diff --git a/fdbserver/DDRelocationQueue.actor.cpp b/fdbserver/DDRelocationQueue.actor.cpp index a29099b3dc1..565994e0f43 100644 --- a/fdbserver/DDRelocationQueue.actor.cpp +++ b/fdbserver/DDRelocationQueue.actor.cpp @@ -1013,7 +1013,8 @@ DataMoveType newDataMoveType(bool doBulkLoading) { bool runPendingBulkLoadTaskWithRelocateData(DDQueue* self, RelocateData& rd) { bool doBulkLoading = false; Optional task = self->bulkLoadTaskCollection->getTaskByRange(rd.keys); - if (task.present() && task.get().coreState.onAnyPhase({ BulkLoadPhase::Triggered, BulkLoadPhase::Running })) { + if (task.present() && + task.get().coreState.onAnyPhase({ BulkLoadTaskPhase::Triggered, BulkLoadTaskPhase::Running })) { rd.bulkLoadTask = task.get(); doBulkLoading = true; } @@ -1405,11 +1406,11 @@ static int nonOverlappedServerCount(const std::vector& srcIds, const std::v } void validateBulkLoadRelocateData(const RelocateData& rd, const std::vector& destIds, UID logId) { - BulkLoadState bulkLoadState = rd.bulkLoadTask.get().coreState; - if (rd.keys != bulkLoadState.getRange()) { + BulkLoadTaskState bulkLoadTaskState = rd.bulkLoadTask.get().coreState; + if (rd.keys != bulkLoadTaskState.getRange()) { TraceEvent(g_network->isSimulated() ? SevError : SevWarnAlways, "DDBulkLoadTaskLaunchFailed", logId) .detail("Reason", "Wrong data move range") - .detail("BulkLoadTask", bulkLoadState.toString()) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) .detail("DataMovePriority", rd.priority) .detail("DataMoveId", rd.dataMoveId) .detail("RelocatorRange", rd.keys); @@ -1422,7 +1423,7 @@ void validateBulkLoadRelocateData(const RelocateData& rd, const std::vector // This is not expected TraceEvent(g_network->isSimulated() ? 
SevError : SevWarnAlways, "DDBulkLoadTaskLaunchFailed", logId) .detail("Reason", "Conflict src and destd due to remote recovery") - .detail("BulkLoadTask", bulkLoadState.toString()) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) .detail("DataMovePriority", rd.priority) .detail("DataMoveId", rd.dataMoveId) .detail("RelocatorRange", rd.keys); @@ -1994,7 +1995,7 @@ ACTOR Future dataDistributionRelocator(DDQueue* self, ddEnabledState, CancelConflictingDataMoves::False, rd.bulkLoadTask.present() ? rd.bulkLoadTask.get().coreState - : Optional()); + : Optional()); } else { params = std::make_unique(rd.dataMoveId, rd.keys, @@ -2009,7 +2010,7 @@ ACTOR Future dataDistributionRelocator(DDQueue* self, ddEnabledState, CancelConflictingDataMoves::False, rd.bulkLoadTask.present() ? rd.bulkLoadTask.get().coreState - : Optional()); + : Optional()); } state Future doMoveKeys = self->txnProcessor->moveKeys(*params); state Future pollHealth = @@ -2040,7 +2041,7 @@ ACTOR Future dataDistributionRelocator(DDQueue* self, CancelConflictingDataMoves::False, rd.bulkLoadTask.present() ? rd.bulkLoadTask.get().coreState - : Optional()); + : Optional()); } else { params = std::make_unique(rd.dataMoveId, rd.keys, @@ -2056,7 +2057,7 @@ ACTOR Future dataDistributionRelocator(DDQueue* self, CancelConflictingDataMoves::False, rd.bulkLoadTask.present() ? rd.bulkLoadTask.get().coreState - : Optional()); + : Optional()); } doMoveKeys = self->txnProcessor->moveKeys(*params); } else { diff --git a/fdbserver/DDShardTracker.actor.cpp b/fdbserver/DDShardTracker.actor.cpp index d2dd7cce574..8c5af89c8b4 100644 --- a/fdbserver/DDShardTracker.actor.cpp +++ b/fdbserver/DDShardTracker.actor.cpp @@ -984,11 +984,11 @@ static bool shardBackwardMergeFeasible(DataDistributionTracker* self, KeyRange c } // Must be atomic -void createShardToBulkLoad(DataDistributionTracker* self, BulkLoadState bulkLoadState) { - KeyRange keys = bulkLoadState.getRange(); +void createShardToBulkLoad(DataDistributionTracker* self, BulkLoadTaskState bulkLoadTaskState) { + KeyRange keys = bulkLoadTaskState.getRange(); ASSERT(!keys.empty()); TraceEvent e(SevInfo, "DDBulkLoadCreateShardToBulkLoad", self->distributorId); - e.detail("TaskId", bulkLoadState.getTaskId()); + e.detail("TaskId", bulkLoadTaskState.getTaskId()); e.detail("BulkLoadRange", keys); // Create shards at the two ends and do not data move for those shards // Create a new shard and trigger data move for bulk loading on the new shard @@ -1025,7 +1025,7 @@ void createShardToBulkLoad(DataDistributionTracker* self, BulkLoadState bulkLoad restartShardTrackers(self, keys, ShardMetrics(oldStats, now(), shardCount)); self->shardsAffectedByTeamFailure->defineShard(keys); self->output.send( - RelocateShard(keys, DataMovementReason::TEAM_HEALTHY, RelocateReason::OTHER, bulkLoadState.getTaskId())); + RelocateShard(keys, DataMovementReason::TEAM_HEALTHY, RelocateReason::OTHER, bulkLoadTaskState.getTaskId())); e.detail("NewShardToLoad", keys); return; } @@ -1684,7 +1684,7 @@ struct DataDistributionTrackerImpl { triggerStorageQueueRebalance(self, req); } when(BulkLoadShardRequest req = waitNext(self->triggerShardBulkLoading)) { - createShardToBulkLoad(self, req.bulkLoadState); + createShardToBulkLoad(self, req.bulkLoadTaskState); } when(wait(self->actors.getResult())) {} when(TenantCacheTenantCreated newTenant = waitNext(tenantCreationSignal.getFuture())) { diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 9e3bdd6f78c..26dc14f4ad2 100644 --- 
a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -23,7 +23,7 @@ #include "fdbclient/Audit.h" #include "fdbclient/AuditUtils.actor.h" -#include "fdbclient/BulkDumping.h" +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/DatabaseContext.h" #include "fdbclient/FDBOptions.g.h" #include "fdbclient/FDBTypes.h" @@ -34,8 +34,7 @@ #include "fdbclient/SystemData.h" #include "fdbclient/Tenant.h" #include "fdbrpc/Replication.h" -#include "fdbserver/BulkDumpUtil.actor.h" -#include "fdbserver/BulkLoadUtil.actor.h" +#include "fdbserver/BulkLoadAndDumpUtil.actor.h" #include "fdbserver/DDSharedContext.h" #include "fdbserver/DDTeamCollection.h" #include "fdbserver/DataDistribution.actor.h" @@ -57,11 +56,13 @@ #include "flow/Platform.h" #include "flow/Trace.h" #include "flow/UnitTest.h" +#include "flow/flow.h" #include "flow/genericactors.actor.h" #include "flow/serialize.h" #include "flow/actorcompiler.h" // This must be the last #include. static const std::string ddServerBulkDumpFolder = "ddBulkDumpFiles"; +static const std::string ddServerBulkLoadFolder = "ddBulkLoadFiles"; DataMoveType getDataMoveTypeFromDataMoveId(const UID& dataMoveId) { bool assigned, emptyRange; @@ -376,7 +377,6 @@ ACTOR Future skipAuditOnRange(Reference self, std::shared_ptr audit, KeyRange rangeToSkip); -void runBulkLoadTaskAsync(Reference self, KeyRange range, UID taskId, bool restart); ACTOR Future scheduleBulkLoadTasks(Reference self); struct DataDistributor : NonCopyable, ReferenceCounted { @@ -426,14 +426,16 @@ struct DataDistributor : NonCopyable, ReferenceCounted { Promise configChangeWatching; Future onConfigChange; - ActorCollection bulkLoadActors; - bool bulkLoadEnabled = false; - - bool bulkDumpEnabled = false; KeyRangeActorMap ongoingBulkDumpActors; ParallelismLimitor bulkDumpParallelismLimitor; + ParallelismLimitor bulkLoadParallelismLimitor; + + bool bulkLoadEnabled = false; + bool bulkDumpEnabled = false; + std::string folder; std::string bulkDumpFolder; + std::string bulkLoadFolder; DataDistributor(Reference const> const& db, UID id, @@ -446,12 +448,14 @@ struct DataDistributor : NonCopyable, ReferenceCounted { totalDataInFlightRemoteEventHolder(makeReference("TotalDataInFlightRemote")), teamCollection(nullptr), bulkLoadTaskCollection(nullptr), auditStorageHaLaunchingLock(1), auditStorageReplicaLaunchingLock(1), auditStorageLocationMetadataLaunchingLock(1), - auditStorageSsShardLaunchingLock(1), auditStorageInitStarted(false), bulkLoadActors(false), - bulkLoadEnabled(false), bulkDumpEnabled(false), - bulkDumpParallelismLimitor(SERVER_KNOBS->DD_BULKDUMP_PARALLELISM), folder(folder) { + auditStorageSsShardLaunchingLock(1), auditStorageInitStarted(false), bulkLoadEnabled(false), + bulkDumpEnabled(false), bulkDumpParallelismLimitor(SERVER_KNOBS->DD_BULKDUMP_PARALLELISM, "BulkDump"), + bulkLoadParallelismLimitor(SERVER_KNOBS->DD_BULKLOAD_PARALLELISM, "BulkLoad"), folder(folder) { if (!folder.empty()) { bulkDumpFolder = abspath(joinPath(folder, ddServerBulkDumpFolder)); // TODO(BulkDump): clear this folder in the presence of crash + bulkLoadFolder = abspath(joinPath(folder, ddServerBulkLoadFolder)); + // TODO(BulkLoad): clear this folder in the presence of crash } } @@ -828,7 +832,7 @@ struct DataDistributor : NonCopyable, ReferenceCounted { TraceEvent(SevInfo, "EmptyDataMoveRange", self->ddId).detail("DataMoveMetaData", meta.toString()); continue; } - if (meta.bulkLoadState.present()) { + if (meta.bulkLoadTaskState.present()) { RelocateShard rs(meta.ranges.front(), 
DataMovementReason::RECOVER_MOVE, RelocateReason::OTHER); rs.dataMoveId = meta.id; rs.cancelled = true; @@ -1060,46 +1064,51 @@ ACTOR Future serveBlobMigratorRequests(Reference self, } // Trigger a task on range based on the current bulk load task metadata -ACTOR Future> triggerBulkLoadTask(Reference self, - KeyRange range, - UID taskId, - bool restart) { +ACTOR Future> triggerBulkLoadTask(Reference self, + KeyRange range, + UID taskId) { loop { Database cx = self->txnProcessor->context(); state Transaction tr(cx); - state BulkLoadState newBulkLoadState; + state BulkLoadTaskState newBulkLoadTaskState; try { // TODO(BulkLoad): make sure the range has been locked tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); wait(checkMoveKeysLock(&tr, self->context->lock, self->context->ddEnabledState.get())); - std::vector phase; - if (!restart) { - wait( - store(newBulkLoadState, - getBulkLoadTask(&tr, range, taskId, { BulkLoadPhase::Submitted, BulkLoadPhase::Triggered }))); - } else { - wait(store(newBulkLoadState, - getBulkLoadTask(&tr, range, taskId, { BulkLoadPhase::Triggered, BulkLoadPhase::Running }))); - } - newBulkLoadState.phase = BulkLoadPhase::Triggered; - newBulkLoadState.clearDataMoveId(); - newBulkLoadState.restartCount = newBulkLoadState.restartCount + 1; - newBulkLoadState.triggerTime = now(); - wait(krmSetRange(&tr, bulkLoadPrefix, newBulkLoadState.getRange(), bulkLoadStateValue(newBulkLoadState))); + std::vector phase; + wait( + store(newBulkLoadTaskState, + getBulkLoadTask( + &tr, + range, + taskId, + { BulkLoadTaskPhase::Submitted, BulkLoadTaskPhase::Triggered, BulkLoadTaskPhase::Running }))); + newBulkLoadTaskState.phase = BulkLoadTaskPhase::Triggered; + newBulkLoadTaskState.clearDataMoveId(); + newBulkLoadTaskState.restartCount = newBulkLoadTaskState.restartCount + 1; + newBulkLoadTaskState.triggerTime = now(); + wait(krmSetRange(&tr, + bulkLoadTaskPrefix, + newBulkLoadTaskState.getRange(), + bulkLoadTaskStateValue(newBulkLoadTaskState))); wait(tr.commit()); Version commitVersion = tr.getCommittedVersion(); TraceEvent(SevInfo, "DDBulkLoadTaskTriggeredPersist", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) .detail("CommitVersion", commitVersion) - .detail("BulkLoadState", newBulkLoadState.toString()); + .detail("BulkLoadTaskState", newBulkLoadTaskState.toString()); ASSERT(commitVersion != invalidVersion); - return std::make_pair(newBulkLoadState, commitVersion); + return std::make_pair(newBulkLoadTaskState, commitVersion); } catch (Error& e) { if (e.code() != error_code_actor_cancelled) { TraceEvent(SevInfo, "DDBulkLoadTaskTriggeredPersistError", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) .errorUnsuppressed(e) - .detail("BulkLoadState", newBulkLoadState.toString()); + .detail("BulkLoadTaskState", newBulkLoadTaskState.toString()); } wait(tr.onError(e)); } @@ -1129,27 +1138,26 @@ ACTOR Future waitUntilBulkLoadTaskCanStart(Reference self // A bulk load task is guaranteed to be either complete or overwritten by another task // When a bulk load task is trigged, the range traffic is turned off atomically // If the task completes, the task re-enables the traffic atomically -ACTOR Future doBulkLoadTask(Reference self, KeyRange range, UID taskId, bool restart) { +ACTOR Future doBulkLoadTask(Reference self, KeyRange range, UID taskId) { state Promise completeAck; - state BulkLoadState triggeredBulkLoadTask; + state BulkLoadTaskState triggeredBulkLoadTask; state Version commitVersion = invalidVersion; 
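The trigger path above follows the standard native-API retry idiom: read the task metadata under the system-key prefix, mutate it, persist it with krmSetRange, and let tr.onError() back off and retry on retryable errors. A minimal sketch of that idiom, assuming flow's ACTOR compiler and the internal Transaction/krmSetRange helpers (persistTaskPhaseExample is an illustrative name, not a function in this patch):

    ACTOR Future<Void> persistTaskPhaseExample(Database cx, KeyRange range, Value taskValue) {
        state Transaction tr(cx);
        loop {
            try {
                tr.setOption(FDBTransactionOptions::LOCK_AWARE);
                tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
                // krmSetRange rewrites the range-map entries under the given prefix
                // so that exactly [range.begin, range.end) maps to taskValue.
                wait(krmSetRange(&tr, bulkLoadTaskPrefix, range, taskValue));
                wait(tr.commit());
                return Void();
            } catch (Error& e) {
                wait(tr.onError(e)); // retryable errors back off and restart the loop
            }
        }
    }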
TraceEvent(SevInfo, "DDBulkLoadDoBulkLoadBegin", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) .detail("Range", range) - .detail("TaskID", taskId) - .detail("Restart", restart); + .detail("TaskID", taskId); wait(tryStartBulkLoadTaskUntilSucceed(self)); // increase the task counter when succeed try { // Step 1: persist bulk load task phase as triggered - std::pair triggeredBulkLoadTask_ = - wait(triggerBulkLoadTask(self, range, taskId, restart)); + std::pair triggeredBulkLoadTask_ = wait(triggerBulkLoadTask(self, range, taskId)); triggeredBulkLoadTask = triggeredBulkLoadTask_.first; commitVersion = triggeredBulkLoadTask_.second; TraceEvent(SevInfo, "DDBulkLoadDoBulkLoadTaskTriggered", self->ddId) .setMaxEventLength(-1) .setMaxFieldLength(-1) .detail("Task", triggeredBulkLoadTask.toString()) - .detail("CommitVersion", commitVersion) - .detail("Restart", restart); + .detail("CommitVersion", commitVersion); ASSERT(triggeredBulkLoadTask.getRange() == range); // Step 2: submit the task to in-memory task map, which (1) turns off shard boundary change; @@ -1166,41 +1174,40 @@ ACTOR Future doBulkLoadTask(Reference self, KeyRange rang TraceEvent(SevInfo, "DDBulkLoadDoBulkLoadTaskComplete", self->ddId) .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("Task", triggeredBulkLoadTask.toString()) - .detail("Restart", restart); - self->bulkLoadTaskCollection->decrementTaskCounter(); + .detail("Task", triggeredBulkLoadTask.toString()); } catch (Error& e) { if (e.code() == error_code_actor_cancelled) { throw e; } TraceEvent(SevWarn, "DDBulkLoadDoBulkLoadTaskFailed", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) .errorUnsuppressed(e) .detail("Range", range) - .detail("TaskID", taskId) - .detail("Restart", restart); + .detail("TaskID", taskId); if (e.code() == error_code_bulkload_task_outdated) { - self->bulkLoadTaskCollection->decrementTaskCounter(); // sliently exits } else if (e.code() == error_code_movekeys_conflict) { throw e; } else { - // retry by spawning a new one - runBulkLoadTaskAsync(self, range, taskId, true); + // sliently exits } } + self->bulkLoadTaskCollection->decrementTaskCounter(); return Void(); } ACTOR Future eraseBulkLoadTask(Reference self, KeyRange range, UID taskId) { state Database cx = self->txnProcessor->context(); state Transaction tr(cx); - state BulkLoadState bulkLoadTask; + state BulkLoadTaskState bulkLoadTask; loop { try { tr.setOption(FDBTransactionOptions::LOCK_AWARE); tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - wait(store(bulkLoadTask, getBulkLoadTask(&tr, range, taskId, { BulkLoadPhase::Acknowledged }))); - wait(krmSetRangeCoalescing(&tr, bulkLoadPrefix, range, normalKeys, StringRef())); + wait(store(bulkLoadTask, getBulkLoadTask(&tr, range, taskId, { BulkLoadTaskPhase::Acknowledged }))); + wait(checkMoveKeysLock(&tr, self->context->lock, self->context->ddEnabledState.get())); + wait(krmSetRangeCoalescing(&tr, bulkLoadTaskPrefix, range, normalKeys, StringRef())); wait(tr.commit()); self->bulkLoadTaskCollection->eraseTask(bulkLoadTask); break; @@ -1215,59 +1222,42 @@ ACTOR Future eraseBulkLoadTask(Reference self, KeyRange r return Void(); } -void runBulkLoadTaskAsync(Reference self, KeyRange range, UID taskId, bool restart) { - TraceEvent(SevInfo, "DDBulkLoadTaskRunAsync", self->ddId) - .detail("Range", range) - .detail("TaskId", taskId) - .detail("Restart", restart); - self->bulkLoadActors.add(doBulkLoadTask(self, range, taskId, restart)); - return; -} - -void eraseBulkLoadTaskAsync(Reference self, KeyRange range, UID 
taskId, bool restart) { - TraceEvent(SevInfo, "DDBulkLoadTaskEraseAsync", self->ddId) - .detail("Range", range) - .detail("TaskId", taskId) - .detail("Restart", restart); - self->bulkLoadActors.add(eraseBulkLoadTask(self, range, taskId)); - return; -} - -ACTOR Future scheduleBulkLoadTasks(Reference self) { +ACTOR Future bulkLoadTaskScheduler(Reference self) { state Key beginKey = normalKeys.begin; state Key endKey = normalKeys.end; state KeyRange rangeToRead; state Database cx = self->txnProcessor->context(); state Transaction tr(cx); state int i = 0; - state BulkLoadState bulkLoadState; + state BulkLoadTaskState bulkLoadTaskState; state RangeResult result; + state std::vector> actors; while (beginKey < endKey) { try { rangeToRead = Standalone(KeyRangeRef(beginKey, endKey)); result.clear(); wait(store( result, - krmGetRanges(&tr, bulkLoadPrefix, rangeToRead, SERVER_KNOBS->DD_BULKLOAD_TASK_METADATA_READ_SIZE))); + krmGetRanges(&tr, bulkLoadTaskPrefix, rangeToRead, SERVER_KNOBS->DD_BULKLOAD_TASK_METADATA_READ_SIZE))); i = 0; for (; i < result.size() - 1; i++) { if (!result[i].value.empty()) { KeyRange range = Standalone(KeyRangeRef(result[i].key, result[i + 1].key)); - bulkLoadState = decodeBulkLoadState(result[i].value); - if (range != bulkLoadState.getRange()) { + bulkLoadTaskState = decodeBulkLoadTaskState(result[i].value); + if (range != bulkLoadTaskState.getRange()) { // This task is outdated continue; - } else if (bulkLoadState.phase == BulkLoadPhase::Submitted) { + } else if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Submitted || + bulkLoadTaskState.phase == BulkLoadTaskPhase::Triggered || + bulkLoadTaskState.phase == BulkLoadTaskPhase::Running) { wait(waitUntilBulkLoadTaskCanStart(self)); - runBulkLoadTaskAsync( - self, bulkLoadState.getRange(), bulkLoadState.getTaskId(), /*restart=*/false); - } else if (bulkLoadState.phase == BulkLoadPhase::Acknowledged) { - eraseBulkLoadTaskAsync( - self, bulkLoadState.getRange(), bulkLoadState.getTaskId(), /*restart=*/false); + actors.push_back( + doBulkLoadTask(self, bulkLoadTaskState.getRange(), bulkLoadTaskState.getTaskId())); + } else if (bulkLoadTaskState.phase == BulkLoadTaskPhase::Acknowledged) { + actors.push_back( + eraseBulkLoadTask(self, bulkLoadTaskState.getRange(), bulkLoadTaskState.getTaskId())); } else { - ASSERT(bulkLoadState.phase == BulkLoadPhase::Triggered || - bulkLoadState.phase == BulkLoadPhase::Running || - bulkLoadState.phase == BulkLoadPhase::Complete); + ASSERT(bulkLoadTaskState.phase == BulkLoadTaskPhase::Complete); } } } @@ -1279,80 +1269,381 @@ ACTOR Future scheduleBulkLoadTasks(Reference self) { wait(tr.onError(e)); } } + wait(waitForAllReadyThenThrow(actors)); return Void(); } -ACTOR Future bulkLoadTaskScheduler(Reference self) { +ACTOR Future bulkLoadTaskEngineCore(Reference self, Future readyToStart) { + wait(readyToStart); + state Database cx = self->txnProcessor->context(); + wait(registerRangeLockOwner(cx, rangeLockNameForBulkLoad, rangeLockNameForBulkLoad)); loop { - wait(scheduleBulkLoadTasks(self) && delay(SERVER_KNOBS->DD_BULKLOAD_SCHEDULE_MIN_INTERVAL_SEC)); + try { + wait(bulkLoadTaskScheduler(self)); + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + TraceEvent(SevInfo, "DDBulkLoadCoreError", self->ddId).errorUnsuppressed(e); + if (e.code() == error_code_movekeys_conflict) { + throw e; + } + } + wait(delay(SERVER_KNOBS->DD_BULKLOAD_SCHEDULE_MIN_INTERVAL_SEC)); } } -ACTOR Future resumeBulkLoadTasks(Reference self) { - state Key beginKey = normalKeys.begin; - state 
Key endKey = normalKeys.end; +ACTOR Future bulkLoadJobRunOnTask(Reference self, + BulkLoadJobState bulkLoadJobTask, + bool existBulkLoadTask) { + ASSERT(bulkLoadJobTask.isValidTask()); + state Database cx = self->txnProcessor->context(); + state Transaction tr(cx); + state BulkLoadTaskState bulkLoadTask; + + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTask", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("ExistBulkLoadTask", existBulkLoadTask) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + + try { + // Step 1: Trigger bulkload + if (!existBulkLoadTask) { + bulkLoadTask = BulkLoadTaskState(bulkLoadJobTask.getRange(), + BulkLoadFileType::SST, + bulkLoadJobTask.getTransportMethod() == + BulkLoadTransportMethod::CP // TODO(BulkDump): support S3 + ? BulkLoadTransportMethod::CP + : BulkLoadTransportMethod::Invalid, + bulkLoadJobTask.getRemoteRoot(), + { bulkLoadJobTask.getDataFilePath() }, + bulkLoadJobTask.getBytesSampleFilePath(), + bulkLoadJobTask.getJobId()); + loop { + try { + // At any time, there must be at most one bulkdump restore task + wait(checkMoveKeysLock(&tr, self->context->lock, self->context->ddEnabledState.get())); + bool doUpdate = wait(updateBulkLoadJobMetadata(&tr, bulkLoadJobTask)); + wait(setBulkLoadSubmissionTransaction(&tr, + bulkLoadTask)); // Internally shut down traffic to the range + wait(tr.commit()); + break; + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } else if (e.code() == error_code_bulkload_task_outdated) { + TraceEvent(SevWarn, "DDBulkLoadJobRunOnTaskOutdated", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Location", "Trigger bulkload") + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + self->bulkLoadParallelismLimitor.decrementTaskCounter(); + return Void(); // silently exit + } + wait(tr.onError(e)); + } + } + tr.fullReset(); + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTaskTriggered", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + } else { + std::vector existTasks = + wait(getValidBulkLoadTasksWithinRange(cx, bulkLoadJobTask.getRange())); + ASSERT(existTasks.empty() || existTasks.size() == 1); + if (existTasks.empty()) { + TraceEvent(SevWarn, "DDBulkLoadJobRunOnTaskOutdated", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Location", "Read bulkload") + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + self->bulkLoadParallelismLimitor.decrementTaskCounter(); + return Void(); // silently exit + } + bulkLoadTask = existTasks[0]; + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTaskHasTriggered", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + } + + // Step 2: Monitor the bulkload completion + ASSERT(bulkLoadTask.isValid()); + loop { + try { + wait(store(bulkLoadTask, + getBulkLoadTask(&tr, + bulkLoadTask.getRange(), + bulkLoadTask.getTaskId(), + { BulkLoadTaskPhase::Submitted, + BulkLoadTaskPhase::Triggered, + BulkLoadTaskPhase::Running, + BulkLoadTaskPhase::Complete, + BulkLoadTaskPhase::Acknowledged }))); + if (bulkLoadTask.phase == BulkLoadTaskPhase::Complete || + bulkLoadTask.phase == BulkLoadTaskPhase::Acknowledged) { + break; + } + TraceEvent(SevDebug, "DDBulkLoadJobRunOnTaskLoadRunning", self->ddId) 
.setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } else if (e.code() == error_code_bulkload_task_outdated) { + // Unexpected case + TraceEvent(SevError, "DDBulkLoadJobRunOnTaskOutdated", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Location", "Monitor bulkload") + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + self->bulkLoadParallelismLimitor.decrementTaskCounter(); + return Void(); // silently exit + } + wait(tr.onError(e)); + } + wait(delay(10.0)); + } + tr.fullReset(); + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTaskLoadComplete", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + + // Step 3: Acknowledge completed bulkload task + loop { + try { + wait(checkMoveKeysLock(&tr, self->context->lock, self->context->ddEnabledState.get())); + wait(setBulkLoadAcknowledgeTransaction(&tr, bulkLoadTask.getRange(), bulkLoadTask.getTaskId())); + bulkLoadJobTask.markComplete(); + bool doUpdate = wait(updateBulkLoadJobMetadata(&tr, bulkLoadJobTask)); + wait(tr.commit()); + break; + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } else if (e.code() == error_code_bulkload_task_outdated) { + // Unexpected case + TraceEvent(SevError, "DDBulkLoadJobRunOnTaskOutdated", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Location", "Acknowledge bulkload") + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + self->bulkLoadParallelismLimitor.decrementTaskCounter(); + return Void(); // silently exit + } + wait(tr.onError(e)); + } + } + tr.fullReset(); + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTaskComplete", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTaskError", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .errorUnsuppressed(e) + .detail("Task", bulkLoadTask.toString()) + .detail("BulkLoadJobTask", bulkLoadJobTask.toString()); + // silently exit + } + self->bulkLoadParallelismLimitor.decrementTaskCounter(); + return Void(); +} + +ACTOR Future bulkLoadJobCompleteEmptyTask(Reference self, BulkLoadJobState bulkLoadJobTask) { + ASSERT(bulkLoadJobTask.getPhase() == BulkLoadJobPhase::Complete); + state Database cx = self->txnProcessor->context(); + state Transaction tr(cx); + loop { + try { + wait(checkMoveKeysLock(&tr, self->context->lock, self->context->ddEnabledState.get())); + bool doUpdate = wait(updateBulkLoadJobMetadata(&tr, bulkLoadJobTask)); + if (doUpdate) { + wait(tr.commit()); + TraceEvent(SevInfo, "DDBulkLoadJobRunOnTaskComplete", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadJobTask.toString()); + } + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + return Void(); +} + +ACTOR Future bulkLoadJobExecutor(Reference self, BulkLoadJobState job) { + state std::vector> bulkLoadJobActors; + state Database cx = self->txnProcessor->context(); + state Transaction tr(cx); + + state UID jobId = job.getJobId(); + state 
std::string remoteRoot = job.getRemoteRoot(); + std::string remoteFolder = generateBulkLoadJobRoot(remoteRoot, jobId); + std::string jobManifestFileName = generateBulkLoadJobManifestFileName(jobId); + state std::string localFolder = generateBulkLoadJobRoot(self->bulkLoadFolder, jobId); + state std::string localJobManifestFilePath = joinPath(localFolder, jobManifestFileName); + state std::string remoteJobManifestFilePath = joinPath(remoteFolder, jobManifestFileName); + state std::vector manifests; + + state BulkLoadTransportMethod transportMethod = job.getTransportMethod(); + state KeyRange range = job.getRange(); + state Key beginKey = range.begin; + state Key endKey = range.end; state KeyRange rangeToRead; + state RangeResult bulkLoadJobResult; + state int index = 0; + state BulkLoadJobState existTask; + state std::string manifestLocalTempFolder = joinPath(localFolder, "manifest-batch-tmp"); while (beginKey < endKey) { - Database cx = self->txnProcessor->context(); - state Transaction tr(cx); try { + bulkLoadJobResult.clear(); + index = 0; rangeToRead = Standalone(KeyRangeRef(beginKey, endKey)); - RangeResult result = - wait(krmGetRanges(&tr, bulkLoadPrefix, rangeToRead, SERVER_KNOBS->DD_BULKLOAD_TASK_METADATA_READ_SIZE)); - for (int i = 0; i < result.size() - 1; i++) { - if (!result[i].value.empty()) { - KeyRange range = Standalone(KeyRangeRef(result[i].key, result[i + 1].key)); - BulkLoadState bulkLoadState = decodeBulkLoadState(result[i].value); - if (range != bulkLoadState.getRange()) { - TraceEvent(SevWarn, "DDBulkLoadRestartTriggeredTaskFailed", self->ddId) - .detail("Reason", "Task boundary changed") - .detail("BulkLoadTask", bulkLoadState.toString()) - .detail("RangeInSpace", range); - } else if (bulkLoadState.phase == BulkLoadPhase::Triggered) { - runBulkLoadTaskAsync( - self, bulkLoadState.getRange(), bulkLoadState.getTaskId(), /*restart=*/true); - } else if (bulkLoadState.phase == BulkLoadPhase::Running) { - runBulkLoadTaskAsync( - self, bulkLoadState.getRange(), bulkLoadState.getTaskId(), /*restart=*/true); - } else if (bulkLoadState.phase == BulkLoadPhase::Acknowledged) { - eraseBulkLoadTaskAsync( - self, bulkLoadState.getRange(), bulkLoadState.getTaskId(), /*restart=*/true); - } else { - TraceEvent(SevDebug, "DDBulkLoadRestartRangeNoTask", self->ddId).detail("RangeInSpace", range); + wait(store( + bulkLoadJobResult, + krmGetRanges(&tr, bulkLoadJobPrefix, rangeToRead, SERVER_KNOBS->DD_BULKDUMP_TASK_METADATA_READ_SIZE))); + for (; index < bulkLoadJobResult.size() - 1; index++) { + if (bulkLoadJobResult[index].value.empty()) { + ASSERT(false); + } + existTask = decodeBulkLoadJobState(bulkLoadJobResult[index].value); + if (existTask.getPhase() == BulkLoadJobPhase::Complete) { + TraceEvent(SevDebug, "BulkLoadJobExecutorTaskComplete", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", existTask.toString()); + continue; + } + TraceEvent(SevDebug, "BulkLoadJobExecutorTaskRun", self->ddId).detail("Task", existTask.toString()); + + ASSERT(existTask.getPhase() == BulkLoadJobPhase::Submitted || + existTask.getPhase() == BulkLoadJobPhase::Triggered); + + if (existTask.getPhase() == BulkLoadJobPhase::Submitted) { + // Get manifest metadata for necessary range + // TODO(BulkLoad): check if the local path has the file and the file is complete + if (!directoryExists(abspath(localFolder))) { + ASSERT(platform::createDirectory(abspath(localFolder))); + } + if (!fileExists(abspath(localJobManifestFilePath))) { + // TODO(BulkLoad): check if the file complete + 
TraceEvent(SevDebug, "BulkLoadJobExecutorManifestDownload", self->ddId) + .detail("LocalJobManifestFilePath", localJobManifestFilePath) + .detail("RemoteJobManifestFilePath", remoteJobManifestFilePath); + wait(downloadBulkLoadJobManifestFile( + transportMethod, localJobManifestFilePath, remoteJobManifestFilePath, self->ddId)); + TraceEvent(SevDebug, "BulkLoadJobExecutorManifestDownloaded", self->ddId); } + manifests.clear(); + wait(store(manifests, + getBulkLoadManifestMetadataFromFiles(localJobManifestFilePath, + existTask.getRange(), + manifestLocalTempFolder, + transportMethod, + self->ddId))); + // TODO(BulkLoad): avoid repeatedly call getBulkLoadManifestMetadataFromFiles which is heavy + TraceEvent(SevDebug, "BulkLoadJobExecutorManifestGot", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("ManifestCount", manifests.size()); + + // Dispatch job + state size_t i = 0; + for (; i < manifests.size(); i++) { + if (manifests[i].isEmptyRange()) { + BulkLoadJobState bulkLoadJobTask = job.getEmptyTaskToComplete(manifests[i]); + TraceEvent(SevDebug, "BulkLoadJobExecutorEmptyRangeTask", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Manifest", manifests[i].toString()) + .detail("Task", bulkLoadJobTask.toString()); + bulkLoadJobActors.push_back(bulkLoadJobCompleteEmptyTask(self, bulkLoadJobTask)); + wait(delay(1.0)); + } else { + // Limit parallelism + loop { + if (self->bulkLoadParallelismLimitor.tryIncrementTaskCounter()) { + break; + } + wait(self->bulkLoadParallelismLimitor.waitUntilCounterChanged()); + } + BulkLoadJobState bulkLoadJobTask = job.getTaskToTrigger(manifests[i]); + TraceEvent(SevDebug, "BulkLoadJobExecutorNewTask", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", bulkLoadJobTask.toString()); + bulkLoadJobActors.push_back(bulkLoadJobRunOnTask(self, bulkLoadJobTask, false)); + } + } + } else { + // Limit parallelism + loop { + if (self->bulkLoadParallelismLimitor.tryIncrementTaskCounter()) { + break; + } + wait(self->bulkLoadParallelismLimitor.waitUntilCounterChanged()); + } + TraceEvent(SevDebug, "BulkLoadJobExecutorMonitorTask", self->ddId) + .setMaxEventLength(-1) + .setMaxFieldLength(-1) + .detail("Task", existTask.toString()); + bulkLoadJobActors.push_back(bulkLoadJobRunOnTask(self, existTask, true)); } } - beginKey = result.back().key; + + beginKey = bulkLoadJobResult.back().key; + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + TraceEvent(SevWarn, "BulkLoadJobExecutorError", self->ddId).errorUnsuppressed(e); + // TODO(BulkLoad): error handling wait(tr.onError(e)); } } - TraceEvent(SevInfo, "DDBulkLoadRestartTriggeredTasksComplete", self->ddId); + wait(waitForAllReadyThenThrow(bulkLoadJobActors)); return Void(); } -ACTOR Future bulkLoadingCore(Reference self, Future readyToStart) { +ACTOR Future bulkLoadCore(Reference self, Future readyToStart) { wait(readyToStart); state Database cx = self->txnProcessor->context(); - wait(registerRangeLockOwner(cx, rangeLockNameForBulkLoad, rangeLockNameForBulkLoad)); - wait(resumeBulkLoadTasks(self)); - TraceEvent(SevInfo, "DDBulkLoadCoreResumed", self->ddId); - loop { try { - self->bulkLoadActors.add(bulkLoadTaskScheduler(self)); - wait(self->bulkLoadActors.getResult()); + Optional job = wait(getOngoingBulkLoadJob(cx)); + if (job.present()) { + TraceEvent(SevInfo, "BulkLoadCoreFoundJob", self->ddId).detail("Job", job.get().toString()); + wait(bulkLoadJobExecutor(self, job.get())); + // TODO(BulkLoad): finalize restore job 
and clear range + } } catch (Error& e) { if (e.code() == error_code_actor_cancelled) { throw e; } - TraceEvent(SevInfo, "DDBulkLoadCoreError", self->ddId).errorUnsuppressed(e); - if (e.code() == error_code_movekeys_conflict) { - throw e; - } + TraceEvent(SevWarn, "BulkLoadingCoreError", self->ddId).errorUnsuppressed(e); } - self->bulkLoadActors.clear(false); - wait(delay(SERVER_KNOBS->DD_BULKLOAD_SCHEDULE_MIN_INTERVAL_SEC)); + wait(delay(SERVER_KNOBS->DD_BULKDUMP_SCHEDULE_MIN_INTERVAL_SEC)); } } @@ -1394,7 +1685,7 @@ ACTOR Future doBulkDumpTask(Reference self, // limitation controlled by bulkDumpParallelismLimitor. // DD_BULKDUMP_PARALLELISM defines the maximum number of concurrent bulkdump // tasks spawned by DD. -ACTOR Future scheduleBulkDumpTasks(Reference self) { +ACTOR Future bulkDumpTaskScheduler(Reference self) { state Database cx = self->txnProcessor->context(); state Key beginKey = normalKeys.begin; @@ -1474,34 +1765,12 @@ ACTOR Future scheduleBulkDumpTasks(Reference self) { return allComplete && hasJob; } -void bulkDumpUploadJobManifestFile(Reference self, - BulkDumpTransportMethod transportMethod, - const std::map& manifests, - const std::string& remoteRoot, - const UID& jobId) { - if (self->folder.empty()) { - return; - } - // Upload job manifest file - std::string content = generateJobManifestFileContent(manifests); - ASSERT(!content.empty() && !self->bulkDumpFolder.empty()); - std::string localFolder = getBulkDumpJobRoot(self->bulkDumpFolder, jobId); - std::string remoteFolder = getBulkDumpJobRoot(remoteRoot, jobId); - std::string jobManifestFileName = getJobManifestFileName(jobId); - std::string localJobManifestFilePath = joinPath(localFolder, jobManifestFileName); - std::string remoteJobManifestFilePath = joinPath(remoteFolder, jobManifestFileName); - generateBulkDumpJobManifestFile(localFolder, localJobManifestFilePath, content, self->ddId); - uploadBulkDumpJobManifestFile(transportMethod, localJobManifestFilePath, remoteJobManifestFilePath, self->ddId); - clearFileFolder(localFolder); - return; -} - ACTOR Future finalizeBulkDumpJob(Reference self) { // Collect necessary info to generate job manifest file by scan the entire bulkDump key space - state std::map manifests; + state std::map manifests; state Optional jobId; state Optional remoteRoot; - state Optional transportMethod; + state Optional transportMethod; TraceEvent(SevInfo, "DDBulkDumpJobFinalizeStart", self->ddId); state Database cx = self->txnProcessor->context(); @@ -1574,16 +1843,27 @@ ACTOR Future finalizeBulkDumpJob(Reference self) { // Finally all bulkdump metadata. // Any failure during this process will retry by DD. 
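For reference, the job manifest assembled in the block that follows is a plain text file with one row per dumped range, sorted by begin key (the column layout comes from the generateJobManifestFileContent() contract in the deleted header further below). A self-contained sketch of that row format, using a stand-in struct rather than the real BulkLoadManifest type:

    #include <cstdint>
    #include <map>
    #include <string>

    // Stand-in for the per-range manifest fields the row format needs.
    struct RangeManifestRow {
        std::string beginKey;
        std::string endKey;
        int64_t dataVersion = -1; // always valid for a dumped range
        int64_t dataBytes = 0;    // may be 0 for an empty range
        std::string manifestPath;
    };

    // One row per range; the map is keyed by the range's begin key, so iteration
    // yields rows sorted by beginKey with no overlap between ranges.
    std::string buildJobManifestContent(const std::map<std::string, RangeManifestRow>& rows) {
        std::string content;
        for (const auto& [begin, row] : rows) {
            content += row.beginKey + ", " + row.endKey + ", " + std::to_string(row.dataVersion) +
                       ", " + std::to_string(row.dataBytes) + ", " + row.manifestPath + "\n";
        }
        return content;
    }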
try { - ASSERT(jobId.present() && remoteRoot.present() && transportMethod.present()); + ASSERT(jobId.present() && remoteRoot.present() && transportMethod.present() && !self->folder.empty() && + !self->bulkDumpFolder.empty()); // Generate the file at a local folder at first and then upload the file to the remote // The local file path: // bulkDumpFolder>//-job-manifest.txt // The remote file path: // rootRemote>//-job-manifest.txt - bulkDumpUploadJobManifestFile(self, transportMethod.get(), manifests, remoteRoot.get(), jobId.get()); + state std::string localFolder = generateBulkLoadJobRoot(self->bulkDumpFolder, jobId.get()); + std::string remoteFolder = generateBulkLoadJobRoot(remoteRoot.get(), jobId.get()); + std::string jobManifestFileName = generateBulkLoadJobManifestFileName(jobId.get()); + std::string localJobManifestFilePath = joinPath(localFolder, jobManifestFileName); + std::string remoteJobManifestFilePath = joinPath(remoteFolder, jobManifestFileName); + platform::eraseDirectoryRecursive(abspath(localFolder)); + platform::createDirectory(abspath(localFolder)); + wait(uploadBulkLoadJobManifestFile( + transportMethod.get(), localJobManifestFilePath, remoteJobManifestFilePath, manifests, self->ddId)); + platform::eraseDirectoryRecursive(abspath(localFolder)); TraceEvent(SevInfo, "DDBulkDumpJobFinalizeUploadManifest", self->ddId); // clear all bulkdump metadata + // TODO(BulkDump): resolve two DD conflict using moveKeyLock wait(clearBulkDumpJob(cx)); TraceEvent(SevInfo, "DDBulkDumpJobFinalizeMetadataClear", self->ddId); } catch (Error& e) { @@ -1596,37 +1876,18 @@ ACTOR Future finalizeBulkDumpJob(Reference self) { return Void(); } -// The actor monitors whether the all tasks completed by the scheduleBulkDumpTasks. -// If not, it issue a new scheduleBulkDumpTasks to do the remaining tasks. 
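The rewritten core below folds the old monitor actor into a single loop: run one scheduling pass over the bulkdump key space, sleep for the minimum interval, and only finalize once a pass reports that every task is complete. A condensed flow-style sketch of that control flow (error handling elided; pollThenFinalizeExample is an illustrative name):

    ACTOR Future<Void> pollThenFinalizeExample(Reference<DataDistributor> self) {
        loop {
            state bool allComplete = false;
            // One scheduling pass plus a minimum interval between passes;
            // store() assigns the pass result into allComplete.
            wait(store(allComplete, bulkDumpTaskScheduler(self)) &&
                 delay(SERVER_KNOBS->DD_BULKDUMP_SCHEDULE_MIN_INTERVAL_SEC));
            if (allComplete) {
                wait(finalizeBulkDumpJob(self)); // upload job manifest, clear metadata
            }
        }
    }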
-ACTOR Future bulkDumpTaskScheduler(Reference self) { +ACTOR Future bulkDumpCore(Reference self, Future readyToStart) { + wait(readyToStart); state Database cx = self->txnProcessor->context(); - state bool allComplete = false; loop { + state bool allComplete = false; try { - wait(store(allComplete, scheduleBulkDumpTasks(self)) && + wait(store(allComplete, bulkDumpTaskScheduler(self)) && delay(SERVER_KNOBS->DD_BULKDUMP_SCHEDULE_MIN_INTERVAL_SEC)); if (allComplete) { wait(finalizeBulkDumpJob(self)); TraceEvent(SevInfo, "DDBulkDumpTaskSchedulerComplete", self->ddId); - break; - } - } catch (Error& e) { - if (e.code() == error_code_actor_cancelled) { - throw e; } - TraceEvent(SevInfo, "DDBulkDumpTaskSchedulerError", self->ddId).errorUnsuppressed(e); - } - wait(delay(5.0)); - } - return Void(); -} - -ACTOR Future bulkDumpingCore(Reference self, Future readyToStart) { - wait(readyToStart); - state Database cx = self->txnProcessor->context(); - loop { - try { - wait(bulkDumpTaskScheduler(self)); } catch (Error& e) { if (e.code() == error_code_actor_cancelled) { throw e; @@ -1876,9 +2137,12 @@ ACTOR Future dataDistribution(Reference self, self->bulkLoadEnabled = true; if (self->configuration.usableRegions > 1) { actors.push_back( - bulkLoadingCore(self, self->initialized.getFuture() && remoteRecovered(self->dbInfo))); + bulkLoadTaskEngineCore(self, self->initialized.getFuture() && remoteRecovered(self->dbInfo))); + actors.push_back( + bulkLoadCore(self, self->initialized.getFuture() && remoteRecovered(self->dbInfo))); } else { - actors.push_back(bulkLoadingCore(self, self->initialized.getFuture())); + actors.push_back(bulkLoadTaskEngineCore(self, self->initialized.getFuture())); + actors.push_back(bulkLoadCore(self, self->initialized.getFuture())); } } @@ -1888,9 +2152,9 @@ ACTOR Future dataDistribution(Reference self, self->bulkDumpEnabled = true; if (self->configuration.usableRegions > 1) { actors.push_back( - bulkDumpingCore(self, self->initialized.getFuture() && remoteRecovered(self->dbInfo))); + bulkDumpCore(self, self->initialized.getFuture() && remoteRecovered(self->dbInfo))); } else { - actors.push_back(bulkDumpingCore(self, self->initialized.getFuture())); + actors.push_back(bulkDumpCore(self, self->initialized.getFuture())); } } diff --git a/fdbserver/MoveKeys.actor.cpp b/fdbserver/MoveKeys.actor.cpp index 20419898d99..b01e93eb043 100644 --- a/fdbserver/MoveKeys.actor.cpp +++ b/fdbserver/MoveKeys.actor.cpp @@ -30,7 +30,7 @@ #include "fdbclient/KeyBackedTypes.actor.h" #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/SystemData.h" -#include "fdbserver/BulkLoadUtil.actor.h" +#include "fdbserver/BulkLoadAndDumpUtil.actor.h" #include "fdbserver/MoveKeys.actor.h" #include "fdbserver/Knobs.h" #include "fdbclient/ReadYourWrites.h" @@ -1622,7 +1622,7 @@ ACTOR static Future startMoveShards(Database occ, std::map* tssMapping, const DDEnabledState* ddEnabledState, CancelConflictingDataMoves cancelConflictingDataMoves, - Optional bulkLoadState) { + Optional bulkLoadTaskState) { state Future warningLogger = logWarningAfter("StartMoveShardsTooLong", 600, servers); wait(startMoveKeysLock->take(TaskPriority::DataDistributionLaunch)); @@ -1634,7 +1634,7 @@ ACTOR static Future startMoveShards(Database occ, TraceEvent(SevInfo, "StartMoveShardsBegin", relocationIntervalId) .detail("DataMoveID", dataMoveId) .detail("TargetRange", describe(ranges)) - .detail("BulkLoadState", bulkLoadState.present() ? 
bulkLoadState.get().toString() : ""); + .detail("BulkLoadTaskState", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); // TODO: make startMoveShards work with multiple ranges. ASSERT(ranges.size() == 1); @@ -1752,13 +1752,13 @@ ACTOR static Future startMoveShards(Database occ, .detail("DestID", destId) .detail("ReadVersion", tr.getReadVersion().get()); - if (bulkLoadState.present()) { + if (bulkLoadTaskState.present()) { state std::vector owners(src.size() + dest.size()); std::merge(src.begin(), src.end(), dest.begin(), dest.end(), owners.begin()); for (const auto& ssid : servers) { if (std::find(owners.begin(), owners.end(), ssid) != owners.end()) { TraceEvent(SevWarn, "DDBulkLoadTaskStartMoveShardsMoveInConflict") - .detail("BulkLoadState", bulkLoadState.get().toString()) + .detail("BulkLoadTaskState", bulkLoadTaskState.get().toString()) .detail("DestServerId", ssid) .detail("OwnerIds", describe(owners)) .detail("DataMove", dataMove.toString()); @@ -1830,7 +1830,7 @@ ACTOR static Future startMoveShards(Database occ, dataMove.src.insert(src.begin(), src.end()); // If this is a bulk load data move, need not create checkpoint on the source servers - if (shouldCreateCheckpoint(dataMoveId) && !bulkLoadState.present()) { + if (shouldCreateCheckpoint(dataMoveId) && !bulkLoadTaskState.present()) { const UID checkpointId = UID(deterministicRandom()->randomUInt64(), srcId.first()); CheckpointMetaData checkpoint(std::vector{ rangeIntersectKeys }, DataMoveRocksCF, @@ -1865,17 +1865,17 @@ ACTOR static Future startMoveShards(Database occ, } if (currentKeys.end == keys.end) { - if (bulkLoadState.present()) { - state BulkLoadState newBulkLoadState; + if (bulkLoadTaskState.present()) { + state BulkLoadTaskState newBulkLoadTaskState; try { - wait(store(newBulkLoadState, + wait(store(newBulkLoadTaskState, getBulkLoadTask(&tr, - bulkLoadState.get().getRange(), - bulkLoadState.get().getTaskId(), - { BulkLoadPhase::Triggered, BulkLoadPhase::Running }))); + bulkLoadTaskState.get().getRange(), + bulkLoadTaskState.get().getTaskId(), + { BulkLoadTaskPhase::Triggered, BulkLoadTaskPhase::Running }))); // It is possible that the previous data move is cancelled but has updated the // task phase as running. 
In this case, we update the phase from Running to Running - newBulkLoadState.phase = BulkLoadPhase::Running; + newBulkLoadTaskState.phase = BulkLoadTaskPhase::Running; } catch (Error& e) { if (e.code() == error_code_bulkload_task_outdated) { cancelDataMove = true; @@ -1883,19 +1883,22 @@ ACTOR static Future startMoveShards(Database occ, } throw e; } - newBulkLoadState.setDataMoveId(dataMoveId); - newBulkLoadState.startTime = now(); - wait(krmSetRange( - &tr, bulkLoadPrefix, newBulkLoadState.getRange(), bulkLoadStateValue(newBulkLoadState))); + newBulkLoadTaskState.setDataMoveId(dataMoveId); + newBulkLoadTaskState.startTime = now(); + wait(krmSetRange(&tr, + bulkLoadTaskPrefix, + newBulkLoadTaskState.getRange(), + bulkLoadTaskStateValue(newBulkLoadTaskState))); TraceEvent(SevInfo, "DDBulkLoadTaskRunningPersist", relocationIntervalId) - .detail("BulkLoadState", newBulkLoadState.toString()); - dataMove.bulkLoadState = newBulkLoadState; + .detail("BulkLoadTaskState", newBulkLoadTaskState.toString()); + dataMove.bulkLoadTaskState = newBulkLoadTaskState; } dataMove.setPhase(DataMoveMetaData::Running); TraceEvent(sevDm, "StartMoveShardsDataMoveComplete", relocationIntervalId) .detail("DataMoveID", dataMoveId) .detail("DataMove", dataMove.toString()) - .detail("BulkLoadState", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTaskState", + bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); } else { dataMove.setPhase(DataMoveMetaData::Prepare); TraceEvent(sevDm, "StartMoveShardsDataMovePartial", relocationIntervalId) @@ -1918,7 +1921,7 @@ ACTOR static Future startMoveShards(Database occ, .detail("DeltaRange", currentKeys.toString()) .detail("Range", describe(dataMove.ranges)) .detail("DataMove", dataMove.toString()) - .detail("BulkLoadState", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTaskState", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); // Post validate consistency of update of keyServers and serverKeys if (SERVER_KNOBS->AUDIT_DATAMOVE_POST_CHECK) { @@ -2038,7 +2041,7 @@ ACTOR static Future finishMoveShards(Database occ, UID relocationIntervalId, std::map tssMapping, const DDEnabledState* ddEnabledState, - Optional bulkLoadState) { + Optional bulkLoadTaskState) { // TODO: make startMoveShards work with multiple ranges. 
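Both startMoveShards and finishMoveShards (continuing below) gate their metadata updates on the task sitting in an expected phase: getBulkLoadTask() throws bulkload_task_outdated when the stored task no longer matches, which the data move turns into a cancellation. A minimal illustration of that phase gate in plain C++ (names hypothetical, not the shipped helper):

    #include <initializer_list>
    #include <stdexcept>

    enum class BulkLoadTaskPhase { Invalid, Submitted, Triggered, Running, Complete, Acknowledged };

    struct TaskRecord {
        BulkLoadTaskPhase phase = BulkLoadTaskPhase::Invalid;
        // range, taskId, ... elided
    };

    // Returns the task only if it sits in one of the phases the caller can
    // legally transition from; otherwise the task is treated as outdated.
    TaskRecord getTaskInPhases(const TaskRecord& stored, std::initializer_list<BulkLoadTaskPhase> allowed) {
        for (BulkLoadTaskPhase p : allowed) {
            if (stored.phase == p) {
                return stored;
            }
        }
        throw std::runtime_error("bulkload_task_outdated"); // stand-in for the FDB error code
    }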
ASSERT(targetRanges.size() == 1); state KeyRange keys = targetRanges[0]; @@ -2304,15 +2307,16 @@ ACTOR static Future finishMoveShards(Database occ, wait(waitForAll(actors)); if (range.end == dataMove.ranges.front().end) { - if (bulkLoadState.present()) { - state BulkLoadState newBulkLoadState; + if (bulkLoadTaskState.present()) { + state BulkLoadTaskState newBulkLoadTaskState; try { - wait(store(newBulkLoadState, - getBulkLoadTask(&tr, - bulkLoadState.get().getRange(), - bulkLoadState.get().getTaskId(), - { BulkLoadPhase::Running, BulkLoadPhase::Complete }))); - newBulkLoadState.phase = BulkLoadPhase::Complete; + wait(store( + newBulkLoadTaskState, + getBulkLoadTask(&tr, + bulkLoadTaskState.get().getRange(), + bulkLoadTaskState.get().getTaskId(), + { BulkLoadTaskPhase::Running, BulkLoadTaskPhase::Complete }))); + newBulkLoadTaskState.phase = BulkLoadTaskPhase::Complete; } catch (Error& e) { if (e.code() == error_code_bulkload_task_outdated) { cancelDataMove = true; @@ -2320,22 +2324,22 @@ ACTOR static Future finishMoveShards(Database occ, } throw e; } - ASSERT(newBulkLoadState.getDataMoveId().present() && - newBulkLoadState.getDataMoveId().get() == dataMoveId); - newBulkLoadState.completeTime = now(); + ASSERT(newBulkLoadTaskState.getDataMoveId().present() && + newBulkLoadTaskState.getDataMoveId().get() == dataMoveId); + newBulkLoadTaskState.completeTime = now(); wait(krmSetRange(&tr, - bulkLoadPrefix, - newBulkLoadState.getRange(), - bulkLoadStateValue(newBulkLoadState))); + bulkLoadTaskPrefix, + newBulkLoadTaskState.getRange(), + bulkLoadTaskStateValue(newBulkLoadTaskState))); TraceEvent(SevInfo, "DDBulkLoadTaskCompletePersist", relocationIntervalId) - .detail("BulkLoadState", newBulkLoadState.toString()); - dataMove.bulkLoadState = newBulkLoadState; + .detail("BulkLoadTaskState", newBulkLoadTaskState.toString()); + dataMove.bulkLoadTaskState = newBulkLoadTaskState; } wait(deleteCheckpoints(&tr, dataMove.checkpoints, dataMoveId)); tr.clear(dataMoveKeyFor(dataMoveId)); TraceEvent(sevDm, "FinishMoveShardsDeleteMetaData", relocationIntervalId) .detail("DataMove", dataMove.toString()); - } else if (!bulkLoadState.present()) { + } else if (!bulkLoadTaskState.present()) { // Bulk Loading data move does not allow partial complete TraceEvent(SevInfo, "FinishMoveShardsPartialComplete", relocationIntervalId) .detail("DataMoveID", dataMoveId) @@ -2394,7 +2398,7 @@ ACTOR static Future finishMoveShards(Database occ, TraceEvent(SevInfo, "FinishMoveShardsEnd", relocationIntervalId) .detail("DataMoveID", dataMoveId) - .detail("BulkLoadState", bulkLoadState.present() ? bulkLoadState.get().toString() : "") + .detail("BulkLoadTaskState", bulkLoadTaskState.present() ? 
bulkLoadTaskState.get().toString() : "") .detail("DataMove", dataMove.toString()); return Void(); } @@ -3224,7 +3228,7 @@ Future rawStartMovement(Database occ, &tssMapping, params.ddEnabledState, params.cancelConflictingDataMoves, - params.bulkLoadState); + params.bulkLoadTaskState); } ASSERT(params.keys.present()); return startMoveKeys(std::move(occ), @@ -3275,7 +3279,7 @@ Future rawFinishMovement(Database occ, params.relocationIntervalId, tssMapping, params.ddEnabledState, - params.bulkLoadState); + params.bulkLoadTaskState); } ASSERT(params.keys.present()); return finishMoveKeys(std::move(occ), diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index ad291bb45ae..477e8ad3cf7 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -2932,12 +2932,12 @@ ACTOR void simulationSetupAndRun(std::string dataFolder, state bool allowCreatingTenants = testConfig.allowCreatingTenants; if (!SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA && - // NOTE: PhysicalShardMove and BulkLoading are required to have SHARDED_ROCKSDB storage engine working. - // Inside the TOML file, the SHARD_ENCODE_LOCATION_METADATA is overridden, however, the - // override will not take effect until the test starts. Here, we do an additional check - // for this special simulation test. + // NOTE: PhysicalShardMove and BulkLoadTask and BulkDumpAndLoad are required to have SHARDED_ROCKSDB storage + // engine working. Inside the TOML file, the SHARD_ENCODE_LOCATION_METADATA is overridden, however, the override + // will not take effect until the test starts. Here, we do an additional check for this special simulation test. (std::string_view(testFile).find("PhysicalShardMove") == std::string_view::npos && - std::string_view(testFile).find("BulkLoading") == std::string_view::npos)) { + std::string_view(testFile).find("BulkLoadTask") == std::string_view::npos && + std::string_view(testFile).find("BulkDumpAndLoad") == std::string_view::npos)) { testConfig.storageEngineExcludeTypes.insert(SimulationStorageEngine::SHARDED_ROCKSDB); } diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index 4d88b223977..5953010a2e4 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -2227,8 +2227,8 @@ int main(int argc, char* argv[]) { auto dataFolder = opts.dataFolder.size() ? opts.dataFolder : "simfdb"; std::vector directories = platform::listDirectories(dataFolder); const std::set allowedDirectories = { - ".", "..", "backups", "unittests", "fdbblob", "bulkdump" - }; + ".", "..", "backups", "unittests", "fdbblob", "bulkDumpAndLoad" + }; // bulkDumpAndLoad is used for BulkDumpAndLoad simulation test for (const auto& dir : directories) { if (dir.size() != 32 && !allowedDirectories.contains(dir) && dir.find("snap") == std::string::npos) { diff --git a/fdbserver/include/fdbserver/BulkDumpUtil.actor.h b/fdbserver/include/fdbserver/BulkDumpUtil.actor.h deleted file mode 100644 index 1bf1bc4d27b..00000000000 --- a/fdbserver/include/fdbserver/BulkDumpUtil.actor.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * BulkDumpUtil.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_BULKDUMPUTIL_ACTOR_G_H) -#define FDBSERVER_BULKDUMPUTIL_ACTOR_G_H -#include "fdbserver/BulkDumpUtil.actor.g.h" -#elif !defined(FDBSERVER_BULKDUMPUTIL_ACTOR_H) -#define FDBSERVER_BULKDUMPUTIL_ACTOR_H -#pragma once - -#include "fdbclient/BulkDumping.h" -#include "fdbclient/StorageServerInterface.h" -#include "flow/actorcompiler.h" // has to be last include - -struct SSBulkDumpTask { - SSBulkDumpTask(const StorageServerInterface& targetServer, - const std::vector& checksumServers, - const BulkDumpState& bulkDumpState) - : targetServer(targetServer), checksumServers(checksumServers), bulkDumpState(bulkDumpState){}; - - std::string toString() const { - return "[BulkDumpState]: " + bulkDumpState.toString() + ", [TargetServer]: " + targetServer.toString() + - ", [ChecksumServers]: " + describe(checksumServers); - } - - StorageServerInterface targetServer; - std::vector checksumServers; - BulkDumpState bulkDumpState; -}; - -// Used by DD to generate a SSBulkDumpTask and send to SS -// SS dumps the data based on the configuration of the SSBulkDumpTask -SSBulkDumpTask getSSBulkDumpTask(const std::map>& locations, - const BulkDumpState& bulkDumpState); - -std::string generateRandomBulkDumpDataFileName(Version version); - -// Return two file settings: first: LocalFilePaths; Second: RemoteFilePaths. -// The local file path: -// //-manifest.txt (must have) -// //-data.sst (omitted for empty range) -// //-sample.sst (omitted if data size is too small to have a sample) -// The remote file path: -// //-manifest.txt (must have) -// //-data.sst (omitted for empty range) -// //-sample.sst (omitted if data size is too small to have a sample) -std::pair getLocalRemoteFileSetSetting(Version dumpVersion, - const std::string& relativeFolder, - const std::string& rootLocal, - const std::string& rootRemote); - -// Persist the complete progress of bulkDump by writing the metadata with Complete phase -// to the bulk dump system key space. -ACTOR Future persistCompleteBulkDumpRange(Database cx, BulkDumpState bulkDumpState); - -// Define bulk dump job folder. Job is set by user. At most one job at a time globally. -std::string generateBulkDumpJobFolder(const UID& jobId); - -// Define job manifest file name. -std::string getJobManifestFileName(const UID& jobId); - -// Define task folder name. -std::string getBulkDumpTaskFolder(const UID& taskId); - -// Define job root folder. -std::string getBulkDumpJobRoot(const std::string& root, const UID& jobId); - -// Define job manifest file content based on job's all BulkDumpManifest. -// Each row is a range sorted by the beginKey. Any two ranges do not have overlapping. -// Col: beginKey, endKey, dataVersion, dataBytes, manifestPath. -// dataVersion should be always valid. dataBytes can be 0 in case of an empty range. -std::string generateJobManifestFileContent(const std::map& manifests); - -// The size of sortedData is defined at the place of generating the data (getRangeDataToDump). -// The size is configured by MOVE_SHARD_KRM_ROW_LIMIT. 
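The path comments in the deleted header above lost their angle-bracket placeholders in rendering; the intended shape is a per-task folder under a root, holding a version-stamped manifest plus optional data and byte-sample files. A sketch of that layout under those assumptions (std::filesystem stand-in, not the shipped joinPath helpers):

    #include <filesystem>
    #include <string>

    namespace fs = std::filesystem;

    // Assumed layout: <root>/<relativeFolder>/<dumpVersion>-manifest.txt, with the
    // data and sample files beside it (either may be absent for small/empty ranges).
    struct DumpFilePaths {
        fs::path manifestFile;
        fs::path dataFile;
        fs::path byteSampleFile;
    };

    DumpFilePaths makeDumpFilePaths(const fs::path& root, const std::string& relativeFolder, int64_t dumpVersion) {
        fs::path folder = root / relativeFolder;
        std::string prefix = std::to_string(dumpVersion);
        return DumpFilePaths{ folder / (prefix + "-manifest.txt"),
                              folder / (prefix + "-data.sst"),
                              folder / (prefix + "-sample.sst") };
    }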
-BulkDumpManifest dumpDataFileToLocalDirectory(UID logId, - const std::map& sortedData, - const std::map& sortedSample, - const BulkDumpFileSet& localFileSet, - const BulkDumpFileSet& remoteFileSet, - const ByteSampleSetting& byteSampleSetting, - Version dumpVersion, - const KeyRange& dumpRange, - int64_t dumpBytes); - -void generateBulkDumpJobManifestFile(const std::string& workFolder, - const std::string& localJobManifestFilePath, - const std::string& content, - const UID& logId); - -// Upload manifest file for bulkdump job -// Each job has one manifest file including manifest paths of all tasks. -// The local file path: -// /-manifest.txt -// The remote file path: -// /-manifest.txt -void uploadBulkDumpJobManifestFile(BulkDumpTransportMethod transportMethod, - const std::string& localJobManifestFilePath, - const std::string& remoteJobManifestFilePath, - UID logId); - -// Upload file for each task. Each task is spawned by bulkdump job according to the shard boundary -ACTOR Future uploadBulkDumpFileSet(BulkDumpTransportMethod transportMethod, - BulkDumpFileSet sourceFileSet, - BulkDumpFileSet destinationFileSet, - UID logId); - -// Erase file folder -void clearFileFolder(const std::string& folderPath); - -class ParallelismLimitor { -public: - ParallelismLimitor(int maxParallelism) : maxParallelism(maxParallelism) {} - - inline void decrementTaskCounter() { - ASSERT(numRunningTasks.get() <= maxParallelism); - numRunningTasks.set(numRunningTasks.get() - 1); - ASSERT(numRunningTasks.get() >= 0); - } - - // return true if succeed - inline bool tryIncrementTaskCounter() { - if (numRunningTasks.get() < maxParallelism) { - numRunningTasks.set(numRunningTasks.get() + 1); - return true; - } else { - return false; - } - } - - inline Future waitUntilCounterChanged() const { return numRunningTasks.onChange(); } - -private: - AsyncVar numRunningTasks; - int maxParallelism; -}; - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/include/fdbserver/BulkLoadAndDumpUtil.actor.h b/fdbserver/include/fdbserver/BulkLoadAndDumpUtil.actor.h new file mode 100644 index 00000000000..581af1ca7f3 --- /dev/null +++ b/fdbserver/include/fdbserver/BulkLoadAndDumpUtil.actor.h @@ -0,0 +1,154 @@ +/* + * BulkLoadAndDumpUtil.actor.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_BULKLOADANDDUMPUTIL_ACTOR_G_H) +#define FDBSERVER_BULKLOADANDDUMPUTIL_ACTOR_G_H +#include "fdbserver/BulkLoadAndDumpUtil.actor.g.h" +#elif !defined(FDBSERVER_BULKLOADANDDUMPUTIL_ACTOR_H) +#define FDBSERVER_BULKLOADANDDUMPUTIL_ACTOR_H +#pragma once + +#include "fdbclient/BulkLoadAndDump.h" +#include "flow/actorcompiler.h" // has to be last include + +class ParallelismLimitor { +public: + ParallelismLimitor(int maxParallelism, const std::string& context) + : maxParallelism(maxParallelism), context(context) {} + + inline void decrementTaskCounter() { + ASSERT(numRunningTasks.get() <= maxParallelism); + numRunningTasks.set(numRunningTasks.get() - 1); + ASSERT(numRunningTasks.get() >= 0); + } + + // Return true on success + inline bool tryIncrementTaskCounter() { + if (numRunningTasks.get() < maxParallelism) { + numRunningTasks.set(numRunningTasks.get() + 1); + return true; + } else { + return false; + } + } + + inline Future waitUntilCounterChanged() const { return numRunningTasks.onChange(); } + +private: + AsyncVar numRunningTasks; + int maxParallelism; + std::string context; +}; + +struct SSBulkLoadFileSet { + std::unordered_set dataFileList; + Optional bytesSampleFile; + std::string folder; + SSBulkLoadFileSet() = default; + std::string toString() { + std::string res = "SSBulkLoadFileSet: [DataFiles]: " + describe(dataFileList); + if (bytesSampleFile.present()) { + res = res + ", [BytesSampleFile]: " + bytesSampleFile.get(); + } + res = res + ", [Folder]: " + folder; + return res; + } +}; + +struct SSBulkDumpTask { + SSBulkDumpTask(const StorageServerInterface& targetServer, + const std::vector& checksumServers, + const BulkDumpState& bulkDumpState) + : targetServer(targetServer), checksumServers(checksumServers), bulkDumpState(bulkDumpState){}; + + std::string toString() const { + return "[BulkDumpState]: " + bulkDumpState.toString() + ", [TargetServer]: " + targetServer.toString() + + ", [ChecksumServers]: " + describe(checksumServers); + } + + StorageServerInterface targetServer; + std::vector checksumServers; + BulkDumpState bulkDumpState; +}; + +ACTOR Future> getBulkLoadTaskStateFromDataMove(Database cx, UID dataMoveId, UID logId); + +ACTOR Future> getBytesSamplingFromSSTFiles(std::string folderToGenerate, + std::unordered_set dataFiles, + UID logId); + +// Used by DD to generate an SSBulkDumpTask and send it to the SS. +// The SS dumps the data based on the configuration of the SSBulkDumpTask. +SSBulkDumpTask getSSBulkDumpTask(const std::map>& locations, + const BulkDumpState& bulkDumpState); + +// Used by bulk dumping +// The size of sortedData is defined at the place of generating the data (getRangeDataToDump). +// The size is configured by MOVE_SHARD_KRM_ROW_LIMIT. +BulkLoadManifest dumpDataFileToLocal(UID logId, + const std::map& sortedData, + const std::map& sortedSample, + const BulkLoadFileSet& localFileSet, + const BulkLoadFileSet& remoteFileSet, + const BulkLoadByteSampleSetting& byteSampleSetting, + Version dumpVersion, + const KeyRange& dumpRange, + int64_t dumpBytes); + +// Upload files for each task.
Each task is spawned by the bulk dump job according to the shard boundaries. +ACTOR Future uploadBulkLoadFileSet(BulkLoadTransportMethod transportMethod, + BulkLoadFileSet sourceFileSet, + BulkLoadFileSet destinationFileSet, + UID logId); + +// Download files for each task +ACTOR Future downloadBulkLoadFileSet(BulkLoadTransportMethod transportMethod, + std::string dir, + BulkLoadTaskState bulkLoadTaskState, + size_t fileBytesMax, + UID logId); + +// Download the job manifest file, which is generated when dumping the data +ACTOR Future downloadBulkLoadJobManifestFile(BulkLoadTransportMethod transportMethod, + std::string localJobManifestFilePath, + std::string remoteJobManifestFilePath, + UID logId); + +// Upload the job manifest file when dumping the data +ACTOR Future uploadBulkLoadJobManifestFile(BulkLoadTransportMethod transportMethod, + std::string localJobManifestFilePath, + std::string remoteJobManifestFilePath, + std::map manifests, + UID logId); + +// Extract the manifests overlapping the input range from the job manifest file +ACTOR Future> getBulkLoadManifestMetadataFromFiles( + std::string localJobManifestFilePath, + KeyRange range, + std::string manifestLocalTempFolder, + BulkLoadTransportMethod transportMethod, + UID logId); + +// Persist the complete progress of bulkDump by writing the metadata with the Complete phase +// to the bulk dump system key space. +ACTOR Future persistCompleteBulkDumpRange(Database cx, BulkDumpState bulkDumpState); + +#include "flow/unactorcompiler.h" +#endif diff --git a/fdbserver/include/fdbserver/BulkLoadUtil.actor.h b/fdbserver/include/fdbserver/BulkLoadUtil.actor.h deleted file mode 100644 index 61ac81e8dad..00000000000 --- a/fdbserver/include/fdbserver/BulkLoadUtil.actor.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * BulkLoadUtil.actor.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#if defined(NO_INTELLISENSE) && !defined(FDBSERVER_BULKLOADUTIL_ACTOR_G_H) -#define FDBSERVER_BULKLOADUTIL_ACTOR_G_H -#include "fdbserver/BulkLoadUtil.actor.g.h" -#elif !defined(FDBSERVER_BULKLOADUTIL_ACTOR_H) -#define FDBSERVER_BULKLOADUTIL_ACTOR_H -#pragma once - -#include "fdbclient/BulkLoading.h" -#include "flow/actorcompiler.h" // has to be last include - -struct SSBulkLoadFileSet { - std::unordered_set dataFileList; - Optional bytesSampleFile; - std::string folder; - SSBulkLoadFileSet() = default; - std::string toString() { - std::string res = "SSBulkLoadFileSet: [DataFiles]: " + describe(dataFileList); - if (bytesSampleFile.present()) { - res = res + ", [BytesSampleFile]: " + bytesSampleFile.get(); - } - res = res + ", [Folder]: " + folder; - return res; - } -}; - -std::string generateRandomBulkLoadDataFileName(); - -std::string generateRandomBulkLoadBytesSampleFileName(); - -ACTOR Future> getBulkLoadStateFromDataMove(Database cx, UID dataMoveId, UID logId); - -void bulkLoadFileCopy(std::string fromFile, std::string toFile, size_t fileBytesMax); - -ACTOR Future bulkLoadTransportCP_impl(std::string dir, - BulkLoadState bulkLoadState, - size_t fileBytesMax, - UID logId); - -ACTOR Future> getBytesSamplingFromSSTFiles(std::string folderToGenerate, - std::unordered_set dataFiles, - UID logId); - -void checkContent(std::unordered_set dataFiles, UID logId); - -#include "flow/unactorcompiler.h" -#endif diff --git a/fdbserver/include/fdbserver/DataDistribution.actor.h b/fdbserver/include/fdbserver/DataDistribution.actor.h index cdd3019ba8a..18fe7cefc35 100644 --- a/fdbserver/include/fdbserver/DataDistribution.actor.h +++ b/fdbserver/include/fdbserver/DataDistribution.actor.h @@ -24,7 +24,7 @@ #elif !defined(FDBSERVER_DATA_DISTRIBUTION_ACTOR_H) #define FDBSERVER_DATA_DISTRIBUTION_ACTOR_H -#include "fdbclient/BulkLoading.h" +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/MoveKeys.actor.h" #include "fdbserver/TenantCache.h" @@ -226,10 +226,10 @@ struct GetMetricsListRequest { }; struct BulkLoadShardRequest { - BulkLoadState bulkLoadState; + BulkLoadTaskState bulkLoadTaskState; BulkLoadShardRequest() {} - BulkLoadShardRequest(BulkLoadState const& bulkLoadState) : bulkLoadState(bulkLoadState) {} + BulkLoadShardRequest(BulkLoadTaskState const& bulkLoadTaskState) : bulkLoadTaskState(bulkLoadTaskState) {} }; // PhysicalShardCollection maintains physical shard concepts in data distribution @@ -535,14 +535,14 @@ struct TeamCollectionInterface { }; struct DDBulkLoadTask { - BulkLoadState coreState; + BulkLoadTaskState coreState; Version commitVersion = invalidVersion; Promise completeAck; // satisfied when a data move for this task completes for the first time, where the task // metadata phase has been complete DDBulkLoadTask() = default; - DDBulkLoadTask(BulkLoadState coreState, Version commitVersion, Promise completeAck) + DDBulkLoadTask(BulkLoadTaskState coreState, Version commitVersion, Promise completeAck) : coreState(coreState), commitVersion(commitVersion), completeAck(completeAck) {} bool operator==(const DDBulkLoadTask& rhs) const { @@ -597,23 +597,23 @@ class BulkLoadTaskCollection : public ReferenceCounted { // DDTracker stops any shard boundary change overlapping the task range // DDQueue attaches the task to following data moves until the task has been completed // If there are overlapped old tasks, make it outdated by sending a signal to completeAck - void publishTask(const BulkLoadState& bulkLoadState, Version commitVersion, 
Promise completeAck) { - if (overlappingTaskSince(bulkLoadState.getRange(), commitVersion)) { + void publishTask(const BulkLoadTaskState& bulkLoadTaskState, Version commitVersion, Promise completeAck) { + if (overlappingTaskSince(bulkLoadTaskState.getRange(), commitVersion)) { throw bulkload_task_outdated(); } - DDBulkLoadTask task(bulkLoadState, commitVersion, completeAck); + DDBulkLoadTask task(bulkLoadTaskState, commitVersion, completeAck); TraceEvent(SevDebug, "DDBulkLoadCollectionPublishTask", ddId) .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("Range", bulkLoadState.getRange()) + .detail("Range", bulkLoadTaskState.getRange()) .detail("Task", task.toString()); // For any overlapping task, make it outdated - for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadState.getRange())) { + for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadTaskState.getRange())) { if (!it->value().present()) { continue; } - if (it->value().get().coreState.getTaskId() == bulkLoadState.getTaskId()) { - ASSERT(it->value().get().coreState.getRange() == bulkLoadState.getRange()); + if (it->value().get().coreState.getTaskId() == bulkLoadTaskState.getTaskId()) { + ASSERT(it->value().get().coreState.getRange() == bulkLoadTaskState.getRange()); // In case that the task has been already triggered // Avoid repeatedly being triggered by throwing the error // then the current doBulkLoadTask will sliently exit @@ -624,24 +624,24 @@ class BulkLoadTaskCollection : public ReferenceCounted { TraceEvent(SevInfo, "DDBulkLoadCollectionPublishTaskOverwriteTask", ddId) .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("NewRange", bulkLoadState.getRange()) + .detail("NewRange", bulkLoadTaskState.getRange()) .detail("NewTask", task.toString()) .detail("OldTaskRange", it->range()) .detail("OldTask", it->value().get().toString()); } } - bulkLoadTaskMap.insert(bulkLoadState.getRange(), task); + bulkLoadTaskMap.insert(bulkLoadTaskState.getRange(), task); return; } // This method is called when there is a data move assigned to run the bulk load task - void startTask(const BulkLoadState& bulkLoadState) { - for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadState.getRange())) { - if (!it->value().present() || it->value().get().coreState.getTaskId() != bulkLoadState.getTaskId()) { + void startTask(const BulkLoadTaskState& bulkLoadTaskState) { + for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadTaskState.getRange())) { + if (!it->value().present() || it->value().get().coreState.getTaskId() != bulkLoadTaskState.getTaskId()) { throw bulkload_task_outdated(); } TraceEvent(SevDebug, "DDBulkLoadCollectionStartTask", ddId) - .detail("Range", bulkLoadState.getRange()) + .detail("Range", bulkLoadTaskState.getRange()) .detail("TaskRange", it->range()) .detail("Task", it->value().get().toString()); } @@ -649,16 +649,16 @@ class BulkLoadTaskCollection : public ReferenceCounted { } // Send complete signal to indicate this task has been completed - void terminateTask(const BulkLoadState& bulkLoadState) { - for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadState.getRange())) { - if (!it->value().present() || it->value().get().coreState.getTaskId() != bulkLoadState.getTaskId()) { + void terminateTask(const BulkLoadTaskState& bulkLoadTaskState) { + for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadTaskState.getRange())) { + if (!it->value().present() || it->value().get().coreState.getTaskId() != bulkLoadTaskState.getTaskId()) { throw bulkload_task_outdated(); } // It is possible that the task has 
been completed by a past data move if (it->value().get().completeAck.canBeSet()) { it->value().get().completeAck.send(Void()); TraceEvent(SevDebug, "DDBulkLoadCollectionTerminateTask", ddId) - .detail("Range", bulkLoadState.getRange()) + .detail("Range", bulkLoadTaskState.getRange()) .detail("TaskRange", it->range()) .detail("Task", it->value().get().toString()); } @@ -667,14 +667,14 @@ class BulkLoadTaskCollection : public ReferenceCounted { } // Erase any metadata on the map for the input bulkload task - void eraseTask(const BulkLoadState& bulkLoadState) { + void eraseTask(const BulkLoadTaskState& bulkLoadTaskState) { std::vector rangesToClear; - for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadState.getRange())) { - if (!it->value().present() || it->value().get().coreState.getTaskId() != bulkLoadState.getTaskId()) { + for (auto it : bulkLoadTaskMap.intersectingRanges(bulkLoadTaskState.getRange())) { + if (!it->value().present() || it->value().get().coreState.getTaskId() != bulkLoadTaskState.getTaskId()) { continue; } TraceEvent(SevDebug, "DDBulkLoadCollectionEraseTaskdata", ddId) - .detail("Range", bulkLoadState.getRange()) + .detail("Range", bulkLoadTaskState.getRange()) .detail("TaskRange", it->range()) .detail("Task", it->value().get().toString()); rangesToClear.push_back(it->range()); diff --git a/fdbserver/include/fdbserver/MoveKeys.actor.h b/fdbserver/include/fdbserver/MoveKeys.actor.h index 08a3618035e..426f99c316c 100644 --- a/fdbserver/include/fdbserver/MoveKeys.actor.h +++ b/fdbserver/include/fdbserver/MoveKeys.actor.h @@ -88,7 +88,7 @@ struct MoveKeysParams { const DDEnabledState* ddEnabledState = nullptr; CancelConflictingDataMoves cancelConflictingDataMoves = CancelConflictingDataMoves::False; - Optional bulkLoadState; + Optional bulkLoadTaskState; MoveKeysParams() {} @@ -104,13 +104,13 @@ struct MoveKeysParams { UID relocationIntervalId, const DDEnabledState* ddEnabledState, CancelConflictingDataMoves cancelConflictingDataMoves, - Optional bulkLoadState) + Optional bulkLoadTaskState) : dataMoveId(dataMoveId), keys(keys), destinationTeam(destinationTeam), healthyDestinations(healthyDestinations), lock(lock), dataMovementComplete(dataMovementComplete), startMoveKeysParallelismLock(startMoveKeysParallelismLock), finishMoveKeysParallelismLock(finishMoveKeysParallelismLock), hasRemote(hasRemote), relocationIntervalId(relocationIntervalId), ddEnabledState(ddEnabledState), - cancelConflictingDataMoves(cancelConflictingDataMoves), bulkLoadState(bulkLoadState) {} + cancelConflictingDataMoves(cancelConflictingDataMoves), bulkLoadTaskState(bulkLoadTaskState) {} MoveKeysParams(UID dataMoveId, const std::vector& ranges, @@ -124,13 +124,13 @@ struct MoveKeysParams { UID relocationIntervalId, const DDEnabledState* ddEnabledState, CancelConflictingDataMoves cancelConflictingDataMoves, - Optional bulkLoadState) + Optional bulkLoadTaskState) : dataMoveId(dataMoveId), ranges(ranges), destinationTeam(destinationTeam), healthyDestinations(healthyDestinations), lock(lock), dataMovementComplete(dataMovementComplete), startMoveKeysParallelismLock(startMoveKeysParallelismLock), finishMoveKeysParallelismLock(finishMoveKeysParallelismLock), hasRemote(hasRemote), relocationIntervalId(relocationIntervalId), ddEnabledState(ddEnabledState), - cancelConflictingDataMoves(cancelConflictingDataMoves), bulkLoadState(bulkLoadState) {} + cancelConflictingDataMoves(cancelConflictingDataMoves), bulkLoadTaskState(bulkLoadTaskState) {} }; // read the lock value in system keyspace but do not change 
anything diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index da8c13f6b22..11eab9c9d26 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -74,8 +74,7 @@ #include "fdbrpc/Smoother.h" #include "fdbrpc/Stats.h" #include "fdbserver/AccumulativeChecksumUtil.h" -#include "fdbserver/BulkDumpUtil.actor.h" -#include "fdbserver/BulkLoadUtil.actor.h" +#include "fdbserver/BulkLoadAndDumpUtil.actor.h" #include "fdbserver/DataDistribution.actor.h" #include "fdbserver/FDBExecHelper.actor.h" #include "fdbclient/GetEncryptCipherKeys.h" @@ -6016,7 +6015,9 @@ ACTOR Future getRangeDataToDump(StorageServer* data, KeyRange ran ASSERT(res.second); ByteSampleInfo sampleInfo = isKeyValueInSample(KeyValueRef(kv.key, kv.value)); if (sampleInfo.inSample) { - auto resSample = sample.insert({ kv.key, kv.value }); + Key sampleKey = kv.key; + Value sampleValue = BinaryWriter::toValue(sampleInfo.sampledSize, Unversioned()); + auto resSample = sample.insert({ sampleKey, sampleValue }); ASSERT(resSample.second); } currentExpectedBytes = currentExpectedBytes + kv.expectedSize() + kv.expectedSize(); @@ -6075,19 +6076,19 @@ ACTOR Future bulkDumpQ(StorageServer* data, BulkDumpRequest req) { state uint64_t batchNum = 0; state Version versionToDump; state RangeDumpData rangeDumpData; - state std::string rootFolderLocal = getBulkDumpJobRoot(data->bulkDumpFolder, req.bulkDumpState.getJobId()); + state std::string rootFolderLocal = generateBulkLoadJobRoot(data->bulkDumpFolder, req.bulkDumpState.getJobId()); state std::string rootFolderRemote = - getBulkDumpJobRoot(req.bulkDumpState.getRemoteRoot(), req.bulkDumpState.getJobId()); + generateBulkLoadJobRoot(req.bulkDumpState.getRemoteRoot(), req.bulkDumpState.getJobId()); // Use jobId and taskId as the folder to store the data of the task range ASSERT(req.bulkDumpState.getTaskId().present()); - state std::string taskFolder = getBulkDumpTaskFolder(req.bulkDumpState.getTaskId().get()); - state BulkDumpFileSet destinationFileSets; + state std::string taskFolder = req.bulkDumpState.getTaskId().get().toString(); + state BulkLoadFileSet destinationFileSets; state Transaction tr(data->cx); loop { try { // Clear local files - clearFileFolder(abspath(joinPath(rootFolderLocal, taskFolder))); + platform::eraseDirectoryRecursive(abspath(joinPath(rootFolderLocal, taskFolder))); // Dump data of rangeToDump in a relativeFolder state KeyRange rangeToDump = Standalone(KeyRangeRef(rangeBegin, rangeEnd)); @@ -6108,36 +6109,36 @@ ACTOR Future bulkDumpQ(StorageServer* data, BulkDumpRequest req) { // The data in KVStore is dumped to the local folder at first and then // the local files are uploaded to the remote folder // Local files and remotes files have the same relative path but different root - state std::pair resFileSets = - getLocalRemoteFileSetSetting(versionToDump, - relativeFolder, - /*rootLocal=*/rootFolderLocal, - /*rootRemote=*/rootFolderRemote); + state std::pair resFileSets = + generateBulkLoadFileSetting(versionToDump, + relativeFolder, + /*rootLocal=*/rootFolderLocal, + /*rootRemote=*/rootFolderRemote); // The remote file path: - state BulkDumpFileSet localFileSetSetting = resFileSets.first; - state BulkDumpFileSet remoteFileSetSetting = resFileSets.second; + state BulkLoadFileSet localFileSetSetting = resFileSets.first; + state BulkLoadFileSet remoteFileSetSetting = resFileSets.second; // Generate byte sampling setting - ByteSampleSetting byteSampleSetting(0, - "hashlittle2", // use function name to represent the 
method - SERVER_KNOBS->BYTE_SAMPLING_FACTOR, - SERVER_KNOBS->BYTE_SAMPLING_OVERHEAD, - SERVER_KNOBS->MIN_BYTE_SAMPLING_PROBABILITY); + BulkLoadByteSampleSetting byteSampleSetting(0, + "hashlittle2", // use function name to represent the method + SERVER_KNOBS->BYTE_SAMPLING_FACTOR, + SERVER_KNOBS->BYTE_SAMPLING_OVERHEAD, + SERVER_KNOBS->MIN_BYTE_SAMPLING_PROBABILITY); // Write to SST file state KeyRange dataRange = rangeToDump & Standalone(KeyRangeRef(rangeBegin, keyAfter(rangeDumpData.lastKey))); - state BulkDumpManifest manifest = - dumpDataFileToLocalDirectory(data->thisServerID, - rangeDumpData.kvs, - rangeDumpData.sampled, - localFileSetSetting, - remoteFileSetSetting, - byteSampleSetting, - versionToDump, - dataRange, // the actual range of the rangeDumpData.kvs - rangeDumpData.kvsBytes); + state BulkLoadManifest manifest = + dumpDataFileToLocal(data->thisServerID, + rangeDumpData.kvs, + rangeDumpData.sampled, + localFileSetSetting, + remoteFileSetSetting, + byteSampleSetting, + versionToDump, + dataRange, // the actual range of the rangeDumpData.kvs + rangeDumpData.kvsBytes); readBytes = readBytes + rangeDumpData.kvsBytes; TraceEvent(SevInfo, "SSBulkDump", data->thisServerID) .detail("Task", req.bulkDumpState.toString()) @@ -6151,14 +6152,14 @@ ACTOR Future bulkDumpQ(StorageServer* data, BulkDumpRequest req) { .detail("BatchNum", batchNum); // Upload Files - state BulkDumpFileSet localFileSet = localFileSetSetting; + state BulkLoadFileSet localFileSet = localFileSetSetting; if (manifest.fileSet.dataFileName.empty()) { localFileSet.dataFileName = ""; } if (manifest.fileSet.byteSampleFileName.empty()) { localFileSet.byteSampleFileName = ""; } - wait(uploadBulkDumpFileSet( + wait(uploadBulkLoadFileSet( req.bulkDumpState.getTransportMethod(), localFileSet, manifest.fileSet, data->thisServerID)); // Progressively set metadata of the data range as complete phase @@ -6200,7 +6201,7 @@ ACTOR Future bulkDumpQ(StorageServer* data, BulkDumpRequest req) { wait(delay(1.0)); } try { - clearFileFolder(abspath(joinPath(rootFolderLocal, taskFolder))); + platform::eraseDirectoryRecursive(abspath(joinPath(rootFolderLocal, taskFolder))); } catch (Error& e) { // exit } @@ -9391,27 +9392,25 @@ ACTOR Future fallBackToAddingShard(StorageServer* data, MoveInShard* moveI ACTOR Future fetchShardFetchBulkLoadSSTFiles(StorageServer* data, MoveInShard* moveInShard, std::string dir, - BulkLoadState bulkLoadState) { + BulkLoadTaskState bulkLoadTaskState) { TraceEvent(SevInfo, "SSBulkLoadTaskFetchSSTFile", data->thisServerID) - .detail("BulkLoadTask", bulkLoadState.toString()) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) .detail("MoveInShard", moveInShard->toString()) .detail("Folder", abspath(dir)); state double fetchStartTime = now(); // Step 1: Fetch data to dir - state SSBulkLoadFileSet fileSetToLoad; - ASSERT(bulkLoadState.getTransportMethod() != BulkLoadTransportMethod::Invalid); - if (bulkLoadState.getTransportMethod() == BulkLoadTransportMethod::CP) { - wait(store( - fileSetToLoad, - bulkLoadTransportCP_impl(dir, bulkLoadState, SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX, data->thisServerID))); - } else { - throw not_implemented(); - } + ASSERT(bulkLoadTaskState.getTransportMethod() != BulkLoadTransportMethod::Invalid); + state SSBulkLoadFileSet fileSetToLoad = wait(downloadBulkLoadFileSet(bulkLoadTaskState.getTransportMethod(), + dir, + bulkLoadTaskState, + SERVER_KNOBS->BULKLOAD_FILE_BYTES_MAX, + data->thisServerID)); + // At this point, all data necessary for bulk loading is located in fileSetToLoad
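+ // Editor's sketch (not part of the change): the downloaded file set at this point might
+ // look like the following, with hypothetical paths, assuming the local-copy (CP) transport method:
+ //   fileSetToLoad.folder          = "<dir>/<taskId>"
+ //   fileSetToLoad.dataFileList    = { "<dir>/<taskId>/<uid>-data.sst" }
+ //   fileSetToLoad.bytesSampleFile = "<dir>/<taskId>/<uid>-bytesample.sst" (absent when no byte sample was dumped)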
TraceEvent(SevInfo, "SSBulkLoadTaskFetchSSTFileFetched", data->thisServerID) - .detail("BulkLoadTask", bulkLoadState.toString()) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) .detail("MoveInShard", moveInShard->toString()) .detail("Dir", dir) .detail("FileSetToLoad", fileSetToLoad.toString()); @@ -9420,17 +9419,17 @@ ACTOR Future fetchShardFetchBulkLoadSSTFiles(StorageServer* data, // TODO(BulkLoad): Validate all files specified in fileSetToLoad exist // TODO(BulkLoad): Check file checksum // TODO(BulkLoad): Check file data all in the moveInShard range - // TODO(BulkLoad): checkContent(fileSetToLoad.dataFileList, data->thisServerID); + // TODO(BulkLoad): checkContent if (!fileSetToLoad.bytesSampleFile.present()) { TraceEvent(SevWarn, "SSBulkLoadTaskFetchSSTFileByteSampleNotFound", data->thisServerID) - .detail("BulkLoadState", bulkLoadState.toString()) + .detail("BulkLoadTaskState", bulkLoadTaskState.toString()) .detail("FileSetToLoad", fileSetToLoad.toString()); Optional bytesSampleFile_ = wait(getBytesSamplingFromSSTFiles(fileSetToLoad.folder, fileSetToLoad.dataFileList, data->thisServerID)); fileSetToLoad.bytesSampleFile = bytesSampleFile_; } TraceEvent(SevInfo, "SSBulkLoadTaskFetchSSTFileValidated", data->thisServerID) - .detail("BulkLoadTask", bulkLoadState.toString()) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) .detail("MoveInShard", moveInShard->toString()) .detail("Folder", abspath(dir)) .detail("FileSetToLoad", fileSetToLoad.toString()); @@ -9440,16 +9439,16 @@ ACTOR Future fetchShardFetchBulkLoadSSTFiles(StorageServer* data, localRecord.checkpointID = UID(); localRecord.dir = abspath(fileSetToLoad.folder); for (const auto& range : moveInShard->ranges()) { - ASSERT(bulkLoadState.getRange().contains(range)); + ASSERT(bulkLoadTaskState.getRange().contains(range)); } localRecord.ranges = moveInShard->ranges(); - RocksDBCheckpointKeyValues rcp({ bulkLoadState.getRange() }); + RocksDBCheckpointKeyValues rcp({ bulkLoadTaskState.getRange() }); for (const auto& filePath : fileSetToLoad.dataFileList) { std::vector coalesceRanges = coalesceRangeList(moveInShard->ranges()); if (coalesceRanges.size() != 1) { TraceEvent(SevError, "SSBulkLoadTaskFetchSSTFileError", data->thisServerID) .detail("Reason", "MoveInShard ranges unexpected") - .detail("BulkLoadState", bulkLoadState.toString()) + .detail("BulkLoadTaskState", bulkLoadTaskState.toString()) .detail("MoveInShard", moveInShard->toString()) .detail("FileSetToLoad", fileSetToLoad.toString()); } @@ -9466,7 +9465,7 @@ ACTOR Future fetchShardFetchBulkLoadSSTFiles(StorageServer* data, const double duration = now() - fetchStartTime; const int64_t totalBytes = getTotalFetchedBytes(moveInShard->meta->checkpoints); TraceEvent(SevInfo, "SSBulkLoadTaskFetchSSTFileBuildMetadata", data->thisServerID) - .detail("BulkLoadTask", bulkLoadState.toString()) + .detail("BulkLoadTask", bulkLoadTaskState.toString()) .detail("MoveInShard", moveInShard->toString()) .detail("Folder", abspath(dir)) .detail("FileSetToLoad", fileSetToLoad.toString()) @@ -9574,10 +9573,10 @@ ACTOR Future fetchShardCheckpoint(StorageServer* data, MoveInShard* moveIn ACTOR Future fetchShardIngestCheckpoint(StorageServer* data, MoveInShard* moveInShard, - Optional bulkLoadState) { + Optional bulkLoadTaskState) { TraceEvent(SevInfo, "FetchShardIngestCheckpointBegin", data->thisServerID) .detail("Checkpoints", describe(moveInShard->checkpoints())) - .detail("BulkLoadTask", bulkLoadState.present() ? 
bulkLoadState.get().toString() : ""); + .detail("BulkLoadTask", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); ASSERT(moveInShard->getPhase() == MoveInPhase::Ingesting); state double startTime = now(); @@ -9590,7 +9589,7 @@ ACTOR Future fetchShardIngestCheckpoint(StorageServer* data, .errorUnsuppressed(e) .detail("MoveInShard", moveInShard->toString()) .detail("Checkpoints", describe(moveInShard->checkpoints())) - .detail("BulkLoadTask", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTask", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); if (e.code() == error_code_failed_to_restore_checkpoint && !moveInShard->failed()) { moveInShard->setPhase(MoveInPhase::Fetching); updateMoveInShardMetaData(data, moveInShard); @@ -9602,7 +9601,7 @@ ACTOR Future fetchShardIngestCheckpoint(StorageServer* data, TraceEvent(SevInfo, "FetchShardIngestedCheckpoint", data->thisServerID) .detail("MoveInShard", moveInShard->toString()) .detail("Checkpoints", describe(moveInShard->checkpoints())) - .detail("BulkLoadTask", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTask", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); if (moveInShard->failed()) { return Void(); @@ -9629,14 +9628,14 @@ ACTOR Future fetchShardIngestCheckpoint(StorageServer* data, .detail("Checkpoint", checkpoint.toString()) .detail("SampleKey", key) .detail("Size", size) - .detail("BulkLoadTask", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTask", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); continue; } TraceEvent(moveInShard->logSev, "StorageRestoreCheckpointKeySample", data->thisServerID) .detail("Checkpoint", checkpoint.checkpointID.toString()) .detail("SampleKey", key) .detail("Size", size) - .detail("BulkLoadTask", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTask", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); data->metrics.byteSample.sample.insert(key, size); data->metrics.notifyBytes(key, size); data->addMutationToMutationLogOrStorage( @@ -9657,7 +9656,7 @@ ACTOR Future fetchShardIngestCheckpoint(StorageServer* data, .detail("Bytes", totalBytes) .detail("Duration", duration) .detail("Rate", static_cast(totalBytes) / duration) - .detail("BulkLoadTask", bulkLoadState.present() ? bulkLoadState.get().toString() : ""); + .detail("BulkLoadTask", bulkLoadTaskState.present() ? bulkLoadTaskState.get().toString() : ""); return Void(); } @@ -9868,21 +9867,21 @@ ACTOR Future fetchShard(StorageServer* data, MoveInShard* moveInShard) { wait(data->fetchKeysParallelismLock.take(TaskPriority::DefaultYield)); state FlowLock::Releaser holdingFKPL(data->fetchKeysParallelismLock); - state Optional bulkLoadState; + state Optional bulkLoadTaskState; if (moveInShard->meta->conductBulkLoad) { - wait(store(bulkLoadState, - getBulkLoadStateFromDataMove(data->cx, moveInShard->dataMoveId(), data->thisServerID))); + wait(store(bulkLoadTaskState, + getBulkLoadTaskStateFromDataMove(data->cx, moveInShard->dataMoveId(), data->thisServerID))); } // It is possible that the data move id is generated by an old binary which does not // encode the data move type. In this case, it is possible that the data move id indicates // this is an bulk load data move but it is not. To tolerate this issue, here we check - // whether the bulkLoadState metadata is persisted in the data move metadata. 
If yes, + // whether the bulkLoadTaskState metadata is persisted in the data move metadata. If yes, // this SS conducts bulk loading. If no, the SS conducts a normal data move. - if (bulkLoadState.present()) { - ASSERT(bulkLoadState.get().getDataMoveId() == moveInShard->dataMoveId()); + if (bulkLoadTaskState.present()) { + ASSERT(bulkLoadTaskState.get().getDataMoveId() == moveInShard->dataMoveId()); TraceEvent(SevInfo, "FetchShardBeginReceivedBulkLoadTask", data->thisServerID) .detail("MoveInShard", moveInShard->toString()) - .detail("BulkLoadTask", bulkLoadState.get().toString()); + .detail("BulkLoadTask", bulkLoadTaskState.get().toString()); } loop { @@ -9892,13 +9891,13 @@ ACTOR Future fetchShard(StorageServer* data, MoveInShard* moveInShard) { try { // Pending = 0, Fetching = 1, Ingesting = 2, ApplyingUpdates = 3, Complete = 4, Deleting = 4, Fail = 6, if (phase == MoveInPhase::Fetching) { - if (bulkLoadState.present()) { - wait(fetchShardFetchBulkLoadSSTFiles(data, moveInShard, dir, bulkLoadState.get())); + if (bulkLoadTaskState.present()) { + wait(fetchShardFetchBulkLoadSSTFiles(data, moveInShard, dir, bulkLoadTaskState.get())); } else { wait(fetchShardCheckpoint(data, moveInShard, dir)); } } else if (phase == MoveInPhase::Ingesting) { - wait(fetchShardIngestCheckpoint(data, moveInShard, bulkLoadState)); + wait(fetchShardIngestCheckpoint(data, moveInShard, bulkLoadTaskState)); } else if (phase == MoveInPhase::ApplyingUpdates) { wait(fetchShardApplyUpdates(data, moveInShard, moveInUpdates)); } else if (phase == MoveInPhase::Complete) { @@ -10616,7 +10615,7 @@ void changeServerKeysWithPhysicalShards(StorageServer* data, .detail("NowAssigned", nowAssigned) .detail("Version", version) .detail("PhysicalShardMove", static_cast(enablePSM)) - .detail("BulkLoading", static_cast(conductBulkLoad)) + .detail("ConductBulkLoad", static_cast(conductBulkLoad)) .detail("IsTSS", data->isTss()) .detail("Context", changeServerKeysContextName(context)); diff --git a/fdbserver/workloads/BulkDumpAndLoad.actor.cpp b/fdbserver/workloads/BulkDumpAndLoad.actor.cpp new file mode 100644 index 00000000000..e73a0801f5a --- /dev/null +++ b/fdbserver/workloads/BulkDumpAndLoad.actor.cpp @@ -0,0 +1,275 @@ +/* + * BulkDumpAndLoad.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbclient/BulkLoadAndDump.h" +#include "fdbclient/FDBTypes.h" +#include "fdbclient/ManagementAPI.actor.h" +#include "fdbclient/NativeAPI.actor.h" +#include "fdbclient/SystemData.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "fdbserver/BulkLoadAndDumpUtil.actor.h" +#include "flow/Error.h" +#include "flow/Platform.h" +#include "flow/Trace.h" +#include "flow/actorcompiler.h" // This must be the last #include. 
+ +const std::string simulationBulkDumpFolder = joinPath("simfdb", "bulkDumpAndLoad"); + +struct BulkDumpAndLoad : TestWorkload { + static constexpr auto NAME = "BulkDumpAndLoadWorkload"; + const bool enabled; + bool pass; + + // This workload is not compatible with the following workloads because they race in changing the DD mode. + // It is also not compatible with RandomRangeLock because of conflicts in range locking. + void disableFailureInjectionWorkloads(std::set& out) const override { + out.insert({ "RandomMoveKeys", + "DataLossRecovery", + "IDDTxnProcessorApiCorrectness", + "PerpetualWiggleStatsWorkload", + "PhysicalShardMove", + "StorageCorruption", + "StorageServerCheckpointRestoreTest", + "ValidateStorage", + "RandomRangeLock", + "BulkLoadTask" }); + } + + BulkDumpAndLoad(WorkloadContext const& wcx) : TestWorkload(wcx), enabled(true), pass(true) {} + + Future setup(Database const& cx) override { return Void(); } + + Future start(Database const& cx) override { return _start(this, cx); } + + Future check(Database const& cx) override { return true; } + + void getMetrics(std::vector& m) override {} + + Standalone getRandomStringRef() const { + int stringLength = deterministicRandom()->randomInt(1, 10); + Standalone stringBuffer = makeString(stringLength); + deterministicRandom()->randomBytes(mutateString(stringBuffer), stringLength); + return stringBuffer; + } + + KeyRange getRandomRange(BulkDumpAndLoad* self, KeyRange scope) const { + loop { + Standalone keyA = self->getRandomStringRef(); + Standalone keyB = self->getRandomStringRef(); + if (!scope.contains(keyA) || !scope.contains(keyB)) { + continue; + } else if (keyA < keyB) { + return Standalone(KeyRangeRef(keyA, keyB)); + } else if (keyA > keyB) { + return Standalone(KeyRangeRef(keyB, keyA)); + } else { + continue; + } + } + } + + std::map generateOrderedKVS(BulkDumpAndLoad* self, KeyRange range, size_t count) { + std::map kvs; // ordered + while (kvs.size() < count) { + Standalone str = self->getRandomStringRef(); + Key key = range.begin.withSuffix(str); + Value val = self->getRandomStringRef(); + if (!range.contains(key)) { + continue; + } + auto res = kvs.insert({ key, val }); + if (!res.second) { + continue; + } + } + return kvs; // ordered + } + + ACTOR Future setKeys(Database cx, std::map kvs) { + state Transaction tr(cx); + loop { + try { + for (const auto& [key, value] : kvs) { + tr.set(key, value); + } + wait(tr.commit()); + return Void(); + } catch (Error& e) { + wait(tr.onError(e)); + } + } + } + + ACTOR Future waitUntilTaskComplete(Database cx, BulkDumpState newTask) { + state std::vector res; + loop { + try { + res.clear(); + wait(store(res, getBulkDumpTasksWithinRange(cx, normalKeys))); + // When complete, the job metadata is cleared + if (res.empty()) { + break; + } + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + } + wait(delay(30.0)); + } + return Void(); + } + + ACTOR Future clearDatabase(Database cx) { + state Transaction tr(cx); + loop { + try { + tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + tr.clear(normalKeys); + tr.clear(bulkDumpKeys); + tr.clear(bulkLoadTaskKeys); + wait(tr.commit()); + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + return Void(); + } + + ACTOR Future waitUntilLoadJobComplete(Database cx, KeyRange range) { + loop { + state bool complete = true; + state Transaction tr(cx); + state Key readBegin = range.begin; + state Key readEnd = range.end; + state RangeResult
rangeResult; + while (readBegin < readEnd) { + try { + rangeResult.clear(); + tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); + tr.setOption(FDBTransactionOptions::LOCK_AWARE); + wait(store(rangeResult, + krmGetRanges(&tr, + bulkLoadJobPrefix, + KeyRangeRef(readBegin, readEnd), + CLIENT_KNOBS->KRM_GET_RANGE_LIMIT, + CLIENT_KNOBS->KRM_GET_RANGE_LIMIT_BYTES))); + for (int i = 0; i < rangeResult.size() - 1; i++) { + if (rangeResult[i].value.empty()) { + continue; + } + BulkLoadJobState task = decodeBulkLoadJobState(rangeResult[i].value); + if (task.getPhase() != BulkLoadJobPhase::Complete) { + complete = false; + break; + } + } + if (!complete) { + break; + } + readBegin = rangeResult.back().key; + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + wait(tr.onError(e)); + } + } + if (complete) { + break; + } + wait(delay(30.0)); + } + return Void(); + } + + ACTOR Future> getAllKVSFromDB(Database cx) { + state Transaction tr(cx); + state std::map kvs; + loop { + try { + RangeResult kvsRes = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); + ASSERT(!kvsRes.more); + kvs.clear(); + for (auto& kv : kvsRes) { + auto res = kvs.insert({ kv.key, kv.value }); + ASSERT(res.second); + } + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } + return kvs; + } + + ACTOR Future _start(BulkDumpAndLoad* self, Database cx) { + if (self->clientId != 0) { + return Void(); + } + + if (g_network->isSimulated()) { + // A network partition between the CC and the DD can cause the DD to disappear, + // which prevents the bulk loading task from completing. + // So, this workload disables network partitions. + disableConnectionFailures("BulkDumpAndLoad"); + } + + state std::map kvs = self->generateOrderedKVS(self, normalKeys, 1000); + wait(self->setKeys(cx, kvs)); + + // Submit a bulk dump job + state int oldBulkDumpMode = 0; + wait(store(oldBulkDumpMode, setBulkDumpMode(cx, 1))); // Enable bulkDump + state BulkDumpState newJob = newBulkDumpJobLocalSST(normalKeys, simulationBulkDumpFolder); + wait(submitBulkDumpJob(cx, newJob)); + TraceEvent("BulkDumpAndLoadWorkLoad").detail("Phase", "Dump Job Submitted").detail("Job", newJob.toString()); + + // Wait until the dump job completes + wait(self->waitUntilTaskComplete(cx, newJob)); + TraceEvent("BulkDumpAndLoadWorkLoad").detail("Phase", "Dump Job Complete").detail("Job", newJob.toString()); + + // Clear database + wait(self->clearDatabase(cx)); + TraceEvent("BulkDumpAndLoadWorkLoad").detail("Phase", "Clear DB").detail("Job", newJob.toString()); + + // Submit a bulk load job + state int oldBulkLoadMode = 0; + wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1))); // Enable bulkLoad + state BulkLoadJobState bulkLoadJobTask = + newBulkLoadJobLocalSST(newJob.getJobId(), newJob.getRange(), newJob.getRemoteRoot()); + TraceEvent("BulkDumpAndLoadWorkLoad").detail("Phase", "Load Job Submitted").detail("Job", newJob.toString()); + bool succeed = wait(submitBulkLoadJob(cx, bulkLoadJobTask)); + ASSERT(succeed); + + // Wait until the load job completes + wait(self->waitUntilLoadJobComplete(cx, newJob.getRange())); + TraceEvent("BulkDumpAndLoadWorkLoad").detail("Phase", "Load Job Complete").detail("Job", newJob.toString()); + + // Check that the loaded data in the DB is the same as the data before dumping + std::map newKvs = wait(self->getAllKVSFromDB(cx)); + ASSERT(kvs == newKvs); + return Void(); + } +}; + +WorkloadFactory BulkDumpAndLoadFactory; diff --git a/fdbserver/workloads/BulkDumping.actor.cpp b/fdbserver/workloads/BulkDumping.actor.cpp
deleted file mode 100644 index 1fa56adac17..00000000000 --- a/fdbserver/workloads/BulkDumping.actor.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * BulkDumping.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2024 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbclient/BulkDumping.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbclient/NativeAPI.actor.h" -#include "fdbserver/workloads/workloads.actor.h" -#include "flow/Error.h" -#include "flow/Platform.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -const std::string simulationBulkDumpFolder = joinPath("simfdb", "bulkdump"); - -struct BulkDumping : TestWorkload { - static constexpr auto NAME = "BulkDumpingWorkload"; - const bool enabled; - bool pass; - - BulkDumping(WorkloadContext const& wcx) : TestWorkload(wcx), enabled(true), pass(true) {} - - Future setup(Database const& cx) override { return Void(); } - - Future start(Database const& cx) override { return _start(this, cx); } - - Future check(Database const& cx) override { return true; } - - void getMetrics(std::vector& m) override {} - - Standalone getRandomStringRef() const { - int stringLength = deterministicRandom()->randomInt(1, 10); - Standalone stringBuffer = makeString(stringLength); - deterministicRandom()->randomBytes(mutateString(stringBuffer), stringLength); - return stringBuffer; - } - - KeyRange getRandomRange(BulkDumping* self, KeyRange scope) const { - loop { - Standalone keyA = self->getRandomStringRef(); - Standalone keyB = self->getRandomStringRef(); - if (!scope.contains(keyA) || !scope.contains(keyB)) { - continue; - } else if (keyA < keyB) { - return Standalone(KeyRangeRef(keyA, keyB)); - } else if (keyA > keyB) { - return Standalone(KeyRangeRef(keyB, keyA)); - } else { - continue; - } - } - } - - std::vector generateOrderedKVS(BulkDumping* self, KeyRange range, size_t count) { - std::set keys; // ordered - while (keys.size() < count) { - Standalone str = self->getRandomStringRef(); - Key key = range.begin.withSuffix(str); - if (keys.contains(key)) { - continue; - } - if (!range.contains(key)) { - continue; - } - keys.insert(key); - } - std::vector res; - for (const auto& key : keys) { - Value val = self->getRandomStringRef(); - res.push_back(Standalone(KeyValueRef(key, val))); - } - return res; // ordered - } - - ACTOR Future setKeys(Database cx, std::vector kvs) { - state Transaction tr(cx); - loop { - try { - for (const auto& kv : kvs) { - tr.set(kv.key, kv.value); - } - wait(tr.commit()); - return Void(); - } catch (Error& e) { - wait(tr.onError(e)); - } - } - } - - ACTOR Future waitUntilTaskComplete(Database cx, BulkDumpState newTask) { - state std::vector res; - loop { - try { - res.clear(); - wait(store(res, getBulkDumpTasksWithinRange(cx, normalKeys))); - // When complete, the job metadata is cleared - if (res.empty()) { - break; - } - } catch (Error& e) { - if (e.code() == 
error_code_actor_cancelled) { - throw e; - } - } - wait(delay(30.0)); - } - return Void(); - } - - ACTOR Future _start(BulkDumping* self, Database cx) { - if (self->clientId != 0) { - return Void(); - } - - std::vector kvs = self->generateOrderedKVS(self, normalKeys, 1000); - wait(self->setKeys(cx, kvs)); - - state int oldBulkDumpMode = 0; - wait(store(oldBulkDumpMode, setBulkDumpMode(cx, 1))); - TraceEvent("BulkDumpingSetMode").detail("OldMode", oldBulkDumpMode).detail("NewMode", 1); - - state BulkDumpState newTask = newBulkDumpTaskLocalSST(normalKeys, simulationBulkDumpFolder); - TraceEvent("BulkDumpingTaskNew").detail("Task", newTask.toString()); - wait(submitBulkDumpJob(cx, newTask)); - std::vector res = wait(getBulkDumpTasksWithinRange(cx, normalKeys, 100)); - for (const auto& task : res) { - TraceEvent("BulkDumpingTaskRes").detail("Task", task.toString()); - } - wait(self->waitUntilTaskComplete(cx, newTask)); - - return Void(); - } -}; - -WorkloadFactory BulkDumpingFactory; diff --git a/fdbserver/workloads/BulkLoading.actor.cpp b/fdbserver/workloads/BulkLoadTask.actor.cpp similarity index 75% rename from fdbserver/workloads/BulkLoading.actor.cpp rename to fdbserver/workloads/BulkLoadTask.actor.cpp index 52d2f12c5a9..3b68ff16389 100644 --- a/fdbserver/workloads/BulkLoading.actor.cpp +++ b/fdbserver/workloads/BulkLoadTask.actor.cpp @@ -1,5 +1,5 @@ /* - * BulkLoading.actor.cpp + * BulkLoadTask.actor.cpp * * This source file is part of the FoundationDB open source project * @@ -18,25 +18,25 @@ * limitations under the License. */ -#include "fdbclient/BulkLoading.h" +#include "fdbclient/BulkLoadAndDump.h" #include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/NativeAPI.actor.h" -#include "fdbserver/BulkLoadUtil.actor.h" +#include "fdbserver/BulkLoadAndDumpUtil.actor.h" #include "fdbserver/RocksDBCheckpointUtils.actor.h" #include "fdbserver/StorageMetrics.actor.h" #include "fdbserver/workloads/workloads.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
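+// Editor's sketch (not part of the change): the per-task bulk load API exercised by this
+// workload, using the calls as they appear below; retries and error handling omitted:
+//   BulkLoadTaskState task = newBulkLoadTaskLocalSST(UID(), range, folder, dataFile, byteSampleFile);
+//   wait(submitBulkLoadTask(cx, task));                                   // persist the task metadata
+//   wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1)));                 // let DD pick the task up
+//   ... wait until the task phase reaches BulkLoadTaskPhase::Complete ...
+//   wait(acknowledgeBulkLoadTask(cx, task.getRange(), task.getTaskId())); // clear the task metadata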
-const std::string simulationBulkLoadFolder = "bulkLoad"; +const std::string simulationBulkLoadFolder = "bulkLoadTask"; struct BulkLoadTaskTestUnit { - BulkLoadState bulkLoadTask; + BulkLoadTaskState bulkLoadTask; std::vector data; BulkLoadTaskTestUnit() = default; }; -struct BulkLoading : TestWorkload { - static constexpr auto NAME = "BulkLoadingWorkload"; +struct BulkLoadTask : TestWorkload { + static constexpr auto NAME = "BulkLoadTaskWorkload"; const bool enabled; bool pass; bool debugging = false; @@ -53,10 +53,11 @@ struct BulkLoading : TestWorkload { "StorageCorruption", "StorageServerCheckpointRestoreTest", "ValidateStorage", - "RandomRangeLock" }); + "RandomRangeLock", + "BulkDumpAndLoad" }); } - BulkLoading(WorkloadContext const& wcx) : TestWorkload(wcx), enabled(true), pass(true) {} + BulkLoadTask(WorkloadContext const& wcx) : TestWorkload(wcx), enabled(true), pass(true) {} Future setup(Database const& cx) override { return Void(); } @@ -66,23 +67,31 @@ struct BulkLoading : TestWorkload { void getMetrics(std::vector& m) override {} - ACTOR Future submitBulkLoadTasks(BulkLoading* self, Database cx, std::vector tasks) { + std::string generateRandomBulkLoadDataFileName() { + return deterministicRandom()->randomUniqueID().toString() + "-data.sst"; + } + + std::string generateRandomBulkLoadBytesSampleFileName() { + return deterministicRandom()->randomUniqueID().toString() + "-bytesample.sst"; + } + + ACTOR Future submitBulkLoadTasks(BulkLoadTask* self, Database cx, std::vector tasks) { state int i = 0; for (; i < tasks.size(); i++) { loop { try { wait(submitBulkLoadTask(cx, tasks[i])); - TraceEvent("BulkLoadingSubmitBulkLoadTask") + TraceEvent("BulkLoadTaskSubmitBulkLoadTask") .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("BulkLoadState", tasks[i].toString()); + .detail("BulkLoadTaskState", tasks[i].toString()); break; } catch (Error& e) { - TraceEvent("BulkLoadingSubmitBulkLoadTaskError") + TraceEvent("BulkLoadTaskSubmitBulkLoadTaskError") .setMaxEventLength(-1) .setMaxFieldLength(-1) .errorUnsuppressed(e) - .detail("BulkLoadState", tasks[i].toString()); + .detail("BulkLoadTaskState", tasks[i].toString()); wait(delay(0.1)); } } @@ -90,23 +99,23 @@ struct BulkLoading : TestWorkload { return Void(); } - ACTOR Future acknowledgeBulkLoadTasks(BulkLoading* self, Database cx, std::vector tasks) { + ACTOR Future acknowledgeBulkLoadTasks(BulkLoadTask* self, Database cx, std::vector tasks) { state int i = 0; for (; i < tasks.size(); i++) { loop { try { wait(acknowledgeBulkLoadTask(cx, tasks[i].getRange(), tasks[i].getTaskId())); - TraceEvent("BulkLoadingAcknowledgeBulkLoadTask") + TraceEvent("BulkLoadTaskAcknowledgeBulkLoadTask") .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("BulkLoadState", tasks[i].toString()); + .detail("BulkLoadTaskState", tasks[i].toString()); break; } catch (Error& e) { - TraceEvent("BulkLoadingAcknowledgeBulkLoadTaskError") + TraceEvent("BulkLoadTaskAcknowledgeBulkLoadTaskError") .setMaxEventLength(-1) .setMaxFieldLength(-1) .errorUnsuppressed(e) - .detail("BulkLoadState", tasks[i].toString()); + .detail("BulkLoadTaskState", tasks[i].toString()); if (e.code() == error_code_bulkload_task_outdated) { break; // has been erased or overwritten by other tasks } @@ -143,19 +152,20 @@ struct BulkLoading : TestWorkload { while (beginKey < endKey) { try { tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); - RangeResult res = wait(krmGetRanges(&tr, bulkLoadPrefix, Standalone(KeyRangeRef(beginKey, endKey)))); + RangeResult res = + wait(krmGetRanges(&tr, 
bulkLoadTaskPrefix, Standalone(KeyRangeRef(beginKey, endKey)))); for (int i = 0; i < res.size() - 1; i++) { if (!res[i].value.empty()) { - BulkLoadState bulkLoadState = decodeBulkLoadState(res[i].value); - ASSERT(bulkLoadState.isValid()); - if (bulkLoadState.getRange() != KeyRangeRef(res[i].key, res[i + 1].key)) { + BulkLoadTaskState bulkLoadTaskState = decodeBulkLoadTaskState(res[i].value); + ASSERT(bulkLoadTaskState.isValid()); + if (bulkLoadTaskState.getRange() != KeyRangeRef(res[i].key, res[i + 1].key)) { continue; // Ignore outdated task } - if (bulkLoadState.phase != BulkLoadPhase::Complete) { - TraceEvent("BulkLoadingWorkLoadIncompleteTasks") + if (bulkLoadTaskState.phase != BulkLoadTaskPhase::Complete) { + TraceEvent("BulkLoadTaskWorkLoadIncompleteTasks") .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("Task", bulkLoadState.toString()); + .detail("Task", bulkLoadTaskState.toString()); return false; } } @@ -168,7 +178,7 @@ struct BulkLoading : TestWorkload { return true; } - ACTOR Future waitUntilAllTaskComplete(BulkLoading* self, Database cx) { + ACTOR Future waitUntilAllTaskComplete(BulkLoadTask* self, Database cx) { loop { bool complete = wait(self->checkAllTaskComplete(cx)); if (complete) { @@ -179,7 +189,7 @@ struct BulkLoading : TestWorkload { return Void(); } - ACTOR Future checkBulkLoadMetadataCleared(BulkLoading* self, Database cx) { + ACTOR Future checkBulkLoadMetadataCleared(BulkLoadTask* self, Database cx) { state Key beginKey = allKeys.begin; state Key endKey = allKeys.end; state KeyRange rangeToRead; @@ -188,22 +198,22 @@ struct BulkLoading : TestWorkload { try { tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); rangeToRead = Standalone(KeyRangeRef(beginKey, endKey)); - RangeResult res = wait(krmGetRanges(&tr, bulkLoadPrefix, allKeys)); + RangeResult res = wait(krmGetRanges(&tr, bulkLoadTaskPrefix, allKeys)); beginKey = res.back().key; int emptyCount = 0; int nonEmptyCount = 0; for (int i = 0; i < res.size() - 1; i++) { if (!res[i].value.empty()) { - BulkLoadState bulkLoadState = decodeBulkLoadState(res[i].value); + BulkLoadTaskState bulkLoadTaskState = decodeBulkLoadTaskState(res[i].value); KeyRange currentRange = Standalone(KeyRangeRef(res[i].key, res[i + 1].key)); - if (bulkLoadState.getRange() == currentRange) { - TraceEvent("BulkLoadingWorkLoadMetadataNotCleared") + if (bulkLoadTaskState.getRange() == currentRange) { + TraceEvent("BulkLoadTaskWorkLoadMetadataNotCleared") .setMaxEventLength(-1) .setMaxFieldLength(-1) - .detail("BulkLoadTask", bulkLoadState.toString()); + .detail("BulkLoadTask", bulkLoadTaskState.toString()); return false; } else { - ASSERT(bulkLoadState.getRange().contains(currentRange)); + ASSERT(bulkLoadTaskState.getRange().contains(currentRange)); } nonEmptyCount++; } else { @@ -228,13 +238,13 @@ struct BulkLoading : TestWorkload { return false; } - ACTOR Future> getKvsFromDB(BulkLoading* self, + ACTOR Future> getKvsFromDB(BulkLoadTask* self, Database cx, std::vector outdatedRanges, std::vector loadedRanges) { state std::vector res; state Transaction tr(cx); - TraceEvent("BulkLoadingWorkLoadGetKVSFromDBStart"); + TraceEvent("BulkLoadTaskWorkLoadGetKVSFromDBStart"); loop { try { RangeResult result = wait(tr.getRange(normalKeys, CLIENT_KNOBS->TOO_MANY)); @@ -253,7 +263,7 @@ struct BulkLoading : TestWorkload { wait(tr.onError(e)); } } - TraceEvent("BulkLoadingWorkLoadGetKVSFromDBDone"); + TraceEvent("BulkLoadTaskWorkLoadGetKVSFromDBDone"); return res; } @@ -264,7 +274,7 @@ struct BulkLoading : TestWorkload { return stringBuffer; } - 
KeyRange getRandomRange(BulkLoading* self, KeyRange scope) const { + KeyRange getRandomRange(BulkLoadTask* self, KeyRange scope) const { loop { Standalone keyA = self->getRandomStringRef(); Standalone keyB = self->getRandomStringRef(); @@ -280,7 +290,7 @@ struct BulkLoading : TestWorkload { } } - std::vector generateOrderedKVS(BulkLoading* self, KeyRange range, size_t count) { + std::vector generateOrderedKVS(BulkLoadTask* self, KeyRange range, size_t count) { std::set keys; // ordered while (keys.size() < count) { Standalone str = self->getRandomStringRef(); @@ -301,7 +311,7 @@ struct BulkLoading : TestWorkload { return res; // ordered } - void generateSSTFiles(BulkLoading* self, BulkLoadTaskTestUnit task) { + void generateSSTFiles(BulkLoadTask* self, BulkLoadTaskTestUnit task) { std::string folder = task.bulkLoadTask.getFolder(); platform::eraseDirectoryRecursive(folder); ASSERT(platform::createDirectory(folder)); @@ -320,7 +330,7 @@ struct BulkLoading : TestWorkload { } sstWriter->write(kv.key, kv.value); } - TraceEvent("BulkLoadingDataProduced") + TraceEvent("BulkLoadTaskDataProduced") .detail("Task", task.bulkLoadTask.toString()) .detail("LoadKeyCount", task.data.size()) .detail("BytesSampleSize", bytesSample.size()) @@ -349,7 +359,7 @@ struct BulkLoading : TestWorkload { for (const auto& kv : bytesSample) { sstWriter->write(kv.key, kv.value); } - TraceEvent("BulkLoadingByteSampleProduced") + TraceEvent("BulkLoadTaskByteSampleProduced") .detail("Task", task.bulkLoadTask.toString()) .detail("LoadKeyCount", task.data.size()) .detail("BytesSampleSize", bytesSample.size()) @@ -358,27 +368,28 @@ struct BulkLoading : TestWorkload { .detail("BytesSampleFile", bytesSampleFile); ASSERT(sstWriter->finish()); } - TraceEvent("BulkLoadingProduceDataToLoad").detail("Folder", folder).detail("LoadKeyCount", task.data.size()); + TraceEvent("BulkLoadTaskProduceDataToLoad").detail("Folder", folder).detail("LoadKeyCount", task.data.size()); return; } - BulkLoadTaskTestUnit generateBulkLoadTaskUnit(BulkLoading* self, + BulkLoadTaskTestUnit generateBulkLoadTaskUnit(BulkLoadTask* self, std::string folderPath, int dataSize, Optional range = Optional()) { - std::string dataFilePath = joinPath(folderPath, generateRandomBulkLoadDataFileName()); - std::string bytesSampleFilePath = joinPath(folderPath, generateRandomBulkLoadBytesSampleFileName()); + std::string dataFilePath = joinPath(folderPath, self->generateRandomBulkLoadDataFileName()); + std::string bytesSampleFilePath = joinPath(folderPath, self->generateRandomBulkLoadBytesSampleFileName()); KeyRange rangeToLoad = range.present() ? 
         KeyRange rangeToLoad = range.present() ? range.get() : self->getRandomRange(self, normalKeys);
         BulkLoadTaskTestUnit taskUnit;
-        taskUnit.bulkLoadTask = newBulkLoadTaskLocalSST(rangeToLoad, folderPath, dataFilePath, bytesSampleFilePath);
+        taskUnit.bulkLoadTask =
+            newBulkLoadTaskLocalSST(UID(), rangeToLoad, folderPath, dataFilePath, bytesSampleFilePath);
         taskUnit.data = self->generateOrderedKVS(self, rangeToLoad, dataSize);
         self->generateSSTFiles(self, taskUnit);
         return taskUnit;
     }
 
-    bool checkSame(BulkLoading* self, std::vector<KeyValue> kvs, std::vector<KeyValue> kvsdb) {
+    bool checkSame(BulkLoadTask* self, std::vector<KeyValue> kvs, std::vector<KeyValue> kvsdb) {
         if (kvs.size() != kvsdb.size()) {
-            TraceEvent(SevError, "BulkLoadingWorkLoadDataWrong")
+            TraceEvent(SevError, "BulkLoadTaskWorkLoadDataWrong")
                 .detail("Reason", "KeyValue count wrong")
                 .detail("KVS", kvs.size())
                 .detail("DB", kvsdb.size());
@@ -408,13 +419,13 @@ struct BulkLoading : TestWorkload {
         std::sort(kvsdb.begin(), kvsdb.end(), [](KeyValue a, KeyValue b) { return a.key < b.key; });
         for (int i = 0; i < kvs.size(); i++) {
             if (kvs[i].key != kvsdb[i].key) {
-                TraceEvent(SevError, "BulkLoadingWorkLoadDataWrong")
+                TraceEvent(SevError, "BulkLoadTaskWorkLoadDataWrong")
                     .detail("Reason", "Key mismatch")
                     .detail("KVS", kvs[i])
                     .detail("DB", kvsdb[i]);
                 return false;
             } else if (kvs[i].value != kvsdb[i].value) {
-                TraceEvent(SevError, "BulkLoadingWorkLoadDataWrong")
+                TraceEvent(SevError, "BulkLoadTaskWorkLoadDataWrong")
                     .detail("Reason", "Value mismatch")
                     .detail("KVS", kvs[i])
                     .detail("DB", kvsdb[i]);
                 return false;
@@ -425,15 +436,15 @@ struct BulkLoading : TestWorkload {
     }
 
     // Issue three non-overlapping tasks and check data consistency and correctness
-    ACTOR Future<Void> simpleTest(BulkLoading* self, Database cx) {
-        TraceEvent("BulkLoadingWorkLoadSimpleTestBegin");
+    ACTOR Future<Void> simpleTest(BulkLoadTask* self, Database cx) {
+        TraceEvent("BulkLoadTaskWorkLoadSimpleTestBegin");
         state int counter = 0;
         state int oldBulkLoadMode = 0;
-        state std::vector<BulkLoadState> bulkLoadStates;
+        state std::vector<BulkLoadTaskState> bulkLoadTaskStates;
         state std::vector<std::vector<KeyValue>> bulkLoadDataList;
         state std::vector<KeyRange> completeRanges;
         loop { // New tasks overwrite old tasks on the same range
-            bulkLoadStates.clear();
+            bulkLoadTaskStates.clear();
             bulkLoadDataList.clear();
             completeRanges.clear();
             for (int i = 0; i < 2; i++) {
@@ -445,17 +456,17 @@ struct BulkLoading : TestWorkload {
                 int dataSize = deterministicRandom()->randomInt(2, 5);
                 BulkLoadTaskTestUnit taskUnit =
                     self->generateBulkLoadTaskUnit(self, folderPath, dataSize, KeyRangeRef(beginKey, endKey));
-                bulkLoadStates.push_back(taskUnit.bulkLoadTask);
+                bulkLoadTaskStates.push_back(taskUnit.bulkLoadTask);
                 bulkLoadDataList.push_back(taskUnit.data);
                 completeRanges.push_back(taskUnit.bulkLoadTask.getRange());
             }
             // Issue above 3 tasks in the same transaction
-            wait(self->submitBulkLoadTasks(self, cx, bulkLoadStates));
-            TraceEvent("BulkLoadingWorkLoadSimpleTestIssuedTasks");
+            wait(self->submitBulkLoadTasks(self, cx, bulkLoadTaskStates));
+            TraceEvent("BulkLoadTaskWorkLoadSimpleTestIssuedTasks");
             wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1)));
-            TraceEvent("BulkLoadingWorkLoadSimpleTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
+            TraceEvent("BulkLoadTaskWorkLoadSimpleTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
             wait(self->waitUntilAllTaskComplete(self, cx));
-            TraceEvent("BulkLoadingWorkLoadSimpleTestAllComplete");
+            TraceEvent("BulkLoadTaskWorkLoadSimpleTestAllComplete");
             counter++;
             if (counter > 1) {
                 break;
@@ -464,7 +475,7 @@ struct BulkLoading : TestWorkload {
 
         // Check data
         wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 0)));
-        TraceEvent("BulkLoadingWorkLoadSimpleTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 0);
+        TraceEvent("BulkLoadTaskWorkLoadSimpleTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 0);
         state std::vector<KeyValue> dbkvs = wait(self->getKvsFromDB(self, cx, std::vector<KeyRange>(), completeRanges));
         state std::vector<KeyValue> kvs;
         for (int j = 0; j < bulkLoadDataList.size(); j++) {
@@ -474,8 +485,8 @@ struct BulkLoading : TestWorkload {
 
         // Check bulk load metadata
         wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1)));
-        TraceEvent("BulkLoadingWorkLoadSimpleTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
-        wait(self->acknowledgeBulkLoadTasks(self, cx, bulkLoadStates));
+        TraceEvent("BulkLoadTaskWorkLoadSimpleTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
+        wait(self->acknowledgeBulkLoadTasks(self, cx, bulkLoadTaskStates));
         loop {
             bool cleared = wait(self->checkBulkLoadMetadataCleared(self, cx));
             if (cleared) {
@@ -483,7 +494,7 @@ struct BulkLoading : TestWorkload {
             }
             wait(delay(1.0));
         }
-        TraceEvent("BulkLoadingWorkLoadSimpleTestComplete");
+        TraceEvent("BulkLoadTaskWorkLoadSimpleTestComplete");
         return Void();
     }
@@ -502,7 +513,7 @@ struct BulkLoading : TestWorkload {
         }
     }
 
-    ACTOR Future<Void> backgroundWriteTraffic(BulkLoading* self, Database cx) {
+    ACTOR Future<Void> backgroundWriteTraffic(BulkLoadTask* self, Database cx) {
         loop {
             int keyCount = deterministicRandom()->randomInt(1, 20);
             std::vector<KeyValue> kvs = self->generateOrderedKVS(self, normalKeys, keyCount);
@@ -512,7 +523,7 @@ struct BulkLoading : TestWorkload {
         }
     }
 
-    ACTOR Future<Void> complexTest(BulkLoading* self, Database cx) {
+    ACTOR Future<Void> complexTest(BulkLoadTask* self, Database cx) {
         state KeyRangeMap<Optional<BulkLoadTaskTestUnit>> taskMap;
         taskMap.insert(allKeys, Optional<BulkLoadTaskTestUnit>());
         state int i = 0;
@@ -521,7 +532,7 @@ struct BulkLoading : TestWorkload {
 
         // Run tasks
         wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1)));
-        TraceEvent("BulkLoadingWorkLoadComplexTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
+        TraceEvent("BulkLoadTaskWorkLoadComplexTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
         for (; i < 3; i++) {
             std::string folderPath = joinPath(simulationBulkLoadFolder, std::to_string(i));
             int dataSize = deterministicRandom()->randomInt(2, 5);
@@ -534,12 +545,12 @@ struct BulkLoading : TestWorkload {
             }
             if (deterministicRandom()->coinflip()) {
                 wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 0)));
-                TraceEvent("BulkLoadingWorkLoadComplexTestSetMode")
+                TraceEvent("BulkLoadTaskWorkLoadComplexTestSetMode")
                     .detail("OldMode", oldBulkLoadMode)
                     .detail("NewMode", 0);
                 wait(delay(deterministicRandom()->random01() * 5));
                 wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1)));
-                TraceEvent("BulkLoadingWorkLoadComplexTestSetMode")
+                TraceEvent("BulkLoadTaskWorkLoadComplexTestSetMode")
                     .detail("OldMode", oldBulkLoadMode)
                     .detail("NewMode", 1);
             }
@@ -550,11 +561,11 @@ struct BulkLoading : TestWorkload {
 
         // Wait until all tasks have completed
         wait(self->waitUntilAllTaskComplete(self, cx));
         wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 0))); // trigger DD restart
-        TraceEvent("BulkLoadingWorkLoadComplexTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 0);
+        TraceEvent("BulkLoadTaskWorkLoadComplexTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 0);
 
         // Check correctness
         state std::vector<KeyValue> kvs;
-        state std::vector<BulkLoadState> bulkLoadStates;
+        state std::vector<BulkLoadTaskState> bulkLoadTaskStates;
         state std::vector<KeyRange> incompleteRanges;
         state std::vector<KeyRange> completeRanges;
         for (auto& range : taskMap.ranges()) {
@@ -572,7 +583,7 @@ struct BulkLoading : TestWorkload {
             completeRanges.push_back(range.range());
             std::vector<KeyValue> kvsToCheck = range.value().get().data;
             kvs.insert(std::end(kvs), std::begin(kvsToCheck), std::end(kvsToCheck));
-            bulkLoadStates.push_back(range.value().get().bulkLoadTask);
+            bulkLoadTaskStates.push_back(range.value().get().bulkLoadTask);
         }
         std::vector<KeyValue> dbkvs = wait(self->getKvsFromDB(self, cx, incompleteRanges, completeRanges));
         ASSERT(self->checkSame(self, kvs, dbkvs));
@@ -582,8 +593,8 @@ struct BulkLoading : TestWorkload {
 
         // Clear metadata
         wait(store(oldBulkLoadMode, setBulkLoadMode(cx, 1)));
-        TraceEvent("BulkLoadingWorkLoadComplexTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
-        wait(self->acknowledgeBulkLoadTasks(self, cx, bulkLoadStates));
+        TraceEvent("BulkLoadTaskWorkLoadComplexTestSetMode").detail("OldMode", oldBulkLoadMode).detail("NewMode", 1);
+        wait(self->acknowledgeBulkLoadTasks(self, cx, bulkLoadTaskStates));
         loop {
             bool cleared = wait(self->checkBulkLoadMetadataCleared(self, cx));
             if (cleared) {
@@ -591,12 +602,12 @@ struct BulkLoading : TestWorkload {
             }
             wait(delay(1.0));
         }
-        TraceEvent("BulkLoadingWorkLoadComplexTestComplete");
+        TraceEvent("BulkLoadTaskWorkLoadComplexTestComplete");
         return Void();
     }
 
     // For offline test
-    void produceLargeData(BulkLoading* self, Database cx) {
+    void produceLargeData(BulkLoadTask* self, Database cx) {
         for (int i = 0; i < 3; i++) {
             std::string folderName = std::to_string(i);
             Key beginKey = StringRef(std::to_string(i));
@@ -608,7 +619,7 @@ struct BulkLoading : TestWorkload {
         return;
     }
 
-    ACTOR Future<Void> _start(BulkLoading* self, Database cx) {
+    ACTOR Future<Void> _start(BulkLoadTask* self, Database cx) {
         if (self->clientId != 0) {
             return Void();
         }
@@ -617,7 +628,7 @@ struct BulkLoading : TestWorkload {
             // Network partition between CC and DD can cause DD no longer existing,
             // which results in the bulk loading task cannot complete
            // So, this workload disable the network partition
-            disableConnectionFailures("BulkLoading");
+            disableConnectionFailures("BulkLoadTask");
         }
 
         // Run background traffic
@@ -643,4 +654,4 @@ struct BulkLoading : TestWorkload {
     }
 };
 
-WorkloadFactory<BulkLoading> BulkLoadingFactory;
+WorkloadFactory<BulkLoadTask> BulkLoadTaskFactory;
diff --git a/fdbserver/workloads/DataLossRecovery.actor.cpp b/fdbserver/workloads/DataLossRecovery.actor.cpp
index 6f053a6b627..465d4a8ab4b 100644
--- a/fdbserver/workloads/DataLossRecovery.actor.cpp
+++ b/fdbserver/workloads/DataLossRecovery.actor.cpp
@@ -251,7 +251,7 @@ struct DataLossRecoveryWorkload : TestWorkload {
                                            UID(), // for logging only
                                            &ddEnabledState,
                                            CancelConflictingDataMoves::True,
-                                           Optional<BulkLoadState>());
+                                           Optional<BulkLoadTaskState>());
         } else {
             UID dataMoveId = newDataMoveId(deterministicRandom()->randomUInt64(),
                                            AssignEmptyRange(false),
@@ -270,7 +270,7 @@ struct DataLossRecoveryWorkload : TestWorkload {
                                            UID(), // for logging only
                                            &ddEnabledState,
                                            CancelConflictingDataMoves::True,
-                                           Optional<BulkLoadState>());
+                                           Optional<BulkLoadTaskState>());
         }
         wait(moveKeys(cx, *params));
         break;
diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp
index 2a720c8c6e5..1290d4e4b5c 100644
--- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp
+++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp
@@ -368,7 +368,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload {
                                   UID(),
                                   self->ddContext.ddEnabledState.get(),
                                   CancelConflictingDataMoves::True,
-                                  Optional<BulkLoadState>());
+                                  Optional<BulkLoadTaskState>());
         } else {
             return MoveKeysParams(dataMoveId,
                                   keys,
@@ -382,7 +382,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload {
                                   UID(),
                                   self->ddContext.ddEnabledState.get(),
                                   CancelConflictingDataMoves::True,
-                                  Optional<BulkLoadState>());
+                                  Optional<BulkLoadTaskState>());
         }
     }
diff --git a/fdbserver/workloads/PhysicalShardMove.actor.cpp b/fdbserver/workloads/PhysicalShardMove.actor.cpp
index c915fb7970a..b97e9d79075 100644
--- a/fdbserver/workloads/PhysicalShardMove.actor.cpp
+++ b/fdbserver/workloads/PhysicalShardMove.actor.cpp
@@ -617,7 +617,7 @@ struct PhysicalShardMoveWorkLoad : TestWorkload {
                                 deterministicRandom()->randomUniqueID(), // for logging only
                                 &ddEnabledState,
                                 CancelConflictingDataMoves::False,
-                                Optional<BulkLoadState>())));
+                                Optional<BulkLoadTaskState>())));
             break;
         } catch (Error& e) {
             if (e.code() == error_code_movekeys_conflict) {
diff --git a/fdbserver/workloads/RandomMoveKeys.actor.cpp b/fdbserver/workloads/RandomMoveKeys.actor.cpp
index 8f47c781f9b..383bbb3a079 100644
--- a/fdbserver/workloads/RandomMoveKeys.actor.cpp
+++ b/fdbserver/workloads/RandomMoveKeys.actor.cpp
@@ -175,7 +175,7 @@ struct MoveKeysWorkload : FailureInjectionWorkload {
                                            relocateShardInterval.pairID,
                                            &ddEnabledState,
                                            CancelConflictingDataMoves::True,
-                                           Optional<BulkLoadState>());
+                                           Optional<BulkLoadTaskState>());
         } else {
             UID dataMoveId = newDataMoveId(deterministicRandom()->randomUInt64(),
                                            AssignEmptyRange(false),
@@ -194,7 +194,7 @@ struct MoveKeysWorkload : FailureInjectionWorkload {
                                            relocateShardInterval.pairID,
                                            &ddEnabledState,
                                            CancelConflictingDataMoves::True,
-                                           Optional<BulkLoadState>());
+                                           Optional<BulkLoadTaskState>());
         }
         wait(moveKeys(cx, *params));
         TraceEvent(relocateShardInterval.end()).detail("Result", "Success");
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 575e12e5ed2..1da3b25ba25 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -152,8 +152,8 @@ if(WITH_PYTHON)
   add_fdb_test(TEST_FILES rare/BlobRestoreLarge.toml IGNORE)
   add_fdb_test(TEST_FILES rare/BlobRestoreToVersion.toml)
   add_fdb_test(TEST_FILES rare/BlobRestoreTenantMode.toml)
-  add_fdb_test(TEST_FILES fast/BulkDumping.toml)
-  add_fdb_test(TEST_FILES fast/BulkLoading.toml)
+  add_fdb_test(TEST_FILES fast/BulkDumpAndLoad.toml)
+  add_fdb_test(TEST_FILES fast/BulkLoadTask.toml)
   add_fdb_test(TEST_FILES fast/CacheTest.toml)
   add_fdb_test(TEST_FILES fast/CloggedSideband.toml)
   add_fdb_test(TEST_FILES fast/CompressionUtilsUnit.toml IGNORE)
diff --git a/tests/fast/BulkLoading.toml b/tests/fast/BulkDumpAndLoad.toml
similarity index 86%
rename from tests/fast/BulkLoading.toml
rename to tests/fast/BulkDumpAndLoad.toml
index b6c31f91fb7..abdc684bfab 100644
--- a/tests/fast/BulkLoading.toml
+++ b/tests/fast/BulkDumpAndLoad.toml
@@ -13,7 +13,7 @@ encryptModes = ['disabled'] # Do not support encryption
 # The purpose of setting the knob to true was to enable the shard rocksdb storage engine
 # The shard rocksdb storage engine is set up before this knob is overridden
 # The temporary fix is that in SimulatedCluster.cpp:simulationSetupAndRun, we are doing one additional check
-# so for this BulkLoading test, the shard RocksDB storage engine is always turned on.
+# so for this test, the shard RocksDB storage engine is always turned on.
 shard_encode_location_metadata = true
 
 # BulkLoad relies on RangeLock
@@ -25,8 +25,8 @@ enable_version_vector_tlog_unicast = false
 min_byte_sampling_probability = 0.5
 
 [[test]]
-testTitle = 'BulkLoadingWorkload'
+testTitle = 'BulkDumpAndLoadWorkload'
 useDB = true
 
     [[test.workload]]
-    testName = 'BulkLoadingWorkload'
+    testName = 'BulkDumpAndLoadWorkload'
diff --git a/tests/fast/BulkDumping.toml b/tests/fast/BulkDumping.toml
deleted file mode 100644
index b586801b3da..00000000000
--- a/tests/fast/BulkDumping.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-[[test]]
-testTitle = 'BulkDumpingWorkload'
-useDB = true
-
-    [[test.workload]]
-    testName = 'BulkDumpingWorkload'
diff --git a/tests/fast/BulkLoadTask.toml b/tests/fast/BulkLoadTask.toml
new file mode 100644
index 00000000000..40e4adfd2e3
--- /dev/null
+++ b/tests/fast/BulkLoadTask.toml
@@ -0,0 +1,32 @@
+[configuration]
+config = 'triple'
+storageEngineType = 5
+processesPerMachine = 2
+machineCount = 15
+extraStorageMachineCountPerDC = 8
+tenantModes = ['disabled'] # Do not support tenant
+encryptModes = ['disabled'] # Do not support encryption
+
+[[knobs]]
+# This knob is commented out since the knob override is done *after* the simulation system is set up. However,
+# this is not going to completely work:
+# The purpose of setting the knob to true was to enable the shard rocksdb storage engine
+# The shard rocksdb storage engine is set up before this knob is overridden
+# The temporary fix is that in SimulatedCluster.cpp:simulationSetupAndRun, we are doing one additional check
+# so for this test, the shard RocksDB storage engine is always turned on.
+shard_encode_location_metadata = true
+
+# BulkLoad relies on RangeLock
+enable_read_lock_on_range = true
+enable_version_vector = false
+enable_version_vector_tlog_unicast = false
+
+# Set high enough sample rate to test bytes sampling
+min_byte_sampling_probability = 0.5
+
+[[test]]
+testTitle = 'BulkLoadTaskWorkload'
+useDB = true
+
+    [[test.workload]]
+    testName = 'BulkLoadTaskWorkload'
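
Beyond the mechanical BulkLoadState/BulkLoadPhase -> BulkLoadTaskState/BulkLoadTaskPhase rename, the one caller-visible signature change in these hunks is that newBulkLoadTaskLocalSST now takes a job UID as its first argument. A minimal client-side sketch of the renamed flow, assuming the signatures visible in this patch; the key range, folder, and file paths below are illustrative placeholders, and submitBulkLoadTask is assumed to be the ManagementAPI entry point used elsewhere in this change set:

    // Sketch only (not part of the patch): build and submit one bulk-load task.
    ACTOR Future<Void> sketchSubmitOneBulkLoadTask(Database cx) {
        state int oldMode = 0;
        // Passing UID() mirrors the workload change above: the job/task id is now
        // threaded through the metadata constructor instead of being implicit.
        state BulkLoadTaskState task =
            newBulkLoadTaskLocalSST(UID(),
                                    KeyRangeRef("a"_sr, "b"_sr), // placeholder range
                                    "/tmp/bulkload", // placeholder folder
                                    "/tmp/bulkload/data.sst", // placeholder data file
                                    "/tmp/bulkload/sample.sst"); // placeholder byte-sample file
        wait(store(oldMode, setBulkLoadMode(cx, 1))); // mode 1 enables bulk loading
        wait(submitBulkLoadTask(cx, task)); // persist the task metadata for DD to pick up
        return Void();
    }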