Source code
Revision control
Copy as Markdown
Other Tools
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 1a90a1eb88..6f5f3d2868 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -5,7 +5,6 @@
#include <array>
#include <cinttypes>
#include <cstring>
-#include <future>
#include "moz-overrides.h"
@@ -926,7 +925,7 @@
GGML_ASSERT(size_data != 0 && "call init_mappings() first");
std::vector<no_init<uint8_t>> read_buf;
- std::vector<std::future<std::pair<ggml_tensor *, bool>>> validation_result;
+ std::vector<std::pair<ggml_tensor *, bool>> validation_result;
// 4 staging buffers for async uploads, each sized 1MB seems to be a good default for single NVMe drives.
// NVMe raid configurations might require more / larger buffers.
@@ -1041,9 +1040,7 @@
uint8_t * data = (uint8_t *) mapping->addr() + weight->offs;
if (check_tensors) {
- validation_result.emplace_back(std::async(std::launch::async, [cur, data, n_size] {
- return std::make_pair(cur, ggml_validate_row_data(cur->type, data, n_size));
- }));
+ validation_result.push_back(std::make_pair(cur, ggml_validate_row_data(cur->type, data, n_size)));
}
GGML_ASSERT(buf_mmap || cur->data); // either we have a buffer to allocate the tensor in, or it is already allocated
@@ -1066,9 +1063,7 @@
file->seek(weight->offs, SEEK_SET);
file->read_raw(cur->data, n_size);
if (check_tensors) {
- validation_result.emplace_back(std::async(std::launch::async, [cur, n_size] {
- return std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size));
- }));
+ validation_result.push_back(std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size)));
}
} else {
// If upload_backend is valid load the tensor in chunks to pinned memory and upload the buffers asynchronously to the GPU.
@@ -1116,8 +1111,7 @@
// check validation results
bool validation_failed = false;
- for (auto & future : validation_result) {
- auto result = future.get();
+ for (const auto & result : validation_result) {
if (!result.second) {
LLAMA_LOG_ERROR("%s: tensor '%s' has invalid data\n", __func__, ggml_get_name(result.first));
validation_failed = true;