Source code
Revision control
Copy as Markdown
Other Tools
// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hwy/aligned_allocator.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h> // malloc
#include <array>
#include <random>
#include <set>
#include <vector>
#include "gtest/gtest.h"
#include "hwy/base.h"
#include "hwy/per_target.h"
namespace {
// Sample object that keeps track on an external counter of how many times was
// the explicit constructor and destructor called.
template <size_t N>
class SampleObject {
public:
SampleObject() { data_[0] = 'a'; }
explicit SampleObject(int* counter) : counter_(counter) {
if (counter) (*counter)++;
data_[0] = 'b';
}
~SampleObject() {
if (counter_) (*counter_)--;
}
static_assert(N > sizeof(int*), "SampleObject size too small.");
int* counter_ = nullptr;
char data_[N - sizeof(int*)];
};
class FakeAllocator {
public:
// static AllocPtr and FreePtr member to be used with the aligned
// allocator. These functions calls the private non-static members.
static void* StaticAlloc(void* opaque, size_t bytes) {
return reinterpret_cast<FakeAllocator*>(opaque)->Alloc(bytes);
}
static void StaticFree(void* opaque, void* memory) {
return reinterpret_cast<FakeAllocator*>(opaque)->Free(memory);
}
// Returns the number of pending allocations to be freed.
size_t PendingAllocs() { return allocs_.size(); }
private:
void* Alloc(size_t bytes) {
void* ret = malloc(bytes);
allocs_.insert(ret);
return ret;
}
void Free(void* memory) {
if (!memory) return;
EXPECT_NE(allocs_.end(), allocs_.find(memory));
allocs_.erase(memory);
free(memory);
}
std::set<void*> allocs_;
};
} // namespace
namespace hwy {
class AlignedAllocatorTest : public testing::Test {};
TEST(AlignedAllocatorTest, FreeNullptr) {
// Calling free with a nullptr is always ok.
FreeAlignedBytes(/*aligned_pointer=*/nullptr, /*free_ptr=*/nullptr,
/*opaque_ptr=*/nullptr);
}
TEST(AlignedAllocatorTest, Log2) {
EXPECT_EQ(0u, detail::ShiftCount(1));
EXPECT_EQ(1u, detail::ShiftCount(2));
EXPECT_EQ(3u, detail::ShiftCount(8));
}
// Allocator returns null when it detects overflow of items * sizeof(T).
TEST(AlignedAllocatorTest, Overflow) {
constexpr size_t max = ~size_t(0);
constexpr size_t msb = (max >> 1) + 1;
using Size5 = std::array<uint8_t, 5>;
using Size10 = std::array<uint8_t, 10>;
EXPECT_EQ(nullptr,
detail::AllocateAlignedItems<uint32_t>(max / 2, nullptr, nullptr));
EXPECT_EQ(nullptr,
detail::AllocateAlignedItems<uint32_t>(max / 3, nullptr, nullptr));
EXPECT_EQ(nullptr,
detail::AllocateAlignedItems<Size5>(max / 4, nullptr, nullptr));
EXPECT_EQ(nullptr,
detail::AllocateAlignedItems<uint16_t>(msb, nullptr, nullptr));
EXPECT_EQ(nullptr,
detail::AllocateAlignedItems<double>(msb + 1, nullptr, nullptr));
EXPECT_EQ(nullptr,
detail::AllocateAlignedItems<Size10>(msb / 4, nullptr, nullptr));
}
TEST(AlignedAllocatorTest, AllocDefaultPointers) {
const size_t kSize = 7777;
void* ptr = AllocateAlignedBytes(kSize, /*alloc_ptr=*/nullptr,
/*opaque_ptr=*/nullptr);
ASSERT_NE(nullptr, ptr);
// Make sure the pointer is actually aligned.
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT);
char* p = static_cast<char*>(ptr);
size_t ret = 0;
for (size_t i = 0; i < kSize; i++) {
// Performs a computation using p[] to prevent it being optimized away.
p[i] = static_cast<char>(i & 0x7F);
if (i) ret += static_cast<size_t>(p[i] * p[i - 1]);
}
EXPECT_NE(0U, ret);
FreeAlignedBytes(ptr, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr);
}
TEST(AlignedAllocatorTest, EmptyAlignedUniquePtr) {
AlignedUniquePtr<SampleObject<32>> ptr(nullptr, AlignedDeleter());
AlignedUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedDeleter());
}
TEST(AlignedAllocatorTest, EmptyAlignedFreeUniquePtr) {
AlignedFreeUniquePtr<SampleObject<32>> ptr(nullptr, AlignedFreer());
AlignedFreeUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedFreer());
}
TEST(AlignedAllocatorTest, CustomAlloc) {
FakeAllocator fake_alloc;
const size_t kSize = 7777;
void* ptr =
AllocateAlignedBytes(kSize, &FakeAllocator::StaticAlloc, &fake_alloc);
ASSERT_NE(nullptr, ptr);
// We should have only requested one alloc from the allocator.
EXPECT_EQ(1U, fake_alloc.PendingAllocs());
// Make sure the pointer is actually aligned.
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT);
FreeAlignedBytes(ptr, &FakeAllocator::StaticFree, &fake_alloc);
EXPECT_EQ(0U, fake_alloc.PendingAllocs());
}
TEST(AlignedAllocatorTest, MakeUniqueAlignedDefaultConstructor) {
{
auto ptr = MakeUniqueAligned<SampleObject<24>>();
// Default constructor sets the data_[0] to 'a'.
EXPECT_EQ('a', ptr->data_[0]);
EXPECT_EQ(nullptr, ptr->counter_);
}
}
TEST(AlignedAllocatorTest, MakeUniqueAligned) {
int counter = 0;
{
// Creates the object, initializes it with the explicit constructor and
// returns an unique_ptr to it.
auto ptr = MakeUniqueAligned<SampleObject<24>>(&counter);
EXPECT_EQ(1, counter);
// Custom constructor sets the data_[0] to 'b'.
EXPECT_EQ('b', ptr->data_[0]);
}
EXPECT_EQ(0, counter);
}
TEST(AlignedAllocatorTest, MakeUniqueAlignedArray) {
int counter = 0;
{
// Creates the array of objects and initializes them with the explicit
// constructor.
auto arr = MakeUniqueAlignedArray<SampleObject<24>>(7, &counter);
EXPECT_EQ(7, counter);
for (size_t i = 0; i < 7; i++) {
// Custom constructor sets the data_[0] to 'b'.
EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i;
}
}
EXPECT_EQ(0, counter);
}
TEST(AlignedAllocatorTest, AllocSingleInt) {
auto ptr = AllocateAligned<uint32_t>(1);
ASSERT_NE(nullptr, ptr.get());
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT);
// Force delete of the unique_ptr now to check that it doesn't crash.
ptr.reset(nullptr);
EXPECT_EQ(nullptr, ptr.get());
}
TEST(AlignedAllocatorTest, AllocMultipleInt) {
const size_t kSize = 7777;
auto ptr = AllocateAligned<uint32_t>(kSize);
ASSERT_NE(nullptr, ptr.get());
EXPECT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT);
// ptr[i] is actually (*ptr.get())[i] which will use the operator[] of the
// underlying type chosen by AllocateAligned() for the std::unique_ptr.
EXPECT_EQ(&(ptr[0]) + 1, &(ptr[1]));
size_t ret = 0;
for (size_t i = 0; i < kSize; i++) {
// Performs a computation using ptr[] to prevent it being optimized away.
ptr[i] = static_cast<uint32_t>(i);
if (i) ret += static_cast<size_t>(ptr[i]) * ptr[i - 1];
}
EXPECT_NE(0U, ret);
}
TEST(AlignedAllocatorTest, AllocateAlignedObjectWithoutDestructor) {
int counter = 0;
{
// This doesn't call the constructor.
auto obj = AllocateAligned<SampleObject<24>>(1);
obj[0].counter_ = &counter;
}
// Destroying the unique_ptr shouldn't have called the destructor of the
// SampleObject<24>.
EXPECT_EQ(0, counter);
}
TEST(AlignedAllocatorTest, MakeUniqueAlignedArrayWithCustomAlloc) {
FakeAllocator fake_alloc;
int counter = 0;
{
// Creates the array of objects and initializes them with the explicit
// constructor.
auto arr = MakeUniqueAlignedArrayWithAlloc<SampleObject<24>>(
7, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree, &fake_alloc,
&counter);
ASSERT_NE(nullptr, arr.get());
// An array should still only call a single allocation.
EXPECT_EQ(1u, fake_alloc.PendingAllocs());
EXPECT_EQ(7, counter);
for (size_t i = 0; i < 7; i++) {
// Custom constructor sets the data_[0] to 'b'.
EXPECT_EQ('b', arr[i].data_[0]) << "Where i = " << i;
}
}
EXPECT_EQ(0, counter);
EXPECT_EQ(0u, fake_alloc.PendingAllocs());
}
TEST(AlignedAllocatorTest, DefaultInit) {
// The test is whether this compiles. Default-init is useful for output params
// and per-thread storage.
std::vector<AlignedUniquePtr<int[]>> ptrs;
std::vector<AlignedFreeUniquePtr<double[]>> free_ptrs;
ptrs.resize(128);
free_ptrs.resize(128);
// The following is to prevent elision of the pointers.
std::mt19937 rng(129); // Emscripten lacks random_device.
std::uniform_int_distribution<size_t> dist(0, 127);
ptrs[dist(rng)] = MakeUniqueAlignedArray<int>(123);
free_ptrs[dist(rng)] = AllocateAligned<double>(456);
// "Use" pointer without resorting to printf. 0 == 0. Can't shift by 64.
const auto addr1 = reinterpret_cast<uintptr_t>(ptrs[dist(rng)].get());
const auto addr2 = reinterpret_cast<uintptr_t>(free_ptrs[dist(rng)].get());
constexpr size_t kBits = sizeof(uintptr_t) * 8;
EXPECT_EQ((addr1 >> (kBits - 1)) >> (kBits - 1),
(addr2 >> (kBits - 1)) >> (kBits - 1));
}
namespace {
using std::array;
using std::vector;
template <typename T>
void CheckEqual(const T& t1, const T& t2) {
EXPECT_EQ(t1.size(), t2.size());
for (size_t i = 0; i < t1.size(); i++) {
EXPECT_EQ(t1[i], t2[i]) << "i=" << i;
}
}
template <typename T>
void CheckEqual(const AlignedNDArray<T, 1>& a, const vector<T>& v) {
const array<size_t, 1> want_shape({v.size()});
const array<size_t, 1> got_shape = a.shape();
CheckEqual(got_shape, want_shape);
Span<const T> a_span = a[{}];
EXPECT_EQ(a_span.size(), v.size());
for (size_t i = 0; i < a_span.size(); i++) {
EXPECT_EQ(a_span[i], v[i]) << "i=" << i;
EXPECT_EQ(*(a_span.data() + i), v[i]) << "i=" << i;
}
}
template <typename T>
void CheckEqual(const AlignedNDArray<T, 2>& a, const vector<vector<T>>& v) {
const array<size_t, 2> want_shape({v.size(), v[1].size()});
for (const vector<T>& row : v) {
EXPECT_EQ(row.size(), want_shape[1]);
}
const std::array<size_t, 2> got_shape = a.shape();
CheckEqual(got_shape, want_shape);
EXPECT_EQ(a.size(), want_shape[0] * want_shape[1]);
for (size_t row_index = 0; row_index < v.size(); ++row_index) {
vector<T> want_row = v[row_index];
Span<const T> got_row = a[{row_index}];
EXPECT_EQ(got_row.size(), want_row.size()) << "row_index=" << row_index;
for (size_t column_index = 0; column_index < got_row.size();
column_index++) {
EXPECT_EQ(a[{row_index}][column_index], want_row[column_index])
<< "row_index=" << row_index << ", column_index=" << column_index;
EXPECT_EQ(got_row[column_index], want_row[column_index])
<< "row_index=" << row_index << ", column_index=" << column_index;
EXPECT_EQ(*(a[{row_index}].data() + column_index), want_row[column_index])
<< "row_index=" << row_index << ", column_index=" << column_index;
}
}
}
TEST(AlignedAllocatorTest, AlignedNDArray) {
AlignedNDArray<float, 1> a1({4});
CheckEqual(a1, {0, 0, 0, 0});
a1[{}][2] = 3.4f;
CheckEqual(a1, {0, 0, 3.4f, 0});
AlignedNDArray<float, 2> a2({2, 3});
CheckEqual(a2, {{0, 0, 0}, {0, 0, 0}});
a2[{1}][1] = 5.1f;
CheckEqual(a2, {{0, 0, 0}, {0, 5.1f, 0}});
float f0[] = {1.0f, 2.0f, 3.0f};
float f1[] = {4.0f, 5.0f, 6.0f};
hwy::CopyBytes(f0, a2[{0}].data(), 3 * sizeof(float));
hwy::CopyBytes(f1, a2[{1}].data(), 3 * sizeof(float));
CheckEqual(a2, {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}});
}
// Tests that each innermost row in an AlignedNDArray is aligned to the max
// bytes available for SIMD operations on this architecture.
TEST(AlignedAllocatorTest, AlignedNDArrayAlignment) {
AlignedNDArray<float, 4> a({3, 3, 3, 3});
for (size_t d0 = 0; d0 < a.shape()[0]; d0++) {
for (size_t d1 = 0; d1 < a.shape()[1]; d1++) {
for (size_t d2 = 0; d2 < a.shape()[2]; d2++) {
// Check that the address this innermost array starts at is an even
// number of VectorBytes(), which is the max bytes available for SIMD
// operations.
EXPECT_EQ(
reinterpret_cast<uintptr_t>(a[{d0, d1, d2}].data()) % VectorBytes(),
0);
}
}
}
}
TEST(AlignedAllocatorTest, SpanCopyAssignment) {
AlignedNDArray<float, 2> a({2, 2});
CheckEqual(a, {{0.0f, 0.0f}, {0.0f, 0.0f}});
a[{0}] = {1.0f, 2.0f};
a[{1}] = {3.0f, 4.0f};
CheckEqual(a, {{1.0f, 2.0f}, {3.0f, 4.0f}});
}
TEST(AlignedAllocatorTest, AlignedNDArrayTruncate) {
AlignedNDArray<size_t, 4> a({8, 8, 8, 8});
const size_t last_axis_memory_shape = a.memory_shape()[3];
const auto compute_value = [&](const std::array<size_t, 4>& index) {
return index[0] * 8 * 8 * 8 + index[1] * 8 * 8 + index[2] * 8 * 8 +
index[3];
};
for (size_t axis0 = 0; axis0 < a.shape()[0]; ++axis0) {
for (size_t axis1 = 0; axis1 < a.shape()[1]; ++axis1) {
for (size_t axis2 = 0; axis2 < a.shape()[2]; ++axis2) {
for (size_t axis3 = 0; axis3 < a.shape()[3]; ++axis3) {
a[{axis0, axis1, axis2}][axis3] =
compute_value({axis0, axis1, axis2, axis3});
}
}
}
}
const auto verify_values = [&](const AlignedNDArray<size_t, 4>& array) {
for (size_t axis0 = 0; axis0 < array.shape()[0]; ++axis0) {
for (size_t axis1 = 0; axis1 < array.shape()[1]; ++axis1) {
for (size_t axis2 = 0; axis2 < array.shape()[2]; ++axis2) {
for (size_t axis3 = 0; axis3 < array.shape()[3]; ++axis3) {
EXPECT_EQ((array[{axis0, axis1, axis2}][axis3]),
(compute_value({axis0, axis1, axis2, axis3})));
}
}
}
}
};
a.truncate({7, 7, 7, 7});
EXPECT_EQ(a.shape()[0], 7);
EXPECT_EQ(a.shape()[1], 7);
EXPECT_EQ(a.shape()[2], 7);
EXPECT_EQ(a.shape()[3], 7);
EXPECT_EQ(a.memory_shape()[0], 8);
EXPECT_EQ(a.memory_shape()[1], 8);
EXPECT_EQ(a.memory_shape()[2], 8);
EXPECT_EQ(a.memory_shape()[3], last_axis_memory_shape);
verify_values(a);
a.truncate({6, 5, 4, 3});
EXPECT_EQ(a.shape()[0], 6);
EXPECT_EQ(a.shape()[1], 5);
EXPECT_EQ(a.shape()[2], 4);
EXPECT_EQ(a.shape()[3], 3);
EXPECT_EQ(a.memory_shape()[0], 8);
EXPECT_EQ(a.memory_shape()[1], 8);
EXPECT_EQ(a.memory_shape()[2], 8);
EXPECT_EQ(a.memory_shape()[3], last_axis_memory_shape);
verify_values(a);
}
} // namespace
} // namespace hwy