Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions runtime/core/device_memory_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/runtime/core/device_memory_buffer.h>

namespace executorch::runtime {

/**
 * Allocates `size` bytes on the given device and wraps them in an owning
 * DeviceMemoryBuffer.
 *
 * @param size Number of bytes requested from the allocator.
 * @param type Device type used to look up the allocator.
 * @param index Device index forwarded to the allocator.
 * @return The buffer on success; Error::NotFound when the allocator lookup
 *     for `type` yields nullptr; otherwise the error reported by the
 *     allocator's allocate() call.
 */
Result<DeviceMemoryBuffer> DeviceMemoryBuffer::create(
    size_t size,
    etensor::DeviceType type,
    etensor::DeviceIndex index) {
  DeviceAllocator* device_allocator = get_device_allocator(type);
  if (!device_allocator) {
    ET_LOG(
        Error,
        "No device allocator registered for device type %d",
        static_cast<int>(type));
    return Error::NotFound;
  }

  auto allocation = device_allocator->allocate(size, index);
  if (allocation.ok()) {
    // The buffer keeps the allocator and index so it can hand the pointer
    // back to the same allocator later.
    return DeviceMemoryBuffer(allocation.get(), size, device_allocator, index);
  }

  // Surface the allocator's own failure code unchanged.
  return allocation.error();
}

} // namespace executorch::runtime
126 changes: 126 additions & 0 deletions runtime/core/device_memory_buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cstddef>
#include <cstdint>

#include <executorch/runtime/core/device_allocator.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/core/span.h>

namespace executorch::runtime {

/**
 * Move-only RAII owner of a single device memory allocation.
 *
 * The buffer remembers the DeviceAllocator and device index that produced its
 * pointer and hands the pointer back through DeviceAllocator::deallocate()
 * when it is destroyed or overwritten by a move assignment. It is the device
 * memory (CUDA, etc.) counterpart of the std::vector<uint8_t> used for CPU
 * planned buffers.
 *
 * Copying is disabled. Moving transfers ownership and leaves the source with
 * data() == nullptr and size() == 0.
 */
class DeviceMemoryBuffer final {
 public:
  /**
   * Allocates device memory and returns a buffer that owns it.
   *
   * The DeviceAllocator for `type` is looked up via the
   * DeviceAllocatorRegistry. If no allocator is registered for the type,
   * Error::NotFound is returned.
   *
   * @param size Number of bytes to allocate.
   * @param type The device type (e.g., CUDA).
   * @param index The device index (e.g., 0 for cuda:0). Defaults to 0.
   * @return A Result holding the DeviceMemoryBuffer on success, or an error.
   */
  static Result<DeviceMemoryBuffer> create(
      size_t size,
      etensor::DeviceType type,
      etensor::DeviceIndex index = 0);

  /// Constructs an empty buffer that owns nothing.
  DeviceMemoryBuffer() = default;

  // Copying would give two owners of the same allocation, so it is disabled.
  DeviceMemoryBuffer(const DeviceMemoryBuffer&) = delete;
  DeviceMemoryBuffer& operator=(const DeviceMemoryBuffer&) = delete;

  /// Move constructor: takes over `other`'s allocation and empties `other`.
  DeviceMemoryBuffer(DeviceMemoryBuffer&& other) noexcept {
    take_from(other);
  }

  /// Move assignment: releases the allocation currently held (if any), then
  /// takes over `other`'s allocation. Self-assignment is a no-op.
  DeviceMemoryBuffer& operator=(DeviceMemoryBuffer&& other) noexcept {
    if (this == &other) {
      return *this;
    }
    release_if_owned();
    take_from(other);
    return *this;
  }

  /// Returns the allocation to its allocator, if one is held.
  ~DeviceMemoryBuffer() {
    release_if_owned();
  }

  /// Returns the device pointer, or nullptr if empty/moved-from.
  void* data() const {
    return ptr_;
  }

  /// Returns the size in bytes of the allocation (0 if empty/moved-from).
  size_t size() const {
    return size_;
  }

  /**
   * Returns a Span<uint8_t> over [data(), data() + size()).
   *
   * Intended for use with HierarchicalAllocator, which is expected to only
   * perform pointer arithmetic on the span data and never dereference it.
   * The span does not own the memory.
   */
  Span<uint8_t> as_span() const {
    return Span<uint8_t>(static_cast<uint8_t*>(ptr_), size_);
  }

 private:
  /// Wraps an allocation that `allocator` already produced for
  /// `device_index`; only create() is meant to call this.
  DeviceMemoryBuffer(
      void* ptr,
      size_t size,
      DeviceAllocator* allocator,
      etensor::DeviceIndex device_index)
      : ptr_(ptr),
        size_(size),
        allocator_(allocator),
        device_index_(device_index) {}

  /// Calls deallocate() on the held pointer when both a pointer and an
  /// allocator are present. Does not reset any field; callers either
  /// overwrite the fields next or are being destroyed.
  void release_if_owned() {
    if (ptr_ == nullptr || allocator_ == nullptr) {
      return;
    }
    allocator_->deallocate(ptr_, device_index_);
  }

  /// Copies every field from `other`, then clears `other`'s pointer, size
  /// and allocator so that it no longer owns anything.
  void take_from(DeviceMemoryBuffer& other) {
    ptr_ = other.ptr_;
    size_ = other.size_;
    allocator_ = other.allocator_;
    device_index_ = other.device_index_;

    other.ptr_ = nullptr;
    other.size_ = 0;
    other.allocator_ = nullptr;
  }

  void* ptr_ = nullptr; // Device pointer; nullptr when empty.
  size_t size_ = 0; // Allocation size in bytes.
  DeviceAllocator* allocator_ = nullptr; // Allocator that produced ptr_.
  etensor::DeviceIndex device_index_ = 0; // Index passed to the allocator.
};

} // namespace executorch::runtime
1 change: 1 addition & 0 deletions runtime/core/portable_type/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def define_common_targets():
"//executorch/backends/...",
"//executorch/extension/fb/dynamic_shim/...",
"//executorch/kernels/portable/cpu/...",
"//executorch/runtime/core/...",
"//executorch/runtime/core/exec_aten/...",
"//executorch/runtime/core/portable_type/test/...",
],
Expand Down
27 changes: 27 additions & 0 deletions runtime/core/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,33 @@ def define_common_targets():
visibility = ["//executorch/..."],
)

# Library target "device_allocator": compiles device_allocator.cpp and exports
# device_allocator.h. Visibility is PUBLIC.
runtime.cxx_library(
name = "device_allocator",
srcs = ["device_allocator.cpp"],
exported_headers = [
"device_allocator.h",
],
# Listed as exported deps alongside the exported header.
exported_deps = [
":core",
"//executorch/runtime/core/portable_type:portable_type",
],
# Listed as a plain (non-exported) dep.
deps = [
"//executorch/runtime/platform:platform",
],
visibility = ["PUBLIC"],
)

# Library target "device_memory_buffer": compiles device_memory_buffer.cpp and
# exports device_memory_buffer.h. Visibility is PUBLIC.
runtime.cxx_library(
name = "device_memory_buffer",
srcs = ["device_memory_buffer.cpp"],
exported_headers = ["device_memory_buffer.h"],
# Exported deps: the core target and the device_allocator target in this file.
exported_deps = [
":core",
":device_allocator",
],
visibility = ["PUBLIC"],
)

runtime.cxx_library(
name = "tag",
srcs = ["tag.cpp"],
Expand Down
169 changes: 169 additions & 0 deletions runtime/core/test/device_memory_buffer_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/runtime/core/device_memory_buffer.h>

#include <gtest/gtest.h>

#include <executorch/runtime/platform/runtime.h>

using executorch::runtime::DeviceAllocator;
using executorch::runtime::DeviceMemoryBuffer;
using executorch::runtime::Error;
using executorch::runtime::Result;
using executorch::runtime::get_device_allocator;
using executorch::runtime::register_device_allocator;
using executorch::runtime::etensor::DeviceIndex;
using executorch::runtime::etensor::DeviceType;

/**
 * Test double for DeviceAllocator used by the DeviceMemoryBuffer tests.
 *
 * Every allocate() call returns the address of the same in-object scratch
 * array. allocate() and deallocate() each bump a counter and remember their
 * most recent argument so tests can assert on how the allocator was used.
 * The copy hooks do nothing and report success.
 */
class MockAllocator : public DeviceAllocator {
 public:
  explicit MockAllocator(DeviceType type) : type_(type) {}

  // Records the request and hands out the scratch array. The requested size
  // is not compared against the array size.
  Result<void*> allocate(size_t nbytes, DeviceIndex /*index*/) override {
    last_allocate_size_ = nbytes;
    ++allocate_count_;
    return static_cast<void*>(buffer_);
  }

  // Records the pointer being released; nothing is actually freed.
  void deallocate(void* ptr, DeviceIndex /*index*/) override {
    last_deallocate_ptr_ = ptr;
    ++deallocate_count_;
  }

  // No-op copy: always succeeds without touching either buffer.
  Error copy_host_to_device(
      void* /*dst*/,
      const void* /*src*/,
      size_t /*nbytes*/,
      DeviceIndex /*index*/) override {
    return Error::Ok;
  }

  // No-op copy: always succeeds without touching either buffer.
  Error copy_device_to_host(
      void* /*dst*/,
      const void* /*src*/,
      size_t /*nbytes*/,
      DeviceIndex /*index*/) override {
    return Error::Ok;
  }

  // Reports the device type supplied at construction.
  DeviceType device_type() const override {
    return type_;
  }

  // Public on purpose: tests read and reset these directly.
  int allocate_count_ = 0; // Number of allocate() calls.
  int deallocate_count_ = 0; // Number of deallocate() calls.
  size_t last_allocate_size_ = 0; // nbytes of the latest allocate().
  void* last_deallocate_ptr_ = nullptr; // ptr of the latest deallocate().
  uint8_t buffer_[256] = {}; // Scratch storage returned by allocate().

 private:
  DeviceType type_;
};

// File-local mock allocator shared by every test in this file. It reports
// DeviceType::CUDA and is registered for that type in SetUpTestSuite().
static MockAllocator g_mock_cuda{DeviceType::CUDA};

class DeviceMemoryBufferTest : public ::testing::Test {
protected:
static void SetUpTestSuite() {
executorch::runtime::runtime_init();
register_device_allocator(DeviceType::CUDA, &g_mock_cuda);
}

void SetUp() override {
// Reset counters before each test.
g_mock_cuda.allocate_count_ = 0;
g_mock_cuda.deallocate_count_ = 0;
g_mock_cuda.last_allocate_size_ = 0;
g_mock_cuda.last_deallocate_ptr_ = nullptr;
}
};

// A default-constructed buffer exposes a null pointer and zero size, both
// directly and through as_span().
TEST_F(DeviceMemoryBufferTest, DefaultConstructedIsEmpty) {
  DeviceMemoryBuffer empty;

  EXPECT_EQ(empty.data(), nullptr);
  EXPECT_EQ(empty.size(), 0u);

  const auto view = empty.as_span();
  EXPECT_EQ(view.data(), nullptr);
  EXPECT_EQ(view.size(), 0u);
}

// create() must call the allocator exactly once with the requested size, and
// destroying the buffer must return the same pointer exactly once.
TEST_F(DeviceMemoryBufferTest, CreateAllocatesAndDestructorDeallocates) {
  constexpr size_t kRequestedBytes = 1024;
  {
    auto created =
        DeviceMemoryBuffer::create(kRequestedBytes, DeviceType::CUDA, 0);
    ASSERT_TRUE(created.ok());

    auto owner = std::move(created.get());
    EXPECT_NE(owner.data(), nullptr);
    EXPECT_EQ(owner.size(), kRequestedBytes);

    EXPECT_EQ(g_mock_cuda.allocate_count_, 1);
    EXPECT_EQ(g_mock_cuda.last_allocate_size_, kRequestedBytes);
    // Nothing may be released while the buffer is still alive.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
  }

  // Leaving the scope destroyed the buffer.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
  EXPECT_EQ(g_mock_cuda.last_deallocate_ptr_, g_mock_cuda.buffer_);
}

// This file only registers an allocator for CUDA, so the test expects a
// request for DeviceType::CPU to be rejected with Error::NotFound.
TEST_F(DeviceMemoryBufferTest, CreateFailsWithNoRegisteredAllocator) {
  auto outcome = DeviceMemoryBuffer::create(512, DeviceType::CPU, 0);

  EXPECT_FALSE(outcome.ok());
  EXPECT_EQ(outcome.error(), Error::NotFound);
}

// Move construction must hand the allocation to the new object, leave the
// source empty, and result in exactly one deallocation once every object
// involved has been destroyed.
TEST_F(DeviceMemoryBufferTest, MoveConstructorTransfersOwnership) {
  {
    auto result = DeviceMemoryBuffer::create(256, DeviceType::CUDA, 0);
    ASSERT_TRUE(result.ok());
    auto original = std::move(result.get());
    void* original_ptr = original.data();

    DeviceMemoryBuffer moved(std::move(original));

    // Reading the moved-from object is intentional: the class documents that
    // a moved-from buffer reports nullptr / 0.
    EXPECT_EQ(original.data(), nullptr);
    EXPECT_EQ(original.size(), 0u);
    EXPECT_EQ(moved.data(), original_ptr);
    EXPECT_EQ(moved.size(), 256u);
    // The move itself must not release anything.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
  }

  // All three objects (result, original, moved) are now destroyed. Only the
  // final owner may have released the allocation: a count of 2+ would mean a
  // double free, a count of 0 a leak.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
  EXPECT_EQ(g_mock_cuda.last_deallocate_ptr_, g_mock_cuda.buffer_);
}

// Move assignment into an empty target must transfer the allocation, leave
// the source empty, and result in exactly one deallocation once every object
// involved has been destroyed.
TEST_F(DeviceMemoryBufferTest, MoveAssignmentTransfersOwnership) {
  {
    auto result = DeviceMemoryBuffer::create(128, DeviceType::CUDA, 0);
    ASSERT_TRUE(result.ok());
    auto original = std::move(result.get());
    void* original_ptr = original.data();

    DeviceMemoryBuffer target;
    target = std::move(original);

    // Reading the moved-from object is intentional: the class documents that
    // a moved-from buffer reports nullptr / 0.
    EXPECT_EQ(original.data(), nullptr);
    EXPECT_EQ(original.size(), 0u);
    EXPECT_EQ(target.data(), original_ptr);
    EXPECT_EQ(target.size(), 128u);
    // The target was empty, so the assignment must not release anything.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
  }

  // Only the final owner may have released the allocation.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
  EXPECT_EQ(g_mock_cuda.last_deallocate_ptr_, g_mock_cuda.buffer_);
}

// Move assignment into a target that already owns an allocation must release
// the target's previous allocation before taking over the source's.
TEST_F(DeviceMemoryBufferTest, MoveAssignmentReleasesPreviousAllocation) {
  auto first = DeviceMemoryBuffer::create(64, DeviceType::CUDA, 0);
  ASSERT_TRUE(first.ok());
  auto second = DeviceMemoryBuffer::create(32, DeviceType::CUDA, 0);
  ASSERT_TRUE(second.ok());

  {
    DeviceMemoryBuffer target(std::move(first.get()));
    DeviceMemoryBuffer source(std::move(second.get()));
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);

    target = std::move(source);

    // The allocation the target held before the assignment is released now.
    EXPECT_EQ(g_mock_cuda.deallocate_count_, 1);
    EXPECT_EQ(target.size(), 32u);
    EXPECT_NE(target.data(), nullptr);
    EXPECT_EQ(source.data(), nullptr);
    EXPECT_EQ(source.size(), 0u);
  }

  // Destroying the target released the allocation it took over; the emptied
  // source released nothing.
  EXPECT_EQ(g_mock_cuda.deallocate_count_, 2);
}

// Destroying a buffer that never owned anything must not call deallocate().
TEST_F(DeviceMemoryBufferTest, DestructorNoOpForDefaultConstructed) {
  {
    // Constructed and immediately destroyed at the end of this scope.
    DeviceMemoryBuffer never_allocated;
  }

  EXPECT_EQ(g_mock_cuda.deallocate_count_, 0);
}

// as_span() must expose the same pointer and byte count as data()/size().
TEST_F(DeviceMemoryBufferTest, AsSpanWrapsDevicePointer) {
  constexpr size_t kRequestedBytes = 2048;

  auto created =
      DeviceMemoryBuffer::create(kRequestedBytes, DeviceType::CUDA, 0);
  ASSERT_TRUE(created.ok());
  auto owner = std::move(created.get());

  const auto view = owner.as_span();
  EXPECT_EQ(view.data(), static_cast<uint8_t*>(owner.data()));
  EXPECT_EQ(view.size(), kRequestedBytes);
}
8 changes: 8 additions & 0 deletions runtime/core/test/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ def define_common_targets():
TARGETS and BUCK files that call this function.
"""

# Test target for device_memory_buffer_test.cpp; depends on the
# device_memory_buffer library under //executorch/runtime/core.
runtime.cxx_test(
name = "device_memory_buffer_test",
srcs = ["device_memory_buffer_test.cpp"],
deps = [
"//executorch/runtime/core:device_memory_buffer",
],
)

runtime.cxx_test(
name = "span_test",
srcs = ["span_test.cpp"],
Expand Down
Loading