Skip to content

Commit dc02cfe

Browse files
authored
[XNNPACK][Weights Cache] Initial Weights Cache Design with NamedDataMap
Differential Revision: D70885917 Pull Request resolved: #9154
1 parent c05fd47 commit dc02cfe

File tree

6 files changed

+705
-0
lines changed

6 files changed

+705
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
10+
#include <executorch/runtime/core/error.h>
11+
#include <executorch/runtime/core/memory_allocator.h>
12+
#include <sys/stat.h>
13+
#include <xnnpack.h>
14+
#include <string>
15+
#include <vector>
16+
17+
namespace executorch {
18+
namespace backends {
19+
namespace xnnpack {
20+
namespace delegate {
21+
22+
using executorch::runtime::MemoryAllocator;
23+
using executorch::runtime::NamedDataMap;
24+
25+
/**
 * Constructs the cache and fills in the callback table held in
 * weights_cache_.
 *
 * The table's `context` is set to this object and every callback slot is
 * pointed at the matching XNNWeightsCache function. Those functions take an
 * XNNWeightsCache* as their first argument, whereas the slots are assigned
 * pointers whose first argument is void*, so each function pointer is
 * reinterpreted to the void*-based signature before being stored.
 */
XNNWeightsCache::XNNWeightsCache() {
  // Signatures the callbacks are reinterpreted to before being stored.
  using LookUpFn = size_t (*)(void*, const xnn_weights_cache_look_up_key*);
  using ReserveSpaceFn = void* (*)(void*, size_t);
  using LookUpOrInsertFn =
      size_t (*)(void*, const xnn_weights_cache_look_up_key*, void*, size_t);
  using IsFinalizedFn = bool (*)(void*);
  using OffsetToAddrFn = void* (*)(void*, size_t);
  using DeleteCacheFn = enum xnn_status (*)(void*);

  // Handed back to every callback as its first argument.
  weights_cache_.context = this;

  weights_cache_.look_up =
      reinterpret_cast<LookUpFn>(XNNWeightsCache::look_up);
  weights_cache_.reserve_space =
      reinterpret_cast<ReserveSpaceFn>(XNNWeightsCache::reserve_space);
  weights_cache_.look_up_or_insert =
      reinterpret_cast<LookUpOrInsertFn>(XNNWeightsCache::look_up_or_insert);
  weights_cache_.is_finalized =
      reinterpret_cast<IsFinalizedFn>(XNNWeightsCache::is_finalized);
  weights_cache_.offset_to_addr =
      reinterpret_cast<OffsetToAddrFn>(XNNWeightsCache::offset_to_addr);
  weights_cache_.delete_cache =
      reinterpret_cast<DeleteCacheFn>(XNNWeightsCache::delete_cache);
}
40+
41+
/**
 * Prepares the cache for the construction of a new runtime.
 *
 * Remembers the supplied allocator and named-data map and marks the cache as
 * not finalized.
 *
 * @param runtime_allocator Allocator pointer to remember; stored as-is.
 * @param named_data_map Named-data map pointer to remember; stored as-is and
 *     later used by load_unpacked_data().
 * @return Always Error::Ok.
 */
Error XNNWeightsCache::initialize_for_runtime(
    MemoryAllocator* runtime_allocator,
    const NamedDataMap* named_data_map) {
  // A fresh runtime is about to be built, so the cache is open again.
  is_finalized_ = false;

  named_data_map_ = named_data_map;
  runtime_allocator_ = runtime_allocator;

  return Error::Ok;
}
50+
51+
/**
 * Marks the cache as finalized and reports which packed entries the runtime
 * that was just built makes use of.
 *
 * All unpacked buffers loaded through load_unpacked_data() are freed and the
 * pointer-to-name bookkeeping for them is dropped. Every packed entry flagged
 * as in_current_runtime gets its reference count incremented, has the flag
 * reset, and has its name added to the returned list.
 *
 * @return Names of the packed entries whose reference count was incremented.
 */
Result<std::vector<std::string>> XNNWeightsCache::finalize_for_runtime() {
  is_finalized_ = true;

  // Packing is done by the time the runtime has been created, so the
  // unpacked source buffers are no longer needed.
  for (auto buffer_it = unpacked_data_.begin();
       buffer_it != unpacked_data_.end();
       ++buffer_it) {
    buffer_it->Free();
  }
  unpacked_data_.clear();
  unpacked_data_to_name_.clear();

  // Take a reference on each packed entry touched while building this
  // runtime and collect its name for the caller.
  std::vector<std::string> packed_data_names;
  for (auto meta_it = name_to_packed_data_metadata_.begin();
       meta_it != name_to_packed_data_metadata_.end();
       ++meta_it) {
    auto& metadata = meta_it->second;
    if (!metadata.in_current_runtime) {
      continue;
    }
    metadata.ref_count++;
    metadata.in_current_runtime = false;
    packed_data_names.push_back(meta_it->first);
  }

  return packed_data_names;
}
75+
76+
/**
 * Loads the unpacked data registered under `name` from the named-data map.
 *
 * On success the returned buffer is kept alive in unpacked_data_ and its data
 * pointer is recorded in unpacked_data_to_name_, so that the pointer can
 * later be translated back to `name`.
 *
 * @param name Key to request from the named-data map.
 * @return Pointer to the loaded bytes, or Error::InvalidExternalData when the
 *     map lookup does not succeed.
 */
Result<const uint8_t*> XNNWeightsCache::load_unpacked_data(
    const std::string& name) {
  const char* key = name.c_str();
  Result<FreeableBuffer> named_data = named_data_map_->get_data(key);

  if (named_data.ok()) {
    FreeableBuffer& buffer = named_data.get();
    // Capture the address before the buffer is moved into unpacked_data_.
    const uint8_t* data_pointer = static_cast<const uint8_t*>(buffer.data());
    unpacked_data_.push_back(std::move(buffer));
    unpacked_data_to_name_[data_pointer] = name;
    return data_pointer;
  }

  ET_LOG(Error, "Failed to load constant data for key %s", name.c_str());
  return Error::InvalidExternalData;
}
90+
91+
/**
 * Drops one reference from each named packed entry and frees the entries
 * whose reference count reaches zero.
 *
 * Freeing an entry erases its backing container from
 * packed_pointer_to_container_, nulls its slot in packed_data_ptrs_ (slots are
 * never removed, so other entries keep their offsets), and erases its
 * metadata.
 *
 * @param packed_data_names Names of the packed entries to release.
 * @return Error::Ok on success. Error::InvalidArgument if the cache is not
 *     finalized, or if a name is not present in the cache; in the latter case
 *     the names preceding it have already been processed.
 */
Error XNNWeightsCache::delete_packed_data(
    const std::vector<std::string>& packed_data_names) {
  if (!is_finalized_) {
    ET_LOG(
        Error,
        "Error, attempted to delete packed data from the cache but the cache is not finalized");
    return Error::InvalidArgument;
  }

  for (const std::string& name : packed_data_names) {
    auto entry = name_to_packed_data_metadata_.find(name);
    if (entry == name_to_packed_data_metadata_.end()) {
      ET_LOG(
          Error,
          "Error, attempted to deleted packed data: %s, from the cache but it wasn't found",
          name.c_str());
      return Error::InvalidArgument;
    }

    auto& metadata = entry->second;
    // Only decrement a positive count: decrementing a count that is already
    // zero would wrap (or go negative) and the entry would never be freed.
    if (metadata.ref_count > 0) {
      metadata.ref_count--;
    }
    if (metadata.ref_count != 0) {
      continue;
    }

    const auto offset = metadata.offset;
    void* packed_data_ptr = packed_data_ptrs_[offset];
    // Erasing the map entry destroys the container that owns the packed bytes.
    packed_pointer_to_container_.erase(packed_data_ptr);
    // Keep the slot so other offsets stay valid, but clear the stale pointer.
    packed_data_ptrs_[offset] = nullptr;
    // Erase through the iterator: no second lookup, and no key reference that
    // points into the node being destroyed.
    name_to_packed_data_metadata_.erase(entry);
  }

  return Error::Ok;
}
124+
125+
/**
 * Looks up the packed entry that corresponds to the kernel (and optional
 * bias) pointers in `cache_key`.
 *
 * The lookup name is the name registered for the kernel pointer, with the
 * name registered for the bias pointer appended when the bias is non-null and
 * has a registered name. When a packed entry with that name exists it is
 * flagged as in_current_runtime.
 *
 * @param context Cache to search.
 * @param cache_key Key carrying the unpacked kernel and bias pointers.
 * @return Offset of the packed entry, or SIZE_MAX when the kernel pointer has
 *     no registered name or no packed entry exists under the combined name.
 */
size_t XNNWeightsCache::look_up(
    XNNWeightsCache* context,
    const xnn_weights_cache_look_up_key* cache_key) {
  auto& names_by_pointer = context->unpacked_data_to_name_;

  // The kernel pointer must have been registered under a name.
  const auto kernel_it = names_by_pointer.find(cache_key->kernel);
  if (kernel_it == names_by_pointer.end()) {
    return SIZE_MAX;
  }

  // Build the combined name: kernel name followed by the bias name, if any.
  std::string packed_name = kernel_it->second;
  const void* bias_ptr = cache_key->bias;
  if (bias_ptr != nullptr) {
    const auto bias_it = names_by_pointer.find(bias_ptr);
    if (bias_it != names_by_pointer.end()) {
      packed_name.append(bias_it->second);
    }
  }

  // See whether data has already been packed under the combined name.
  auto& packed_metadata = context->name_to_packed_data_metadata_;
  const auto packed_it = packed_metadata.find(packed_name);
  if (packed_it == packed_metadata.end()) {
    return SIZE_MAX;
  }

  packed_it->second.in_current_runtime = true;
  return packed_it->second.offset;
}
157+
158+
/**
 * Reserves storage for `n` bytes of packed data and returns a pointer into it
 * that is aligned to kPackedAllocationAlignment.
 *
 * The storage is a std::string sized `n + kPackedAllocationAlignment`, which
 * is then moved into packed_pointer_to_container_ keyed by the aligned
 * pointer; erasing that map entry releases the storage.
 *
 * @param context Cache that takes ownership of the storage.
 * @param n Number of usable bytes required at the returned address.
 * @return Aligned pointer with at least `n` bytes available behind it.
 */
void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) {
  // MemoryAllocator* allocator = context->runtime_allocator_;
  // void* reserved_pointer = allocator->allocate(n,
  // context->kPackedAllocationAlignment);

  // return reserved_pointer;

  // Use one alignment value for both the padding and the pointer adjustment,
  // so the two can never disagree.
  const size_t alignment = context->kPackedAllocationAlignment;

  std::string data_container;
  data_container.resize(n + alignment);

  // Advance to the next multiple of `alignment`. The pointer always moves
  // forward by between 1 and `alignment` bytes, which the padding above covers.
  // NOTE(review): relies on the string's bytes staying at the same address
  // when the string is moved into the map below — confirm for small sizes.
  const uintptr_t base = reinterpret_cast<uintptr_t>(data_container.data());
  const uintptr_t aligned = base + alignment - base % alignment;
  void* aligned_space = reinterpret_cast<void*>(aligned);

  context->packed_pointer_to_container_[aligned_space] =
      std::move(data_container);
  return aligned_space;
}
174+
175+
/**
 * Returns the offset of the packed data matching `cache_key`, registering
 * `ptr` as a new entry when no match exists.
 *
 * If look_up() finds an entry, its stored bytes are compared against the
 * `size` bytes at `ptr`: on a match the existing offset is returned, otherwise
 * SIZE_MAX is returned. If nothing is found, `ptr` is appended to
 * packed_data_ptrs_ and, when the kernel pointer has a registered name,
 * metadata for the combined kernel/bias name is recorded with a reference
 * count of zero and in_current_runtime set.
 *
 * NOTE(review): when an existing entry is found, this function does not
 * release whatever storage backs `ptr` — confirm whether that is intended.
 *
 * @param context Cache to search and update.
 * @param cache_key Key carrying the unpacked kernel and bias pointers.
 * @param ptr Packed data to compare against or insert.
 * @param size Number of bytes at `ptr` to compare.
 * @return Offset of the existing or newly added entry, or SIZE_MAX when an
 *     existing entry's bytes differ from those at `ptr`.
 */
size_t XNNWeightsCache::look_up_or_insert(
    XNNWeightsCache* context,
    const xnn_weights_cache_look_up_key* cache_key,
    void* ptr,
    size_t size) {
  const size_t cached_offset = XNNWeightsCache::look_up(context, cache_key);
  if (cached_offset != SIZE_MAX) {
    const void* cached_ptr =
        XNNWeightsCache::offset_to_addr(context, cached_offset);
    const bool same_bytes = memcmp(ptr, cached_ptr, size) == 0;
    // A mismatch means the cached entry is out of date.
    return same_bytes ? cached_offset : SIZE_MAX;
  }

  // Not cached yet: the new entry goes at the end of packed_data_ptrs_.
  const size_t new_offset = context->packed_data_ptrs_.size();
  auto& names_by_pointer = context->unpacked_data_to_name_;
  const auto kernel_it = names_by_pointer.find(cache_key->kernel);

  if (kernel_it == names_by_pointer.end()) {
    ET_LOG(
        Info,
        "Warning: Unpacked weight and bias were not registered with names, "
        "this will add new cache entries for packed data and may affect performance.");
  } else {
    // Combined name: kernel name followed by the bias name, if registered.
    std::string packed_name = kernel_it->second;
    if (cache_key->bias != nullptr) {
      const auto bias_it = names_by_pointer.find(cache_key->bias);
      if (bias_it != names_by_pointer.end()) {
        packed_name.append(bias_it->second);
      }
    }

    PackedDataMeta new_metadata{};
    new_metadata.offset = new_offset;
    // ref_count is only incremented after finalizing for runtime
    new_metadata.ref_count = 0;
    new_metadata.in_current_runtime = true;
    context->name_to_packed_data_metadata_[packed_name] = new_metadata;
  }

  context->packed_data_ptrs_.push_back(ptr);
  return new_offset;
}
221+
222+
/**
 * Reports whether the given cache is currently finalized.
 *
 * @param context Cache to query.
 * @return The cache's is_finalized_ flag.
 */
bool XNNWeightsCache::is_finalized(XNNWeightsCache* context) {
  const bool finalized = context->is_finalized_;
  return finalized;
}
225+
226+
/**
 * Translates a cache offset into the packed-data pointer stored at that
 * position in packed_data_ptrs_.
 *
 * @param context Cache that owns the pointer table.
 * @param offset Index previously returned by look_up() or
 *     look_up_or_insert().
 * @return The stored pointer, or nullptr when `offset` is not a valid index
 *     (for example SIZE_MAX).
 */
void* XNNWeightsCache::offset_to_addr(XNNWeightsCache* context, size_t offset) {
  const auto& packed_ptrs = context->packed_data_ptrs_;
  // Reject out-of-range offsets instead of reading past the end of the table.
  if (offset >= packed_ptrs.size()) {
    return nullptr;
  }
  return packed_ptrs[offset];
}
229+
230+
/**
 * Delete callback for the cache; performs no work here.
 *
 * @param context Unused.
 * @return Always xnn_status_success.
 */
enum xnn_status XNNWeightsCache::delete_cache(XNNWeightsCache* context) {
  (void)context; // intentionally unused
  return xnn_status_success;
}
233+
234+
} // namespace delegate
235+
} // namespace xnnpack
236+
} // namespace backends
237+
} // namespace executorch

0 commit comments

Comments
 (0)