Skip to content

Commit 51a2f8b

Browse files
committed
Decouple import/export encodings from core names
This commit decouples the string encodings listed for imports/exports from their core wasm names so that they are instead registered with WIT-level constructs. Previously the parsing phase of a module would register a string encoding for core wasm import/export names, but this subverted the logic of validation, which is where it is determined how exactly an import lines up with WIT-level items. The goal of this commit is to decouple this relation. Worlds are encoded into custom sections with a known string encoding for all imports/exports of that world. This can possibly differ for different parts of an application to theoretically enable one interface to be imported with UTF-8 and another with UTF-16. This means that encodings are tracked per-import/export rather than per-world. Previously this process would assume that there is a single name for an import's/export's encoding, but with new detection and names coming down the line this is no longer going to be the case. For example, with the new names in WebAssembly/component-model#378 there are new names to be supported, meaning that there's not one single name to register encodings with. To help bridge this gap the abstraction here is changed so that metadata for a module records string encodings on a WIT level, for example per WIT import/export, instead of per core wasm import/export. Then during encoding of a component the WIT-level constructs are matched up instead of the core names to determine the string encoding in the lift/lower operation. The end goal is that the connection between core wasm names and WIT names continues to be decoupled, with validation being the only location concerned about this.
1 parent 464aeab commit 51a2f8b

File tree

4 files changed

+162
-117
lines changed

4 files changed

+162
-117
lines changed

crates/wit-component/src/encoding.rs

+34-26
Original file line numberDiff line numberDiff line change
@@ -655,13 +655,19 @@ impl<'a> EncodingState<'a> {
655655
.root_import_type_encoder(None)
656656
.encode_func_type(resolve, func)?;
657657
let core_name = world_func_core_names[&func.name];
658-
let idx = self.encode_lift(module, &core_name, None, func, ty)?;
658+
let idx = self.encode_lift(module, &core_name, export_name, func, ty)?;
659659
self.component
660660
.export(&export_string, ComponentExportKind::Func, idx, None);
661661
}
662662
WorldItem::Interface { id, .. } => {
663663
let core_names = interface_func_core_names.get(id);
664-
self.encode_interface_export(&export_string, module, *id, core_names)?;
664+
self.encode_interface_export(
665+
&export_string,
666+
module,
667+
export_name,
668+
*id,
669+
core_names,
670+
)?;
665671
}
666672
WorldItem::Type(_) => unreachable!(),
667673
}
@@ -674,6 +680,7 @@ impl<'a> EncodingState<'a> {
674680
&mut self,
675681
export_name: &str,
676682
module: CustomModule<'_>,
683+
key: &WorldKey,
677684
export: InterfaceId,
678685
interface_func_core_names: Option<&IndexMap<&str, &str>>,
679686
) -> Result<()> {
@@ -691,9 +698,7 @@ impl<'a> EncodingState<'a> {
691698
for (_, func) in &resolve.interfaces[export].functions {
692699
let core_name = interface_func_core_names.unwrap()[func.name.as_str()];
693700
let ty = root.encode_func_type(resolve, func)?;
694-
let func_index = root
695-
.state
696-
.encode_lift(module, &core_name, Some(export), func, ty)?;
701+
let func_index = root.state.encode_lift(module, &core_name, key, func, ty)?;
697702
imports.push((
698703
import_func_name(func),
699704
ComponentExportKind::Func,
@@ -986,7 +991,7 @@ impl<'a> EncodingState<'a> {
986991
&mut self,
987992
module: CustomModule<'_>,
988993
core_name: &str,
989-
interface: Option<InterfaceId>,
994+
key: &WorldKey,
990995
func: &Function,
991996
ty: u32,
992997
) -> Result<u32> {
@@ -997,16 +1002,19 @@ impl<'a> EncodingState<'a> {
9971002

9981003
let options = RequiredOptions::for_export(resolve, func);
9991004

1000-
let encoding = metadata.export_encodings[core_name];
1005+
let encoding = metadata
1006+
.export_encodings
1007+
.get(resolve, key, &func.name)
1008+
.unwrap();
10011009
let exports = self.info.exports_for(module);
10021010
let realloc_index = exports
1003-
.export_realloc_for(interface, func)
1011+
.export_realloc_for(key, func)
10041012
.map(|name| self.core_alias_export(instance_index, name, ExportKind::Func));
10051013
let mut options = options
10061014
.into_iter(encoding, self.memory_index, realloc_index)?
10071015
.collect::<Vec<_>>();
10081016

1009-
if let Some(post_return) = exports.post_return(interface, func) {
1017+
if let Some(post_return) = exports.post_return(key, func) {
10101018
let post_return = self.core_alias_export(instance_index, post_return, ExportKind::Func);
10111019
options.push(CanonicalOption::PostReturn(post_return));
10121020
}
@@ -1379,7 +1387,7 @@ impl<'a> EncodingState<'a> {
13791387
log::trace!("attempting to materialize import of `{module}::{field}` for {for_module:?}");
13801388
let resolve = &self.info.encoder.metadata.resolve;
13811389
let name_tmp;
1382-
let (key, name) = match import {
1390+
let (key, name, interface_key) = match import {
13831391
// Main module dependencies on an adapter in use are done with an
13841392
// indirection here, so load the shim function and use that.
13851393
Import::AdapterExport(_) => {
@@ -1446,18 +1454,17 @@ impl<'a> EncodingState<'a> {
14461454
// through to the code below. This is where these are connected to a
14471455
// WIT `ImportedInterface` one way or another with the name that was
14481456
// detected during validation.
1449-
Import::ImportedResourceDrop(key, id) => {
1457+
Import::ImportedResourceDrop(key, iface, id) => {
14501458
let ty = &resolve.types[*id];
14511459
let name = ty.name.as_ref().unwrap();
14521460
name_tmp = format!("{RESOURCE_DROP}{name}");
1453-
(key.as_ref(), &name_tmp)
1461+
(key, &name_tmp, iface.map(|_| resolve.name_world_key(key)))
14541462
}
1455-
Import::WorldFunc(name) => (None, name),
1456-
Import::InterfaceFunc(key, _, name) => (Some(key), name),
1463+
Import::WorldFunc(key, name) => (key, name, None),
1464+
Import::InterfaceFunc(key, _, name) => (key, name, Some(resolve.name_world_key(key))),
14571465
};
14581466

1459-
let interface = key.map(|key| resolve.name_world_key(key));
1460-
let import = &self.info.import_map[&interface];
1467+
let import = &self.info.import_map[&interface_key];
14611468
let (index, _, lowering) = import.lowerings.get_full(name).unwrap();
14621469
let metadata = self.info.module_metadata_for(for_module);
14631470

@@ -1480,12 +1487,12 @@ impl<'a> EncodingState<'a> {
14801487
// created, so the specific export is loaded here and used as an
14811488
// import.
14821489
Lowering::Indirect { .. } => {
1483-
let encoding = metadata.import_encodings[&(module.to_string(), field.to_string())];
1490+
let encoding = metadata.import_encodings.get(resolve, key, name).unwrap();
14841491
self.core_alias_export(
14851492
self.shim_instance_index
14861493
.expect("shim should be instantiated"),
14871494
&shims.shims[&ShimKind::IndirectLowering {
1488-
interface: interface.clone(),
1495+
interface: interface_key,
14891496
index,
14901497
realloc: for_module,
14911498
encoding,
@@ -1696,7 +1703,7 @@ impl<'a> Shims<'a> {
16961703
let resolve = &world.encoder.metadata.resolve;
16971704

16981705
for (module, field, import) in module_imports.imports() {
1699-
let (key, name) = match import {
1706+
let (key, name, interface_key) = match import {
17001707
// These imports don't require shims, they can be satisfied
17011708
// as-needed when required.
17021709
Import::ImportedResourceDrop(..)
@@ -1746,11 +1753,12 @@ impl<'a> Shims<'a> {
17461753
// WIT-level functions may require an indirection, so yield some
17471754
// metadata out of this `match` to the loop below to figure that
17481755
// out.
1749-
Import::InterfaceFunc(key, _, name) => (Some(key), name),
1750-
Import::WorldFunc(name) => (None, name),
1756+
Import::InterfaceFunc(key, _, name) => {
1757+
(key, name, Some(resolve.name_world_key(key)))
1758+
}
1759+
Import::WorldFunc(key, name) => (key, name, None),
17511760
};
1752-
let key = key.map(|key| resolve.name_world_key(key));
1753-
let interface = &world.import_map[&key];
1761+
let interface = &world.import_map[&interface_key];
17541762
let (index, _, lowering) = interface.lowerings.get_full(name).unwrap();
17551763
let shim_name = self.shims.len().to_string();
17561764
match lowering {
@@ -1760,9 +1768,9 @@ impl<'a> Shims<'a> {
17601768
log::debug!(
17611769
"shim {shim_name} is import `{module}::{field}` lowering {index} `{name}`",
17621770
);
1763-
let encoding = *metadata
1771+
let encoding = metadata
17641772
.import_encodings
1765-
.get(&(module.to_string(), field.to_string()))
1773+
.get(resolve, key, name)
17661774
.ok_or_else(|| {
17671775
anyhow::anyhow!(
17681776
"missing component metadata for import of \
@@ -1774,7 +1782,7 @@ impl<'a> Shims<'a> {
17741782
debug_name: format!("indirect-{module}-{field}"),
17751783
options: *options,
17761784
kind: ShimKind::IndirectLowering {
1777-
interface: key,
1785+
interface: interface_key,
17781786
index,
17791787
realloc: for_module,
17801788
encoding,

crates/wit-component/src/encoding/world.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ impl<'a> ComponentWorld<'a> {
250250
.chain(self.info.imports.imports())
251251
{
252252
match import {
253-
Import::WorldFunc(name) => {
253+
Import::WorldFunc(_, name) => {
254254
required
255255
.interface_funcs
256256
.entry(None)
@@ -264,7 +264,7 @@ impl<'a> ComponentWorld<'a> {
264264
.or_default()
265265
.insert(name);
266266
}
267-
Import::ImportedResourceDrop(_, id) => {
267+
Import::ImportedResourceDrop(_, _, id) => {
268268
required.resource_drops.insert(*id);
269269
}
270270
_ => {}

crates/wit-component/src/metadata.rs

+105-67
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
//! The dual of `encode` is the `decode_custom_section` function which decodes
4242
//! the three arguments originally passed to `encode`.
4343
44-
use crate::validation::BARE_FUNC_MODULE_NAME;
4544
use crate::{DecodedWasm, StringEncoding};
4645
use anyhow::{bail, Context, Result};
4746
use indexmap::{IndexMap, IndexSet};
@@ -112,11 +111,105 @@ impl Default for Bindgen {
112111
pub struct ModuleMetadata {
113112
/// Per-function options imported into the core wasm module, currently only
114113
/// related to string encoding.
115-
pub import_encodings: IndexMap<(String, String), StringEncoding>,
114+
pub import_encodings: EncodingMap,
116115

117116
/// Per-function options exported from the core wasm module, currently only
118117
/// related to string encoding.
119-
pub export_encodings: IndexMap<String, StringEncoding>,
118+
pub export_encodings: EncodingMap,
119+
}
120+
121+
/// Internal map that keeps track of encodings for various world imports and
122+
/// exports.
123+
///
124+
/// Stored in [`ModuleMetadata`].
125+
#[derive(Default)]
126+
pub struct EncodingMap {
127+
/// A map of an "identifying string" for world items to what string
128+
/// encoding the import or export is using.
129+
///
130+
/// The keys of this map are created by `EncodingMap::key` and are
131+
/// specifically chosen to be able to be looked up during both insertion and
132+
/// fetching. Note that in particular this map does not use `*Id` types such
133+
/// as `InterfaceId` from `wit_parser`. This is due to the fact that during
134+
/// world merging new interfaces are created for named imports (e.g. `import
135+
/// x: interface { ... }`) as inline interfaces are copied from one world to
136+
/// another. Additionally during world merging different interfaces at the
137+
/// same version may be deduplicated.
138+
///
139+
/// For these reasons a string-based key is chosen to avoid juggling IDs
140+
/// through the world merging process. Additionally versions are chopped off
141+
/// for now to help with a problem such as:
142+
///
143+
/// * The main module imports a:b/c@0.1.0
144+
/// * An adapter imports a:b/c@0.1.1
145+
/// * The final world uses a:b/c@0.1.1, but the main module has no
146+
/// encoding listed for that exact item.
147+
///
148+
/// By chopping off versions this is able to get everything registered
149+
/// correctly even in the face of merging interfaces and worlds.
150+
encodings: IndexMap<String, StringEncoding>,
151+
}
152+
153+
impl EncodingMap {
154+
fn insert_all(
155+
&mut self,
156+
resolve: &Resolve,
157+
set: &IndexMap<WorldKey, WorldItem>,
158+
encoding: StringEncoding,
159+
) {
160+
for (name, item) in set {
161+
match item {
162+
WorldItem::Function(func) => {
163+
let key = self.key(resolve, name, &func.name);
164+
self.encodings.insert(key, encoding);
165+
}
166+
WorldItem::Interface { id, .. } => {
167+
for (func, _) in resolve.interfaces[*id].functions.iter() {
168+
let key = self.key(resolve, name, func);
169+
self.encodings.insert(key, encoding);
170+
}
171+
}
172+
WorldItem::Type(_) => {}
173+
}
174+
}
175+
}
176+
177+
/// Looks up the encoding of the function `func` which is scoped under `key`
178+
/// in the world in question.
179+
pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option<StringEncoding> {
180+
let key = self.key(resolve, key, func);
181+
self.encodings.get(&key).copied()
182+
}
183+
184+
fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String {
185+
format!(
186+
"{}/{func}",
187+
match key {
188+
WorldKey::Name(name) => name.to_string(),
189+
WorldKey::Interface(id) => {
190+
let iface = &resolve.interfaces[*id];
191+
let pkg = &resolve.packages[iface.package.unwrap()];
192+
format!(
193+
"{}:{}/{}",
194+
pkg.name.namespace,
195+
pkg.name.name,
196+
iface.name.as_ref().unwrap()
197+
)
198+
}
199+
}
200+
)
201+
}
202+
203+
fn merge(&mut self, other: EncodingMap) -> Result<()> {
204+
for (key, encoding) in other.encodings {
205+
if let Some(prev) = self.encodings.insert(key.clone(), encoding) {
206+
if prev != encoding {
207+
bail!("conflicting string encodings specified for `{key}`");
208+
}
209+
}
210+
}
211+
Ok(())
212+
}
120213
}
121214

122215
/// This function will parse the core `wasm` binary given as input and return a
@@ -313,38 +406,18 @@ impl Bindgen {
313406
producers,
314407
} = other;
315408

316-
let world = self
409+
let remap = self
317410
.resolve
318411
.merge(resolve)
319-
.context("failed to merge WIT package sets together")?
320-
.map_world(world, None)?;
412+
.context("failed to merge WIT package sets together")?;
413+
let world = remap.map_world(world, None)?;
321414
let exports = self.resolve.worlds[world].exports.keys().cloned().collect();
322415
self.resolve
323416
.merge_worlds(world, self.world)
324417
.context("failed to merge worlds from two documents")?;
325418

326-
for (name, encoding) in export_encodings {
327-
let prev = self
328-
.metadata
329-
.export_encodings
330-
.insert(name.clone(), encoding);
331-
if let Some(prev) = prev {
332-
if prev != encoding {
333-
bail!("conflicting string encodings specified for export `{name}`");
334-
}
335-
}
336-
}
337-
for ((module, name), encoding) in import_encodings {
338-
let prev = self
339-
.metadata
340-
.import_encodings
341-
.insert((module.clone(), name.clone()), encoding);
342-
if let Some(prev) = prev {
343-
if prev != encoding {
344-
bail!("conflicting string encodings specified for import `{module}::{name}`");
345-
}
346-
}
347-
}
419+
self.metadata.import_encodings.merge(import_encodings)?;
420+
self.metadata.export_encodings.merge(export_encodings)?;
348421
if let Some(producers) = producers {
349422
if let Some(mine) = &mut self.producers {
350423
mine.merge(&producers);
@@ -364,45 +437,10 @@ impl ModuleMetadata {
364437
let mut ret = ModuleMetadata::default();
365438

366439
let world = &resolve.worlds[world];
367-
for (name, item) in world.imports.iter() {
368-
let name = resolve.name_world_key(name);
369-
match item {
370-
WorldItem::Function(_) => {
371-
let prev = ret
372-
.import_encodings
373-
.insert((BARE_FUNC_MODULE_NAME.to_string(), name.clone()), encoding);
374-
assert!(prev.is_none());
375-
}
376-
WorldItem::Interface { id, .. } => {
377-
for (func, _) in resolve.interfaces[*id].functions.iter() {
378-
let prev = ret
379-
.import_encodings
380-
.insert((name.clone(), func.clone()), encoding);
381-
assert!(prev.is_none());
382-
}
383-
}
384-
WorldItem::Type(_) => {}
385-
}
386-
}
387-
388-
for (name, item) in world.exports.iter() {
389-
let name = resolve.name_world_key(name);
390-
match item {
391-
WorldItem::Function(func) => {
392-
let name = func.core_export_name(None).into_owned();
393-
let prev = ret.export_encodings.insert(name.clone(), encoding);
394-
assert!(prev.is_none());
395-
}
396-
WorldItem::Interface { id, .. } => {
397-
for (_, func) in resolve.interfaces[*id].functions.iter() {
398-
let name = func.core_export_name(Some(&name)).into_owned();
399-
let prev = ret.export_encodings.insert(name, encoding);
400-
assert!(prev.is_none());
401-
}
402-
}
403-
WorldItem::Type(_) => {}
404-
}
405-
}
440+
ret.export_encodings
441+
.insert_all(resolve, &world.exports, encoding);
442+
ret.import_encodings
443+
.insert_all(resolve, &world.imports, encoding);
406444

407445
ret
408446
}

0 commit comments

Comments
 (0)