Decouple import/export encodings from core names

alexcrichton · alexcrichton · commit 51a2f8be16ed · 2024-09-28T05:05:45.000-07:00
This commit decouples the string encodings listed for imports/exports from their core wasm names to instead being registered with WIT-level constructs instead. Previously the parsing phase of a module would register a string encoding for core wasm import/export names but this subverted the logic of validation where detection of how exactly an import lines up with WIT-level items is determined. The goal of this commit is to decouple this relation. Worlds are encoding into custom sections with a known string encoding for all imports/exports of that world. This can possibly differ for different parts of an application to theoretically enable one interface to be imported with UTF-8 and another with UTF-16. This means that encodings are tracked per-import/export rather than per-world. Previously this process would assume that there is a single name for an import's/export's encoding but with new detection and names coming down the line this is no longer going to be the case. For example with the new names in WebAssembly/component-model#378 there are new names to be supported meaning that there's not one single name to register encodings with. To help bridge this gap the abstraction here is changed to where metadata for a module records string encodings on a WIT level, for example per WIT import/export, instead of per core wasm import/export. Then during encoding of a component the WIT level constructs are matched up instead of the core names to determine the string encoding in the lift/lower operation. The end goal is that the connection between core wasm names and WIT names continues to be decoupled where validation is the only location concerned about this.
diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs
@@ -655,13 +655,19 @@ impl<'a> EncodingState<'a> {
                         .root_import_type_encoder(None)
                         .encode_func_type(resolve, func)?;
                     let core_name = world_func_core_names[&func.name];
-                    let idx = self.encode_lift(module, &core_name, None, func, ty)?;
+                    let idx = self.encode_lift(module, &core_name, export_name, func, ty)?;
                     self.component
                         .export(&export_string, ComponentExportKind::Func, idx, None);
                 }
                 WorldItem::Interface { id, .. } => {
                     let core_names = interface_func_core_names.get(id);
-                    self.encode_interface_export(&export_string, module, *id, core_names)?;
+                    self.encode_interface_export(
+                        &export_string,
+                        module,
+                        export_name,
+                        *id,
+                        core_names,
+                    )?;
                 }
                 WorldItem::Type(_) => unreachable!(),
             }
@@ -674,6 +680,7 @@ impl<'a> EncodingState<'a> {
         &mut self,
         export_name: &str,
         module: CustomModule<'_>,
+        key: &WorldKey,
         export: InterfaceId,
         interface_func_core_names: Option<&IndexMap<&str, &str>>,
     ) -> Result<()> {
@@ -691,9 +698,7 @@ impl<'a> EncodingState<'a> {
         for (_, func) in &resolve.interfaces[export].functions {
             let core_name = interface_func_core_names.unwrap()[func.name.as_str()];
             let ty = root.encode_func_type(resolve, func)?;
-            let func_index = root
-                .state
-                .encode_lift(module, &core_name, Some(export), func, ty)?;
+            let func_index = root.state.encode_lift(module, &core_name, key, func, ty)?;
             imports.push((
                 import_func_name(func),
                 ComponentExportKind::Func,
@@ -986,7 +991,7 @@ impl<'a> EncodingState<'a> {
         &mut self,
         module: CustomModule<'_>,
         core_name: &str,
-        interface: Option<InterfaceId>,
+        key: &WorldKey,
         func: &Function,
         ty: u32,
     ) -> Result<u32> {
@@ -997,16 +1002,19 @@ impl<'a> EncodingState<'a> {
 
         let options = RequiredOptions::for_export(resolve, func);
 
-        let encoding = metadata.export_encodings[core_name];
+        let encoding = metadata
+            .export_encodings
+            .get(resolve, key, &func.name)
+            .unwrap();
         let exports = self.info.exports_for(module);
         let realloc_index = exports
-            .export_realloc_for(interface, func)
+            .export_realloc_for(key, func)
             .map(|name| self.core_alias_export(instance_index, name, ExportKind::Func));
         let mut options = options
             .into_iter(encoding, self.memory_index, realloc_index)?
             .collect::<Vec<_>>();
 
-        if let Some(post_return) = exports.post_return(interface, func) {
+        if let Some(post_return) = exports.post_return(key, func) {
             let post_return = self.core_alias_export(instance_index, post_return, ExportKind::Func);
             options.push(CanonicalOption::PostReturn(post_return));
         }
@@ -1379,7 +1387,7 @@ impl<'a> EncodingState<'a> {
         log::trace!("attempting to materialize import of `{module}::{field}` for {for_module:?}");
         let resolve = &self.info.encoder.metadata.resolve;
         let name_tmp;
-        let (key, name) = match import {
+        let (key, name, interface_key) = match import {
             // Main module dependencies on an adapter in use are done with an
             // indirection here, so load the shim function and use that.
             Import::AdapterExport(_) => {
@@ -1446,18 +1454,17 @@ impl<'a> EncodingState<'a> {
             // through to the code below. This is where these are connected to a
             // WIT `ImportedInterface` one way or another with the name that was
             // detected during validation.
-            Import::ImportedResourceDrop(key, id) => {
+            Import::ImportedResourceDrop(key, iface, id) => {
                 let ty = &resolve.types[*id];
                 let name = ty.name.as_ref().unwrap();
                 name_tmp = format!("{RESOURCE_DROP}{name}");
-                (key.as_ref(), &name_tmp)
+                (key, &name_tmp, iface.map(|_| resolve.name_world_key(key)))
             }
-            Import::WorldFunc(name) => (None, name),
-            Import::InterfaceFunc(key, _, name) => (Some(key), name),
+            Import::WorldFunc(key, name) => (key, name, None),
+            Import::InterfaceFunc(key, _, name) => (key, name, Some(resolve.name_world_key(key))),
         };
 
-        let interface = key.map(|key| resolve.name_world_key(key));
-        let import = &self.info.import_map[&interface];
+        let import = &self.info.import_map[&interface_key];
         let (index, _, lowering) = import.lowerings.get_full(name).unwrap();
         let metadata = self.info.module_metadata_for(for_module);
 
@@ -1480,12 +1487,12 @@ impl<'a> EncodingState<'a> {
             // created, so the specific export is loaded here and used as an
             // import.
             Lowering::Indirect { .. } => {
-                let encoding = metadata.import_encodings[&(module.to_string(), field.to_string())];
+                let encoding = metadata.import_encodings.get(resolve, key, name).unwrap();
                 self.core_alias_export(
                     self.shim_instance_index
                         .expect("shim should be instantiated"),
                     &shims.shims[&ShimKind::IndirectLowering {
-                        interface: interface.clone(),
+                        interface: interface_key,
                         index,
                         realloc: for_module,
                         encoding,
@@ -1696,7 +1703,7 @@ impl<'a> Shims<'a> {
         let resolve = &world.encoder.metadata.resolve;
 
         for (module, field, import) in module_imports.imports() {
-            let (key, name) = match import {
+            let (key, name, interface_key) = match import {
                 // These imports don't require shims, they can be satisfied
                 // as-needed when required.
                 Import::ImportedResourceDrop(..)
@@ -1746,11 +1753,12 @@ impl<'a> Shims<'a> {
                 // WIT-level functions may require an indirection, so yield some
                 // metadata out of this `match` to the loop below to figure that
                 // out.
-                Import::InterfaceFunc(key, _, name) => (Some(key), name),
-                Import::WorldFunc(name) => (None, name),
+                Import::InterfaceFunc(key, _, name) => {
+                    (key, name, Some(resolve.name_world_key(key)))
+                }
+                Import::WorldFunc(key, name) => (key, name, None),
             };
-            let key = key.map(|key| resolve.name_world_key(key));
-            let interface = &world.import_map[&key];
+            let interface = &world.import_map[&interface_key];
             let (index, _, lowering) = interface.lowerings.get_full(name).unwrap();
             let shim_name = self.shims.len().to_string();
             match lowering {
@@ -1760,9 +1768,9 @@ impl<'a> Shims<'a> {
                     log::debug!(
                         "shim {shim_name} is import `{module}::{field}` lowering {index} `{name}`",
                     );
-                    let encoding = *metadata
+                    let encoding = metadata
                         .import_encodings
-                        .get(&(module.to_string(), field.to_string()))
+                        .get(resolve, key, name)
                         .ok_or_else(|| {
                             anyhow::anyhow!(
                                 "missing component metadata for import of \
@@ -1774,7 +1782,7 @@ impl<'a> Shims<'a> {
                         debug_name: format!("indirect-{module}-{field}"),
                         options: *options,
                         kind: ShimKind::IndirectLowering {
-                            interface: key,
+                            interface: interface_key,
                             index,
                             realloc: for_module,
                             encoding,
diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs
@@ -250,7 +250,7 @@ impl<'a> ComponentWorld<'a> {
             .chain(self.info.imports.imports())
         {
             match import {
-                Import::WorldFunc(name) => {
+                Import::WorldFunc(_, name) => {
                     required
                         .interface_funcs
                         .entry(None)
@@ -264,7 +264,7 @@ impl<'a> ComponentWorld<'a> {
                         .or_default()
                         .insert(name);
                 }
-                Import::ImportedResourceDrop(_, id) => {
+                Import::ImportedResourceDrop(_, _, id) => {
                     required.resource_drops.insert(*id);
                 }
                 _ => {}
diff --git a/crates/wit-component/src/metadata.rs b/crates/wit-component/src/metadata.rs
@@ -41,7 +41,6 @@
 //! The dual of `encode` is the `decode_custom_section` fucntion which decodes
 //! the three arguments originally passed to `encode`.
 
-use crate::validation::BARE_FUNC_MODULE_NAME;
 use crate::{DecodedWasm, StringEncoding};
 use anyhow::{bail, Context, Result};
 use indexmap::{IndexMap, IndexSet};
@@ -112,11 +111,105 @@ impl Default for Bindgen {
 pub struct ModuleMetadata {
     /// Per-function options imported into the core wasm module, currently only
     /// related to string encoding.
-    pub import_encodings: IndexMap<(String, String), StringEncoding>,
+    pub import_encodings: EncodingMap,
 
     /// Per-function options exported from the core wasm module, currently only
     /// related to string encoding.
-    pub export_encodings: IndexMap<String, StringEncoding>,
+    pub export_encodings: EncodingMap,
+}
+
+/// Internal map that keeps track of encodings for various world imports and
+/// exports.
+///
+/// Stored in [`ModuleMetadata`].
+#[derive(Default)]
+pub struct EncodingMap {
+    /// A map of an "identifying string" for world items to what string
+    /// encoding the import or export is using.
+    ///
+    /// The keys of this map are created by `EncodingMap::key` and are
+    /// specifically chosen to be able to be looked up during both insertion and
+    /// fetching. Note that in particular this map does not use `*Id` types such
+    /// as `InterfaceId` from `wit_parser`. This is due to the fact that during
+    /// world merging new interfaces are created for named imports (e.g. `import
+    /// x: interface { ... }`) as inline interfaces are copied from one world to
+    /// another. Additionally during world merging different interfaces at the
+    /// same version may be deduplicated.
+    ///
+    /// For these reasons a string-based key is chosen to avoid juggling IDs
+    /// through the world merging process. Additionally versions are chopped off
+    /// for now to help with a problem such as:
+    ///
+    /// * The main module imports a:b/c@0.1.0
+    /// * An adapter imports a:b/c@0.1.1
+    /// * The final world uses a:b/c@0.1.1, but the main module has no
+    ///   encoding listed for that exact item.
+    ///
+    /// By chopping off versions this is able to get everything registered
+    /// correctly even in the fact of merging interfaces and worlds.
+    encodings: IndexMap<String, StringEncoding>,
+}
+
+impl EncodingMap {
+    fn insert_all(
+        &mut self,
+        resolve: &Resolve,
+        set: &IndexMap<WorldKey, WorldItem>,
+        encoding: StringEncoding,
+    ) {
+        for (name, item) in set {
+            match item {
+                WorldItem::Function(func) => {
+                    let key = self.key(resolve, name, &func.name);
+                    self.encodings.insert(key, encoding);
+                }
+                WorldItem::Interface { id, .. } => {
+                    for (func, _) in resolve.interfaces[*id].functions.iter() {
+                        let key = self.key(resolve, name, func);
+                        self.encodings.insert(key, encoding);
+                    }
+                }
+                WorldItem::Type(_) => {}
+            }
+        }
+    }
+
+    /// Looks up the encoding of the function `func` which is scoped under `key`
+    /// in the world in question.
+    pub fn get(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> Option<StringEncoding> {
+        let key = self.key(resolve, key, func);
+        self.encodings.get(&key).copied()
+    }
+
+    fn key(&self, resolve: &Resolve, key: &WorldKey, func: &str) -> String {
+        format!(
+            "{}/{func}",
+            match key {
+                WorldKey::Name(name) => name.to_string(),
+                WorldKey::Interface(id) => {
+                    let iface = &resolve.interfaces[*id];
+                    let pkg = &resolve.packages[iface.package.unwrap()];
+                    format!(
+                        "{}:{}/{}",
+                        pkg.name.namespace,
+                        pkg.name.name,
+                        iface.name.as_ref().unwrap()
+                    )
+                }
+            }
+        )
+    }
+
+    fn merge(&mut self, other: EncodingMap) -> Result<()> {
+        for (key, encoding) in other.encodings {
+            if let Some(prev) = self.encodings.insert(key.clone(), encoding) {
+                if prev != encoding {
+                    bail!("conflicting string encodings specified for `{key}`");
+                }
+            }
+        }
+        Ok(())
+    }
 }
 
 /// This function will parse the core `wasm` binary given as input and return a
@@ -313,38 +406,18 @@ impl Bindgen {
             producers,
         } = other;
 
-        let world = self
+        let remap = self
             .resolve
             .merge(resolve)
-            .context("failed to merge WIT package sets together")?
-            .map_world(world, None)?;
+            .context("failed to merge WIT package sets together")?;
+        let world = remap.map_world(world, None)?;
         let exports = self.resolve.worlds[world].exports.keys().cloned().collect();
         self.resolve
             .merge_worlds(world, self.world)
             .context("failed to merge worlds from two documents")?;
 
-        for (name, encoding) in export_encodings {
-            let prev = self
-                .metadata
-                .export_encodings
-                .insert(name.clone(), encoding);
-            if let Some(prev) = prev {
-                if prev != encoding {
-                    bail!("conflicting string encodings specified for export `{name}`");
-                }
-            }
-        }
-        for ((module, name), encoding) in import_encodings {
-            let prev = self
-                .metadata
-                .import_encodings
-                .insert((module.clone(), name.clone()), encoding);
-            if let Some(prev) = prev {
-                if prev != encoding {
-                    bail!("conflicting string encodings specified for import `{module}::{name}`");
-                }
-            }
-        }
+        self.metadata.import_encodings.merge(import_encodings)?;
+        self.metadata.export_encodings.merge(export_encodings)?;
         if let Some(producers) = producers {
             if let Some(mine) = &mut self.producers {
                 mine.merge(&producers);
@@ -364,45 +437,10 @@ impl ModuleMetadata {
         let mut ret = ModuleMetadata::default();
 
         let world = &resolve.worlds[world];
-        for (name, item) in world.imports.iter() {
-            let name = resolve.name_world_key(name);
-            match item {
-                WorldItem::Function(_) => {
-                    let prev = ret
-                        .import_encodings
-                        .insert((BARE_FUNC_MODULE_NAME.to_string(), name.clone()), encoding);
-                    assert!(prev.is_none());
-                }
-                WorldItem::Interface { id, .. } => {
-                    for (func, _) in resolve.interfaces[*id].functions.iter() {
-                        let prev = ret
-                            .import_encodings
-                            .insert((name.clone(), func.clone()), encoding);
-                        assert!(prev.is_none());
-                    }
-                }
-                WorldItem::Type(_) => {}
-            }
-        }
-
-        for (name, item) in world.exports.iter() {
-            let name = resolve.name_world_key(name);
-            match item {
-                WorldItem::Function(func) => {
-                    let name = func.core_export_name(None).into_owned();
-                    let prev = ret.export_encodings.insert(name.clone(), encoding);
-                    assert!(prev.is_none());
-                }
-                WorldItem::Interface { id, .. } => {
-                    for (_, func) in resolve.interfaces[*id].functions.iter() {
-                        let name = func.core_export_name(Some(&name)).into_owned();
-                        let prev = ret.export_encodings.insert(name, encoding);
-                        assert!(prev.is_none());
-                    }
-                }
-                WorldItem::Type(_) => {}
-            }
-        }
+        ret.export_encodings
+            .insert_all(resolve, &world.exports, encoding);
+        ret.import_encodings
+            .insert_all(resolve, &world.imports, encoding);
 
         ret
     }
diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs

Original file line number	Diff line number	Diff line change
`@@ -250,7 +250,7 @@ impl<'a> ComponentWorld<'a> {`
`250`	`250`	`.chain(self.info.imports.imports())`
`251`	`251`	`{`
`252`	`252`	`match import {`
`253`		`- Import::WorldFunc(name) => {`
	`253`	`+ Import::WorldFunc(_, name) => {`
`254`	`254`	`required`
`255`	`255`	`.interface_funcs`
`256`	`256`	`.entry(None)`
`@@ -264,7 +264,7 @@ impl<'a> ComponentWorld<'a> {`
`264`	`264`	`.or_default()`
`265`	`265`	`.insert(name);`
`266`	`266`	`}`
`267`		`- Import::ImportedResourceDrop(_, id) => {`
	`267`	`+ Import::ImportedResourceDrop(_, _, id) => {`
`268`	`268`	`required.resource_drops.insert(*id);`
`269`	`269`	`}`
`270`	`270`	`_ => {}`