mozilla
diff --git a/‎.gitattributes
Lines changed: 1 addition & 0 deletions b/‎.gitattributes
Lines changed: 1 addition & 0 deletions
diff --git a/‎Taskfile.yml
Lines changed: 13 additions & 8 deletions b/‎Taskfile.yml
Lines changed: 13 additions & 8 deletions
diff --git a/‎inference/.gitignore
Lines changed: 11 additions & 8 deletions b/‎inference/.gitignore
Lines changed: 11 additions & 8 deletions
diff --git a/‎inference/CMakeLists.txt
Lines changed: 1 addition & 1 deletion b/‎inference/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎inference/scripts/build-wasm.py
Lines changed: 82 additions & 8 deletions b/‎inference/scripts/build-wasm.py
Lines changed: 82 additions & 8 deletions
diff --git a/‎inference/scripts/clean.sh
Lines changed: 9 additions & 8 deletions b/‎inference/scripts/clean.sh
Lines changed: 9 additions & 8 deletions
diff --git a/‎inference/scripts/test-wasm.py
Lines changed: 95 additions & 0 deletions b/‎inference/scripts/test-wasm.py
Lines changed: 95 additions & 0 deletions
diff --git a/‎inference/src/tests/CMakeLists.txt
Lines changed: 1 addition & 1 deletion b/‎inference/src/tests/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎inference/src/tests/units/CMakeLists.txt
Lines changed: 2 additions & 2 deletions b/‎inference/src/tests/units/CMakeLists.txt
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1 @@
+inference/**/*.gz filter=lfs diff=lfs merge=lfs -text
@@ -101,16 +101,19 @@ tasks:
 
   inference-test-wasm:
     desc: Run inference build-wasm JS tests.
-    deps:
-      - task: inference-build-wasm
-        vars:
-          # When the host system is macOS, the WASM build fails when
-          # building with multiple threads in the Docker container.
-          # If the host system is macOS, pass -j 1.
-          CLI_ARGS: '{{if eq (env "HOST_OS") "Darwin"}}-j 1{{end}}'
     cmds:
       - >-
-          cd inference/wasm/tests && npm install && npm run test
+          ./inference/scripts/test-wasm.py {{.CLI_ARGS}}
+
+  lint-eslint:
+    desc: Checks the styling of the JS code with eslint.
+    cmds:
+      - cd ./inference/wasm/tests && npm install && npm run lint
+
+  lint-eslint-fix:
+    desc: Fixes the styling of the JS code with eslint.
+    cmds:
+      - cd ./inference/wasm/tests && npm install && npm run lint:fix
 
   lint-black:
     desc: Checks the styling of the Python code with Black.
@@ -141,12 +144,14 @@ tasks:
   lint-fix:
     desc: Fix all automatically fixable errors. This is useful to run before pushing.
     cmds:
+      - task: lint-eslint-fix
       - task: lint-black-fix
       - task: lint-ruff-fix
 
   lint:
     desc: Run all available linting tools.
     cmds:
+      - task: lint-eslint
       - task: lint-black
       - task: lint-ruff
 
 
@@ -15,15 +15,18 @@ compile_commands.json
 CTestTestfile.cmake
 _deps
 
+# Build paths
+build
+build-local
+build-native
+build-wasm
 
-/build
-/build-local
-/build-native
-/build-wasm
-models
-wasm/test_page/node_modules
-wasm/module/worker/bergamot-translator-worker.*
-wasm/module/browsermt-bergamot-translator-*.tgz
+# WASM
+wasm/tests/generated
+wasm/tests/models/**/*.bin
+wasm/tests/models/**/*.spm
+wasm/tests/node_modules
+wasm/tests/.vitest-reports
 
 # VSCode
 .vscode
@@ -162,7 +162,7 @@ if(COMPILE_WASM)
     -sEXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback]
     # Necessary for mozintgemm linking. This prepares the `wasmMemory` variable ahead of time as
     # opposed to delegating that task to the wasm binary itself. This way we can link MozIntGEMM
-    # module to the same memory as the main bergamot-translator module.
+    # module to the same memory as the main bergamot-translator-source module.
     -sIMPORTED_MEMORY=1
     # Dynamic execution is either frowned upon or blocked inside browser extensions
     -sDYNAMIC_EXECUTION=0
 
@@ -18,11 +18,12 @@
 MARIAN_PATH = os.path.join(THIRD_PARTY_PATH, "browsermt-marian-dev")
 EMSDK_PATH = os.path.join(THIRD_PARTY_PATH, "emsdk")
 EMSDK_ENV_PATH = os.path.join(EMSDK_PATH, "emsdk_env.sh")
-WASM_PATH = os.path.join(BUILD_PATH, "bergamot-translator-worker.wasm")
-JS_PATH = os.path.join(BUILD_PATH, "bergamot-translator-worker.js")
+WASM_ARTIFACT = os.path.join(BUILD_PATH, "bergamot-translator.wasm")
+JS_ARTIFACT = os.path.join(BUILD_PATH, "bergamot-translator.js")
 PATCHES_PATH = os.path.join(INFERENCE_PATH, "patches")
 BUILD_DIRECTORY = os.path.join(INFERENCE_PATH, "build-wasm")
-GEMM_SCRIPT = os.path.join(INFERENCE_PATH, "wasm", "patch-artifacts-import-gemm-module.sh")
+WASM_PATH = os.path.join(INFERENCE_PATH, "wasm")
+GEMM_SCRIPT = os.path.join(WASM_PATH, "patch-artifacts-import-gemm-module.sh")
 DETECT_DOCKER_SCRIPT = os.path.join(SCRIPTS_PATH, "detect-docker.sh")
 
 patches = [
@@ -95,6 +96,56 @@ def revert_git_patch(repo_path, patch_path):
     subprocess.check_call(["git", "apply", "-R", "--reject", patch_path], cwd=PROJECT_ROOT_PATH)
 
 
+def prepare_js_artifact():
+    """
+    Rewrites the generated Bergamot JS artifact in place for use in Gecko.
+
+    The rewritten file starts with the MPL 2.0 license header and wraps the generated
+    code, indented by two spaces, in a `loadBergamot(Module)` function that returns
+    `Module`. Calls to `console.log(` are redirected to `Module.print(`, and a log
+    message reporting the buffer size is added at the top of
+    `updateGlobalBufferAndViews`.
+    """
+    # License header followed by the opening of the wrapper function.
+    preamble = (
+        "/* This Source Code Form is subject to the terms of the Mozilla Public\n"
+        " * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
+        " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n"
+        "\n"
+        "function loadBergamot(Module) {\n"
+        "\n"
+    )
+
+    # Indent every line of the generated code so that it sits inside the wrapper.
+    with open(JS_ARTIFACT, "r", encoding="utf8") as generated:
+        indented_body = "".join("  " + js_line for js_line in generated)
+
+    # Close the wrapper, handing the module back to the caller.
+    wrapped = preamble + indented_body + "\n  return Module;\n}"
+
+    # Route output through the Module's own printing.
+    wrapped = wrapped.replace("console.log(", "Module.print(")
+
+    # Add a log line at the top of `updateGlobalBufferAndViews` that reports the
+    # buffer size. The replacement text is kept exactly as it must appear in the file.
+    buffer_hook = "function updateGlobalBufferAndViews(buf) {"
+    instrumented_hook = """
+          function updateGlobalBufferAndViews(buf) {
+            const mb = (buf.byteLength / 1_000_000).toFixed();
+            Module.print(
+              `Growing wasm buffer to ${mb}MB (${buf.byteLength} bytes).`
+            );
+        """
+    wrapped = wrapped.replace(buffer_hook, instrumented_hook)
+
+    print(f"\n📄 Updating {JS_ARTIFACT} in place")
+    # Overwrite the original artifact with the wrapped source.
+    with open(JS_ARTIFACT, "w", encoding="utf8") as output:
+        output.write(wrapped)
+
+
 def build_bergamot(args: Optional[list[str]]):
     if args.clobber and os.path.exists(BUILD_PATH):
         shutil.rmtree(BUILD_PATH)
@@ -127,7 +178,18 @@ def run_shell(command):
         print("\n🏃 Running CMake for Bergamot\n")
         run_shell(f"emcmake cmake -DCOMPILE_WASM=on -DWORMHOLE=off {flags} {INFERENCE_PATH}")
 
-        cores = args.j if args.j else multiprocessing.cpu_count()
+        if args.j:
+            # If -j is specified explicitly, use it.
+            cores = args.j
+        elif os.getenv("HOST_OS") == "Darwin":
+            # There is an issue building with multiple cores when the Linux Docker container is
+            # running on a macOS host system. If the Docker container was created with HOST_OS
+            # set to Darwin, we should use only 1 core to build.
+            cores = 1
+        else:
+            # Otherwise, build with as many cores as we have.
+            cores = multiprocessing.cpu_count()
+
         print(f"\n🏃 Building Bergamot with emmake using {cores} cores\n")
 
         try:
@@ -142,14 +204,14 @@ def run_shell(command):
         subprocess.check_call(["bash", GEMM_SCRIPT, BUILD_PATH])
 
         print("\n✅ Build complete\n")
-        print("  " + JS_PATH)
-        print("  " + WASM_PATH)
+        print("  " + JS_ARTIFACT)
+        print("  " + WASM_ARTIFACT)
 
         # Get the sizes of the build artifacts.
-        wasm_size = os.path.getsize(WASM_PATH)
+        wasm_size = os.path.getsize(WASM_ARTIFACT)
         gzip_size = int(
             subprocess.run(
-                f"gzip -c {WASM_PATH} | wc -c",
+                f"gzip -c {WASM_ARTIFACT} | wc -c",
                 check=True,
                 shell=True,
                 capture_output=True,
@@ -158,6 +220,8 @@ def run_shell(command):
         print(f"  Uncompressed wasm size: {to_human_readable(wasm_size)}")
         print(f"  Compressed wasm size: {to_human_readable(gzip_size)}")
 
+        prepare_js_artifact()
+
     finally:
         print("\n🖌️ Reverting the source code patches\n")
         for repo_path, patch_path in patches[::-1]:
@@ -167,6 +231,16 @@ def run_shell(command):
 def main():
     args = parser.parse_args()
 
+    if (
+        os.path.exists(BUILD_PATH)
+        and os.path.isdir(BUILD_PATH)
+        and os.listdir(BUILD_PATH)
+        and not args.clobber
+    ):
+        print(f"\n🏗️  Build directory {BUILD_PATH} already exists and is non-empty.\n")
+        print("   Pass the --clobber flag to rebuild if desired.")
+        return
+
     if not os.path.exists(THIRD_PARTY_PATH):
         os.mkdir(THIRD_PARTY_PATH)
 
 
@@ -10,20 +10,21 @@ cd "$(dirname $0)/.."
 # List of directories to clean
 dirs=("build-local" "build-wasm" "emsdk")
 
-# Flag to track if any directories were cleaned
-cleaned=false
-
 # Check and remove directories
 for dir in "${dirs[@]}"; do
     if [ -d "$dir" ]; then
         echo "Removing $dir..."
         rm -rf "$dir"
-        cleaned=true
     fi
 done
 
-# If no directories were cleaned, print a message
-if [ "$cleaned" = false ]; then
-    echo "Nothing to clean"
-fi
+echo "Removing generated wasm artifacts..."
+rm -rf wasm/tests/generated/*.js
+rm -rf wasm/tests/generated/*.wasm
+rm -rf wasm/tests/generated/*.sha256
+
+echo "Removing extracted model files..."
+# Bash expands `**` like a single `*` unless `shopt -s globstar` is enabled, so a glob
+# such as wasm/tests/models/**/*.bin only reaches files exactly one directory below
+# models/. `find` removes the extracted files at any depth, which matches what the
+# wasm/tests/models/**/*.bin and **/*.spm ignore patterns cover.
+# The directory check keeps this step a silent no-op when there is nothing to clean.
+if [ -d wasm/tests/models ]; then
+    find wasm/tests/models -type f \( -name '*.bin' -o -name '*.spm' \) -exec rm -f {} +
+fi
 
+echo
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+import argparse
+import hashlib
+import os
+import shutil
+import subprocess
+import sys
+
+SCRIPTS_PATH = os.path.realpath(os.path.dirname(__file__))
+INFERENCE_PATH = os.path.dirname(SCRIPTS_PATH)
+BUILD_PATH = os.path.join(INFERENCE_PATH, "build-wasm")
+WASM_PATH = os.path.join(INFERENCE_PATH, "wasm")
+WASM_TESTS_PATH = os.path.join(WASM_PATH, "tests")
+GENERATED_PATH = os.path.join(WASM_TESTS_PATH, "generated")
+MODELS_PATH = os.path.join(WASM_TESTS_PATH, "models")
+WASM_ARTIFACT = os.path.join(BUILD_PATH, "bergamot-translator.wasm")
+JS_ARTIFACT = os.path.join(BUILD_PATH, "bergamot-translator.js")
+JS_ARTIFACT_HASH = os.path.join(GENERATED_PATH, "bergamot-translator.js.sha256")
+
+
+def calculate_sha256(file_path):
+    """
+    Returns the hexadecimal SHA-256 digest of the file at `file_path`.
+
+    The file is opened in binary mode and hashed in 4096-byte chunks rather than
+    being read all at once.
+    """
+    digest = hashlib.sha256()
+    with open(file_path, "rb") as stream:
+        while True:
+            chunk = stream.read(4096)
+            if not chunk:
+                # An empty read means the end of the file has been reached.
+                break
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def main():
+    """
+    Builds the WASM artifacts, stages them in the WASM test directory together with
+    the model files, and runs the JS tests.
+
+    The --clobber, --debug, and -j options are forwarded to build-wasm.py. Every
+    subprocess is run with check=True, so a failing step raises CalledProcessError
+    and the remaining steps are not run.
+    """
+    parser = argparse.ArgumentParser(
+        description="Test WASM by building and handling artifacts.",
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument("--clobber", action="store_true", help="Clobber the build artifacts")
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Build with debug symbols, useful for profiling",
+    )
+    parser.add_argument(
+        "-j",
+        type=int,
+        help="Number of cores to use for building (default: all available cores)",
+    )
+    options = parser.parse_args()
+
+    # Forward only the options that were actually provided on the command line.
+    forwarded = []
+    if options.clobber:
+        forwarded += ["--clobber"]
+    if options.debug:
+        forwarded += ["--debug"]
+    if options.j:
+        forwarded += ["-j", str(options.j)]
+
+    print("\n🚀 Starting build-wasm.py")
+    subprocess.run(
+        [sys.executable, os.path.join(SCRIPTS_PATH, "build-wasm.py"), *forwarded],
+        check=True,
+    )
+
+    print("\n📥 Pulling translations model files with git lfs\n")
+    subprocess.run(["git", "lfs", "pull"], cwd=MODELS_PATH, check=True)
+    print(f"   Pulled all files in {MODELS_PATH}")
+
+    print("\n📁 Copying generated build artifacts to the WASM test directory\n")
+
+    os.makedirs(GENERATED_PATH, exist_ok=True)
+    for artifact in (WASM_ARTIFACT, JS_ARTIFACT):
+        shutil.copy2(artifact, GENERATED_PATH)
+
+    print(f"   Copied the following artifacts to {GENERATED_PATH}:")
+    print(f"     - {JS_ARTIFACT}")
+    print(f"     - {WASM_ARTIFACT}")
+
+    print(f"\n🔑 Calculating SHA-256 hash of {JS_ARTIFACT}\n")
+    digest = calculate_sha256(JS_ARTIFACT)
+    # One line in the "<hash>  <file name>" layout, using the artifact's base name.
+    hash_line = f"{digest}  {os.path.basename(JS_ARTIFACT)}\n"
+    with open(JS_ARTIFACT_HASH, "w") as hash_file:
+        hash_file.write(hash_line)
+    print(f"   Hash of {JS_ARTIFACT} written to")
+    print(f"   {JS_ARTIFACT_HASH}")
+
+    print("\n📂 Decompressing model files required for WASM testing\n")
+    # -d decompress, -k keep the .gz inputs, -r recurse into MODELS_PATH, -f overwrite.
+    subprocess.run(["gzip", "-dkrf", MODELS_PATH], check=True)
+    print(f"   Decompressed models in {MODELS_PATH}\n")
+
+    # Install the JS dependencies, then run the test suite, both from the tests directory.
+    npm_steps = (
+        ("\n🔧 Installing npm dependencies for WASM JS tests\n", ["npm", "install"]),
+        ("\n📊 Running Translations WASM JS tests\n", ["npm", "run", "test"]),
+    )
+    for banner, npm_command in npm_steps:
+        print(banner)
+        subprocess.run(npm_command, cwd=WASM_TESTS_PATH, check=True)
+
+    print("\n✅ test-wasm.py completed successfully.\n")
+
+if __name__ == "__main__":
+    main()
@@ -16,7 +16,7 @@ if(NOT MSVC)
   set(TEST_BINARIES async blocking intgemm-resolve wasm)
   foreach(binary ${TEST_BINARIES})
       add_executable("${binary}" "${binary}.cpp")
-      target_link_libraries("${binary}" bergamot-translator)
+      target_link_libraries("${binary}" bergamot-translator-source)
       set_target_properties("${binary}" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests/")
   endforeach(binary)
 
 
@@ -11,9 +11,9 @@ foreach(test ${UNIT_TESTS})
   target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
 
   if(CUDA_FOUND)
-    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
+    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator-source)
   else(CUDA_FOUND)
-    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
+    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator-source)
   endif(CUDA_FOUND)
 
   if(msvc)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+inference/**/*.gz filter=lfs diff=lfs merge=lfs -text`