pytorch
diff --git a/‎.buckconfig
+3 b/‎.buckconfig
+3
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt
+1-1 b/‎.ci/docker/ci_commit_pins/pytorch.txt
+1-1
diff --git a/‎.ci/docker/common/install_java.sh
+12 b/‎.ci/docker/common/install_java.sh
+12
diff --git a/‎.ci/docker/ubuntu/Dockerfile
+4 b/‎.ci/docker/ubuntu/Dockerfile
+4
diff --git a/‎.ci/scripts/build_llama_android.sh
+4 b/‎.ci/scripts/build_llama_android.sh
+4
diff --git a/‎.ci/scripts/gather_test_models.py
+1 b/‎.ci/scripts/gather_test_models.py
+1
diff --git a/‎.ci/scripts/test_ane_static_llama.sh
+33 b/‎.ci/scripts/test_ane_static_llama.sh
+33
diff --git a/‎.ci/scripts/test_llama.sh
+4 b/‎.ci/scripts/test_llama.sh
+4
diff --git a/‎.ci/scripts/test_model.sh
+17-1 b/‎.ci/scripts/test_model.sh
+17-1
diff --git a/‎.ci/scripts/unittest-buck2.sh
+12-4 b/‎.ci/scripts/unittest-buck2.sh
+12-4
diff --git a/‎.github/workflows/doc-build.yml
+7 b/‎.github/workflows/doc-build.yml
+7
diff --git a/‎.github/workflows/trunk.yml
+23-1 b/‎.github/workflows/trunk.yml
+23-1
diff --git a/‎.github/workflows/update-viablestrict.yml
+1-1 b/‎.github/workflows/update-viablestrict.yml
+1-1
diff --git a/‎.gitmodules
+3-9 b/‎.gitmodules
+3-9
diff --git a/‎.lintrunner.toml
+2 b/‎.lintrunner.toml
+2
diff --git a/‎CMakeLists.txt
+5-5 b/‎CMakeLists.txt
+5-5
diff --git a/‎CODEOWNERS
+16-16 b/‎CODEOWNERS
+16-16
diff --git a/‎Test.cmake
-1 b/‎Test.cmake
-1
@@ -39,3 +39,6 @@
 
 [buck2]
 restarter=true
+
+[oss]
+folly_cxx_tests = False
@@ -1 +1 @@
-27e35de6c288bffad1b4d18b393579c1d1a95547
+08434df1f2f88c9770e59246caa2ff9c6f613270
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+# Refresh the apt package index before installing anything.
+apt-get update
+# Install the OpenJDK 17 JDK; --no-install-recommends skips apt's recommended extras.
+apt-get install -y --no-install-recommends openjdk-17-jdk
@@ -30,6 +30,10 @@ ARG BUCK2_VERSION
 COPY ./common/install_buck.sh install_buck.sh
 RUN bash ./install_buck.sh && rm install_buck.sh
 
+# Install java
+COPY ./common/install_java.sh install_java.sh
+RUN bash ./install_java.sh && rm install_java.sh
+
 # Setup user
 COPY ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh
 
@@ -42,6 +42,10 @@ install_executorch_and_backend_lib() {
 
 build_llama_runner() {
     echo "Building llama runner for Android..."
+    pushd extension/llm/tokenizers
+    echo "Updating tokenizers submodule"
+    git submodule update --init
+    popd
     ANDROID_ABI=arm64-v8a
     cmake -DBUCK2="${BUCK2}" \
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake  \
 
@@ -30,6 +30,7 @@
         "dl3": "linux.4xlarge.memory",
         "emformer_join": "linux.4xlarge.memory",
         "emformer_predict": "linux.4xlarge.memory",
+        "phi-4-mini": "linux.4xlarge.memory",
     }
 }
 
 
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+# CI smoke test: exports the stories110M checkpoint with the CoreML llama export script.
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+# Repository root, two levels above this script's directory (.ci/scripts/../..).
+export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
+# Fall back to python3 when the caller has not selected an interpreter.
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+# Print the interpreter path; with "set -e" a missing interpreter aborts the run.
+which "${PYTHON_EXECUTABLE}"
+
+# Update tokenizers submodule (path is quoted so a checkout path with spaces still works)
+pushd "${EXECUTORCH_ROOT}/extension/llm/tokenizers"
+echo "Update tokenizers submodule"
+git submodule update --init
+popd
+
+pushd "${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
+
+# Download stories llama110m artifacts (helper is not defined here; presumably from utils.sh)
+download_stories_model_artifacts
+# Export with the interpreter validated above rather than whatever "python" resolves to.
+"${PYTHON_EXECUTABLE}" export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
+
+popd
@@ -173,6 +173,10 @@ cmake_install_executorch_libraries() {
 
 cmake_build_llama_runner() {
     echo "Building llama runner"
+    pushd extension/llm/tokenizers
+    echo "Updating tokenizers submodule"
+    git submodule update --init
+    popd
     dir="examples/models/llama"
     retry cmake \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
 
@@ -100,6 +100,15 @@ test_model() {
       rm "./${MODEL_NAME}.pte"
       return  # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
   fi
+  if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
+      # Install requirements for export_llama
+      bash examples/models/llama/install_requirements.sh
+      # Test export_llama script: python3 -m examples.models.llama.export_llama.
+      "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
+      run_portable_executor_runner
+      rm "./${MODEL_NAME}.pte"
+      return
+  fi
 
   # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
@@ -164,6 +173,7 @@ test_model_with_qnn() {
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
   export PYTHONPATH=$EXECUTORCH_ROOT/..
 
+  EXTRA_FLAGS=""
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
   elif [[ "${MODEL_NAME}" == "mv3" ]]; then
@@ -176,6 +186,12 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=inception_v3
   elif [[ "${MODEL_NAME}" == "vit" ]]; then
     EXPORT_SCRIPT=torchvision_vit
+  elif [[ "${MODEL_NAME}" == "mb" ]]; then
+    EXPORT_SCRIPT=mobilebert_fine_tune
+    EXTRA_FLAGS="--num_epochs 1"
+    pip install scikit-learn
+  elif [[ "${MODEL_NAME}" == "w2l" ]]; then
+    EXPORT_SCRIPT=wav2letter
   elif [[ "${MODEL_NAME}" == "edsr" ]]; then
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
@@ -189,7 +205,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
 
@@ -8,11 +8,19 @@ set -eux
 
 # TODO: expand this to //...
 # TODO: can't query cadence & vulkan backends
+# TODO: can't query //kernels/prim_ops because of a cpp_unittest and
+# broken code in shim to read oss.folly_cxx_tests. Sending fix but it
+# needs to propagate and we need a submodule update.
 buck2 query "//backends/apple/... + //backends/example/... + \
 //backends/mediatek/... + //backends/test/... + //backends/transforms/... + \
-//backends/xnnpack/... + //configurations/... + //kernels/portable/cpu/... + \
-//runtime/... + //schema/... + //test/... + //util/..."
+//backends/xnnpack/... + //configurations/... + //kernels/aten/... + \
+//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
+//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
 
+UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
+BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v "$UNBUILDABLE_OPTIMIZED_OPS_REGEX")
 # TODO: expand the covered scope of Buck targets.
-buck2 build //runtime/core/portable_type/...
-buck2 test //runtime/core/portable_type/...
+# //runtime/kernel/... is excluded: it fails because //third-party:torchgen_files's shell script can't find python on PATH.
+# //runtime/test/... is excluded: it requires Python torch, which we don't have in our OSS buck setup.
+buck2 test $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... //runtime/backend/... //runtime/core/... \
+      //runtime/executor: //runtime/platform/...
@@ -68,6 +68,12 @@ jobs:
         make html
         cd ..
 
+        # Build javadoc:
+        cd extension/android
+        ./gradlew javadoc
+        cp -rf build/docs/javadoc "${RUNNER_DOCS_DIR}"
+        cd ../..
+
         # If it's main branch, add noindex tag to all .html files to exclude from Google Search indexing.
         echo "GitHub Ref: ${GITHUB_REF}"
         if [[ "${{ github.ref }}" == 'refs/heads/main' ]]; then
@@ -77,6 +83,7 @@ jobs:
         cp -rf docs/_build/html/* "${RUNNER_DOCS_DIR}"
 
         mv docs/_build/html "${RUNNER_ARTIFACT_DIR}"
+        cp -rf "${RUNNER_DOCS_DIR}"/javadoc "${RUNNER_ARTIFACT_DIR}"/html
 
         ls -R "${RUNNER_ARTIFACT_DIR}"/*/*.html
 
 
@@ -229,6 +229,28 @@ jobs:
         # see if we can import the module successfully
         ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
 
+  test-static-llama-ane:
+    name: test-static-llama-ane
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+        bash .ci/scripts/setup-conda.sh
+        eval "$(conda shell.bash hook)"
+
+        # Install requirements
+        sh install_requirements.sh
+        sh backends/apple/coreml/scripts/install_requirements.sh
+        python install_executorch.py --pybind coreml
+        sh examples/models/llama/install_requirements.sh
+
+        # Test ANE llama; run with bash because the script relies on [[ ]], BASH_SOURCE and pushd/popd
+        bash .ci/scripts/test_ane_static_llama.sh
+
   test-llama-runner-macos:
     name: test-llama-runner-mac
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -311,7 +333,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
       fail-fast: false
     with:
       runner: linux.2xlarge
 
@@ -12,7 +12,7 @@ concurrency:
 jobs:
   do_update_viablestrict:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     environment: ${{ (github.event_name == 'schedule') && 'update-viable-strict' || '' }}
     steps:
       - name: Update viable/strict
 
@@ -28,15 +28,9 @@
 [submodule "backends/xnnpack/third-party/pthreadpool"]
 	path = backends/xnnpack/third-party/pthreadpool
 	url = https://github.com/Maratyszcza/pthreadpool.git
-[submodule "extension/llm/third-party/abseil-cpp"]
-	path = extension/llm/third-party/abseil-cpp
-	url = https://github.com/abseil/abseil-cpp.git
-[submodule "extension/llm/third-party/re2"]
-	path = extension/llm/third-party/re2
-	url = https://github.com/google/re2.git
-[submodule "extension/llm/third-party/sentencepiece"]
-	path = extension/llm/third-party/sentencepiece
-	url = https://github.com/google/sentencepiece.git
+[submodule "extension/llm/tokenizers"]
+	path = extension/llm/tokenizers
+	url = https://github.com/pytorch-labs/tokenizers.git
 [submodule "kernels/optimized/third-party/eigen"]
 	path = kernels/optimized/third-party/eigen
 	url = https://gitlab.com/libeigen/eigen.git
 
@@ -218,6 +218,8 @@ exclude_patterns = [
     'examples/**',
     'extension/**',
     'kernels/optimized/**',
+    # Justified <functional> include.
+    'runtime/kernel/thread_parallel_interface.h',
     'scripts/**',
     'third-party/**',
     'util/**',
 
@@ -248,14 +248,15 @@ cmake_dependent_option(
   "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
 )
 
-if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
+if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
+  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
+  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
 endif()
 
-if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
-  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
+if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
   set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
-  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -748,7 +749,6 @@ endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL
    AND EXECUTORCH_BUILD_CPUINFO
-   AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
 )
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
 endif()
 
@@ -15,7 +15,7 @@
 /backends/vulkan @SS-JIA
 /backends/xnnpack @digantdesai @mcr229
 
-/build @GregoryComer @dbort @kirklandsign
+/build @GregoryComer @kirklandsign
 
 /codegen @larryliu0820 @lucylq
 
@@ -47,36 +47,36 @@
 /extension/apple @shoumikhin
 /extension/aten_util @JacobSzwejbka
 /extension/benchmark @tarun292
-/extension/data_loader @JacobSzwejbka @lucylq @dbort
-/extension/evalue_util @GregoryComer @dbort
+/extension/data_loader @JacobSzwejbka @lucylq
+/extension/evalue_util @GregoryComer
 /extension/export_util @kimishpatel
 /extension/flat_tensor @lucylq
 /extension/gguf_util @larryliu0820
-/extension/kernel_util @kimishpatel @manuelcandales
-/extension/llm @jackzhxng @iseeyuan @larryliu0820
-/extension/memory_allocator @JacobSzwejbka @dbort
+/extension/kernel_util @kimishpatel @manuelcandales @swolchok
+/extension/llm @jackzhxng @iseeyuan @larryliu0820 @swolchok
+/extension/memory_allocator @JacobSzwejbka @swolchok
 /extension/module @shoumikhin
-/extension/parallel @kimishpatel
+/extension/parallel @kimishpatel @swolchok
 /extension/pybindings @JacobSzwejbka @larryliu0820
-/extension/pytree @JacobSzwejbka
-/extension/runner_util @dbort
+/extension/pytree @JacobSzwejbka @swolchok
+/extension/runner_util @swolchok
 /extension/tensor @shoumikhin
-/extension/testing_util @dbort
-/extension/threadpool @kimishpatel
+/extension/testing_util @swolchok
+/extension/threadpool @kimishpatel @swolchok
 /extension/training @JacobSzwejbka
 
-/kernels @manuelcandales
+/kernels @manuelcandales @swolchok
 
 /profiler @tarun292 @Gasoonjia
 
-/runtime @dbort @JacobSzwejbka @lucylq
+/runtime @JacobSzwejbka @lucylq @swolchok
 /runtime/backend @cccclai
 
-/schema @dbort @JacobSzwejbka @lucylq
+/schema @JacobSzwejbka @lucylq
 
-/scripts @GregoryComer
+/scripts @GregoryComer @swolchok
 
-/shim @larryliu0820 @GregoryComer
+/shim @larryliu0820 @GregoryComer @swolchok
 
 /third-party @GregoryComer
 
 
@@ -13,7 +13,6 @@ if(BUILD_TESTING)
   add_subdirectory(extension/evalue_util/test)
   add_subdirectory(extension/kernel_util/test)
   add_subdirectory(extension/memory_allocator/test)
-  add_subdirectory(extension/parallel/test)
   add_subdirectory(extension/pytree/test)
   add_subdirectory(kernels/portable/cpu/util/test)
   add_subdirectory(kernels/prim_ops/test)
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-27e35de6c288bffad1b4d18b393579c1d1a95547`
	`1`	`+08434df1f2f88c9770e59246caa2ff9c6f613270`
Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,7 @@`
`30`	`30`	`"dl3": "linux.4xlarge.memory",`
`31`	`31`	`"emformer_join": "linux.4xlarge.memory",`
`32`	`32`	`"emformer_predict": "linux.4xlarge.memory",`
	`33`	`+ "phi-4-mini": "linux.4xlarge.memory",`
`33`	`34`	`}`
`34`	`35`	`}`
`35`	`36`