Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of CoreML + Live Render Preview #370

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions backends/swift_backend/LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
Copyright (C) 2022 Apple Inc. All Rights Reserved.

IMPORTANT: This Apple software is supplied to you by Apple
Inc. ("Apple") in consideration of your agreement to the following
terms, and your use, installation, modification or redistribution of
this Apple software constitutes acceptance of these terms. If you do
not agree with these terms, please do not use, install, modify or
redistribute this Apple software.

In consideration of your agreement to abide by the following terms, and
subject to these terms, Apple grants you a personal, non-exclusive
license, under Apple's copyrights in this original Apple software (the
"Apple Software"), to use, reproduce, modify and redistribute the Apple
Software, with or without modifications, in source and/or binary forms;
provided that if you redistribute the Apple Software in its entirety and
without modifications, you must retain this notice and the following
text and disclaimers in all such redistributions of the Apple Software.
Neither the name, trademarks, service marks or logos of Apple Inc. may
be used to endorse or promote products derived from the Apple Software
without specific prior written permission from Apple. Except as
expressly stated in this notice, no other rights or licenses, express or
implied, are granted by Apple herein, including but not limited to any
patent rights that may be infringed by your derivative works or by other
works in which the Apple Software may be incorporated.

The Apple Software is provided by Apple on an "AS IS" basis. APPLE
MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND
OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.

IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION,
MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED
AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE),
STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
201 changes: 201 additions & 0 deletions backends/swift_backend/main.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML
import StableDiffusion
import UniformTypeIdentifiers

import Darwin
// Make stdout unbuffered so every protocol line ("sdbk ...") is flushed the moment it is printed.
setbuf(stdout, nil)

/// Errors thrown by the DiffusionBee backend.
enum RunError: Error {
// A required on-disk resource (the compiled model directory) is missing.
case resources(String)
// Writing a generated image to disk failed.
case saving(String)
}

/// Sampler identifiers accepted by this backend, mapped onto the
/// pipeline's scheduler type.
enum SchedulerOption: String {
    case pndm, dpmpp

    /// The StableDiffusion scheduler corresponding to this option.
    var stableDiffusionScheduler: StableDiffusionScheduler {
        self == .pndm ? .pndmScheduler : .dpmSolverMultistepScheduler
    }
}

/// Writes `str` to stdout followed by `term`; no newline is added beyond
/// what `term` itself contains.
func log(_ str: String, term: String = "") {
    print("\(str)\(term)", terminator: "")
}

// Shared FileManager used below to resolve paths under the user's home directory.
let fm = FileManager.default

/// Stdin/stdout backend that drives a Core ML Stable Diffusion pipeline.
///
/// Wire protocol: machine-readable lines printed to stdout start with "sdbk";
/// requests arrive on stdin as lines containing "b2s t2im" followed by a JSON
/// payload. NOTE(review): these exact strings appear to be consumed by a
/// front-end process — do not change them.
struct DiffusionBee {
// Compiled Core ML model directory under the user's home folder.
var resourcePath: String = fm.homeDirectoryForCurrentUser.path+"/.diffusionbee/coreml_models/coreml-stable-diffusion-v1-5_split_einsum_compiled"
// Destination directory for generated PNGs.
var outputPath: String = fm.homeDirectoryForCurrentUser.path+"/.diffusionbee/images"
// Safety checker is disabled for this backend.
var disableSafety: Bool = true
// Asks the pipeline to trade speed for a smaller memory footprint.
var reduceMemory: Bool = true

// Per-request state, populated from each JSON payload inside run().
var prompt: String = ""
var negativePrompt: String = ""

var imageCount: Int = 1
var seed: UInt32 = 93
// When > 0, intermediate images are written every `saveEvery` steps.
var saveEvery: Int = 0

/// Per-step progress callback: rewrites the previous console line with
/// step/sec statistics, optionally saves intermediate images, and emits the
/// "sdbk dnpr <percent>" protocol line.
func handleProgress(
_ progress: StableDiffusionPipeline.Progress,
_ sampleTimer: SampleTimer
) {
// ANSI escape: cursor up one line + erase, so the stats line updates in place.
log("\u{1B}[1A\u{1B}[K")
log("Step \(progress.step) of \(progress.stepCount) ")
log(" [")
log(String(format: "mean: %.2f, ", 1.0/sampleTimer.mean))
log(String(format: "median: %.2f, ", 1.0/sampleTimer.median))
// NOTE(review): force-unwrap assumes at least one timing sample exists by the
// time this callback fires — confirm SampleTimer guarantees this.
log(String(format: "last %.2f", 1.0/sampleTimer.allSamples.last!))
log("] step/sec")
// Periodically persist in-progress images for the live render preview.
if saveEvery > 0, progress.step % saveEvery == 0 {
let saveCount = (try? saveImages(progress.currentImages, step: progress.step, logNames: true))?.count ?? 0
log(" saved \(saveCount) image\(saveCount != 1 ? "s" : "")")
}
log("\n")
// Completion percentage for the front-end ("dnpr" = done percentage).
let progressPercentage = Float(progress.step) / Float(progress.stepCount)
let progressPercentageInt = Int(ceil(progressPercentage * 100))
print("sdbk dnpr \(progressPercentageInt)")
}

/// Writes each non-nil image to `outputPath` as a PNG and returns the saved
/// file paths. Nil entries (filtered out upstream) are skipped. Emits
/// "sdbk nwim <path>" for each saved file when `logNames` is true.
/// - Throws: `RunError.saving` when ImageIO cannot create or finalize a file.
func saveImages(
_ images: [CGImage?],
step: Int? = nil,
logNames: Bool = false
) throws -> [String] {
let url = URL(filePath: outputPath)
var saved = [String]()
for i in 0 ..< images.count {

guard let image = images[i] else {
if logNames {
log("Image \(i) failed safety check and was not saved")
}
continue
}
let name = imageName(i, step: step)
let fileURL = url.appending(path:name)

// ImageIO PNG destination for a single image.
guard let dest = CGImageDestinationCreateWithURL(fileURL as CFURL, UTType.png.identifier as CFString, 1, nil) else {
throw RunError.saving("Failed to create destination for \(fileURL)")
}
CGImageDestinationAddImage(dest, image, nil)
if !CGImageDestinationFinalize(dest) {
throw RunError.saving("Failed to save \(fileURL)")
}
if logNames {
log("Saved \(name)\n")
print("sdbk nwim \(fileURL.path)")
}
saved.append(fileURL.path)
}
return saved
}
/// Builds a PNG file name from the seed and the (truncated) prompt.
/// NOTE(review): the `step` parameter is accepted but never used, so
/// intermediate saves overwrite the final image of the same sample —
/// confirm this is intended.
func imageName(_ sample: Int, step: Int? = nil) -> String {
let fileCharLimit = 75
var name = "\(seed)"
name += prompt.prefix(fileCharLimit).replacingOccurrences(of: " ", with: "_")
if imageCount != 1 {
name += "_\(sample)"
}
name += ".png"
return name
}

/// Loads the pipeline once, then serves "b2s t2im" requests from stdin in an
/// endless loop. Mutating because each request updates the stored
/// prompt/seed/count state before generating.
/// - Throws: `RunError.resources` if the model directory is missing, plus any
///   error surfaced by pipeline loading or image generation.
mutating func run() throws {
guard FileManager.default.fileExists(atPath: resourcePath) else {
throw RunError.resources("Resource path does not exist \(resourcePath)")
}
let config = MLModelConfiguration()

print("sdbk mltl Loading Model")

let resourceURL = URL(filePath: resourcePath)
// CPU + Neural Engine; the GPU is deliberately not requested here.
let computeUnits: MLComputeUnits = .cpuAndNeuralEngine

config.computeUnits = computeUnits

print("sdbk gnms Loading SD Model" )
let pipeline = try StableDiffusionPipeline(resourcesAt: resourceURL,
configuration: config,
disableSafety: disableSafety,
reduceMemory: reduceMemory)
try pipeline.loadResources()
print("sdbk mdvr 1.5CoreML")


print("sdbk mlpr -1")
print("sdbk mdld")

while true {
print("sdbk inrd") // input ready
// NOTE(review): readLine() returns nil on EOF, so this force-unwrap crashes
// if the driving process closes stdin — consider breaking the loop instead.
let input = readLine()!
if input == "" {
continue
}

// Only text-to-image requests are handled; everything else is ignored.
if !input.contains("b2s t2im") {
continue
}

// Strip the command prefix; the remainder is the JSON payload.
let inp_str = input.replacingOccurrences(of: "b2s t2im", with: "").trimmingCharacters(in: .whitespacesAndNewlines)

// Request defaults; keys present in the payload override these.
// NOTE(review): several defaults (W, H, input_image, mask_image, …) are
// declared but never read below — presumably reserved for future use.
var d = ["W":512, "H":512, "num_imgs":1, "ddim_steps":25, "scale":7.5, "batch_size":1, "input_image":"", "img_strength":0.5, "negative_prompt":"", "mask_image":"", "model_id":0, "custom_model_path":"", "save_every": 0] as [String : Any]

// NOTE(review): these force-unwraps/casts crash on malformed JSON or a
// missing "prompt" key — the payload is trusted to be well-formed.
let d_ = try JSONSerialization.jsonObject(with: inp_str.data(using: .utf8)!, options: []) as! [String:Any]
for (k,v) in d_ {
d[k] = v
}
print("sdbk inwk") // working on the input

print("Sampling ...\n")
let sampleTimer = SampleTimer()
sampleTimer.start()


prompt = d["prompt"] as! String
negativePrompt = d["negative_prompt"] as! String
imageCount = d["num_imgs"] as! Int
saveEvery = d["save_every"] as! Int
let stepCount = d["ddim_steps"] as! Int
let guidanceScale = d["scale"] as! Double

// Random seed unless the request pins one.
seed = d["seed"] as? UInt32 ?? UInt32.random(in: 0...UInt32.max)

// Always PNDM for now; never reassigned below (could be `let`).
var scheduler: SchedulerOption = .pndm

let images = try pipeline.generateImages(
prompt: prompt,
negativePrompt: negativePrompt,
imageCount: imageCount,
stepCount: stepCount,
seed: seed,
guidanceScale: Float(guidanceScale),
scheduler: scheduler.stableDiffusionScheduler
) { progress in
// Timer is stopped around reporting — presumably to exclude reporting time
// from the step/sec statistics; confirm SampleTimer semantics.
sampleTimer.stop()
handleProgress(progress,sampleTimer)
if progress.stepCount != progress.step {
sampleTimer.start()
}
return true
}
// Persist the final images and announce them over the protocol.
let _ = try saveImages(images, logNames: true)
print("sdbk igws")


}
}
}
// Entry point. The StableDiffusion pipeline API used above requires
// iOS 16.2 / macOS 13.1; on anything older we report and exit normally.
if #available(iOS 16.2, macOS 13.1, *) {
    var backend = DiffusionBee()
    try backend.run()
} else {
    print("Unsupported OS")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Thank you for your interest in contributing to Core ML Stable Diffusion! Please review [CONTRIBUTING.md](../CONTRIBUTING.md) first. If you would like to proceed with making a pull request, please indicate your agreement to the terms outlined in CONTRIBUTING.md by checking the box below. If not, please go ahead and fork this repo and make your updates.

We appreciate your interest in the project!

Do not erase the below when submitting your pull request:
#########

- [ ] I agree to the terms outlined in CONTRIBUTING.md
144 changes: 144 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Swift Package
.DS_Store
/.build
/Packages
/*.xcodeproj
.swiftpm
.vscode
.*.sw?
*.docc-build
*.vs
Package.resolved

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# macOS filesystem
*.DS_Store
555 changes: 555 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/ACKNOWLEDGEMENTS

Large diffs are not rendered by default.

71 changes: 71 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the open source team at [opensource-conduct@group.apple.com](mailto:opensource-conduct@group.apple.com). All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 1.4,
available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html)
15 changes: 15 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Contribution Guide

Thank you for your interest in contributing to Core ML Stable Diffusion! This project was released for system demonstration purposes and there are limited plans for future development of the repository. While we welcome new pull requests and issues please note that our response may be limited. Forks and out-of-tree improvements are strongly encouraged.

## Submitting a Pull Request

Under the project's [LICENSE](LICENSE.md), confirmation of the following is required to merge your changes:

*"I agree that all information entered is original and owned by me, and I hereby provide an irrevocable, royalty-free license to Apple to use, modify, copy, publish, prepare derivate works of, distribute (including under the Apple Sample Code License), such information and all intellectual property therein in whole or part, in perpetuity and worldwide, without any attribution."*

By submitting a pull request, you represent that you have the right to license your contribution to Apple and the community, and agree by submitting the patch that your contributions are licensed under the [LICENSE](LICENSE.md).

## Code of Conduct

We ask that all community members read and observe our [Code of Conduct](CODE_OF_CONDUCT.md).
39 changes: 39 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
Copyright (C) 2022 Apple Inc. All Rights Reserved.

IMPORTANT: This Apple software is supplied to you by Apple
Inc. ("Apple") in consideration of your agreement to the following
terms, and your use, installation, modification or redistribution of
this Apple software constitutes acceptance of these terms. If you do
not agree with these terms, please do not use, install, modify or
redistribute this Apple software.

In consideration of your agreement to abide by the following terms, and
subject to these terms, Apple grants you a personal, non-exclusive
license, under Apple's copyrights in this original Apple software (the
"Apple Software"), to use, reproduce, modify and redistribute the Apple
Software, with or without modifications, in source and/or binary forms;
provided that if you redistribute the Apple Software in its entirety and
without modifications, you must retain this notice and the following
text and disclaimers in all such redistributions of the Apple Software.
Neither the name, trademarks, service marks or logos of Apple Inc. may
be used to endorse or promote products derived from the Apple Software
without specific prior written permission from Apple. Except as
expressly stated in this notice, no other rights or licenses, express or
implied, are granted by Apple herein, including but not limited to any
patent rights that may be infringed by your derivative works or by other
works in which the Apple Software may be incorporated.

The Apple Software is provided by Apple on an "AS IS" basis. APPLE
MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND
OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.

IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL
OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION,
MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED
AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE),
STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
43 changes: 43 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/Package.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// swift-tools-version: 5.7
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

// Swift Package manifest for the StableDiffusion library, its CLI sample, and tests.
let package = Package(
name: "stable-diffusion",
// Minimum platforms for building the package (the backend additionally
// gates pipeline use behind iOS 16.2 / macOS 13.1 at runtime).
platforms: [
.macOS(.v11),
.iOS(.v14),
],
products: [
// Library product consumed by client executables.
.library(
name: "StableDiffusion",
targets: ["StableDiffusion"]),
// Command-line sample driver.
.executable(
name: "StableDiffusionSample",
targets: ["StableDiffusionCLI"])
],
dependencies: [
// Pinned to an exact version to avoid resolution drift.
.package(url: "https://github.com/apple/swift-argument-parser.git", exact: "1.2.0")
],
targets: [
.target(
name: "StableDiffusion",
dependencies: [],
path: "swift/StableDiffusion"),
.executableTarget(
name: "StableDiffusionCLI",
dependencies: [
"StableDiffusion",
.product(name: "ArgumentParser", package: "swift-argument-parser")],
path: "swift/StableDiffusionCLI"),
.testTarget(
name: "StableDiffusionTests",
dependencies: ["StableDiffusion"],
path: "swift/StableDiffusionTests",
resources: [
// Vocabulary/merges files bundled for the test target.
.copy("Resources/vocab.json"),
.copy("Resources/merges.txt")
]),
]
)
425 changes: 425 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/README.md

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._version import __version__
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Single source of truth for the package version string (re-exported by __init__).
__version__ = "0.1.0"
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
#
# For licensing see accompanying LICENSE.md file.
# Copyright (C) 2022 Apple Inc. All Rights Reserved.
#

import argparse
from collections import OrderedDict

import coremltools as ct
from coremltools.converters.mil import Block, Program, Var
from coremltools.converters.mil.frontend.milproto.load import load as _milproto_to_pymil
from coremltools.converters.mil.mil import Builder as mb
from coremltools.converters.mil.mil import Placeholder
from coremltools.converters.mil.mil import types as types
from coremltools.converters.mil.mil.passes.helper import block_context_manager
from coremltools.converters.mil.mil.passes.pass_registry import PASS_REGISTRY
from coremltools.converters.mil.testing_utils import random_gen_input_feature_type

import gc

import logging

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

import numpy as np
import os
from python_coreml_stable_diffusion import torch2coreml
import shutil
import time


def _verify_output_correctness_of_chunks(full_model, first_chunk_model,
                                         second_chunk_model):
    """ Verifies the end-to-end output correctness of full (original) model versus chunked models
    """
    # Random inputs matching the full model's declared input descriptions
    input_dict = {
        desc.name: random_gen_input_feature_type(desc)
        for desc in full_model._spec.description.input
    }

    # Run the full model and the first chunk on the same inputs
    full_outputs = full_model.predict(input_dict)
    first_chunk_outputs = first_chunk_model.predict(input_dict)

    # The second chunk consumes first-chunk outputs where available
    # (e.g. skip connections) and falls back to the original inputs otherwise
    second_chunk_inputs = {}
    for desc in second_chunk_model._spec.description.input:
        if desc.name in first_chunk_outputs:
            second_chunk_inputs[desc.name] = first_chunk_outputs[desc.name]
        else:
            second_chunk_inputs[desc.name] = input_dict[desc.name]

    second_chunk_outputs = second_chunk_model.predict(second_chunk_inputs)

    # Report PSNR for every output of the end-to-end chunked pipeline
    for out_name in full_outputs.keys():
        torch2coreml.report_correctness(
            original_outputs=full_outputs[out_name],
            final_outputs=second_chunk_outputs[out_name],
            log_prefix=f"{out_name}")


def _load_prog_from_mlmodel(model):
    """ Load MIL Program from an MLModel
    """
    spec = model.get_spec()
    t0 = time.time()
    logger.info(
        "Loading MLModel object into a MIL Program object (including the weights).."
    )
    program = _milproto_to_pymil(
        model_spec=spec,
        specification_version=spec.specificationVersion,
        file_weights_dir=model.weights_dir,
    )
    logger.info(f"Program loaded in {time.time() - t0:.1f} seconds")

    return program


def _get_op_idx_split_location(prog: Program):
""" Find the op that approximately bisects the graph as measure by weights size on each side
"""
main_block = prog.functions["main"]
total_size_in_mb = 0

for op in main_block.operations:
if op.op_type == "const" and isinstance(op.val.val, np.ndarray):
size_in_mb = op.val.val.size * op.val.val.itemsize / (1024 * 1024)
total_size_in_mb += size_in_mb
half_size = total_size_in_mb / 2

# Find the first non const op (single child), where the total cumulative size exceeds
# the half size for the first time
cumulative_size_in_mb = 0
for op in main_block.operations:
if op.op_type == "const" and isinstance(op.val.val, np.ndarray):
size_in_mb = op.val.val.size * op.val.val.itemsize / (1024 * 1024)
cumulative_size_in_mb += size_in_mb

if (cumulative_size_in_mb > half_size and op.op_type != "const"
and len(op.outputs) == 1
and len(op.outputs[0].child_ops) == 1):
op_idx = main_block.operations.index(op)
return op_idx, cumulative_size_in_mb, total_size_in_mb


def _get_first_chunk_outputs(block, op_idx):
# Get the list of all vars that go across from first program (all ops from 0 to op_idx (inclusive))
# to the second program (all ops from op_idx+1 till the end). These all vars need to be made the output
# of the first program and the input of the second program
boundary_vars = set()
for i in range(op_idx + 1):
op = block.operations[i]
for var in op.outputs:
if var.val is None: # only consider non const vars
for child_op in var.child_ops:
child_op_idx = block.operations.index(child_op)
if child_op_idx > op_idx:
boundary_vars.add(var)
return list(boundary_vars)


@block_context_manager
def _add_fp32_casts(block, boundary_vars):
    """ Return boundary vars with every fp16 var replaced by an fp32 cast.

    The cast keeps the original var's name; non-fp16 vars pass through
    unchanged. Runs inside the block context so mb.cast ops land in `block`.
    """
    casted_vars = []
    for boundary_var in boundary_vars:
        is_fp16 = boundary_var.dtype == types.fp16
        casted_vars.append(
            mb.cast(x=boundary_var, dtype="fp32", name=boundary_var.name)
            if is_fp16 else boundary_var)
    return casted_vars


def _make_first_chunk_prog(prog, op_idx):
    """ Build first chunk by declaring early outputs and removing unused subgraph

    Mutates `prog` in place: the vars crossing the incision point become the
    program outputs, and dead-code elimination drops everything past them.
    """
    main_block = prog.functions["main"]
    crossing_vars = _get_first_chunk_outputs(main_block, op_idx)

    # Cast fp16 boundary values to fp32 to sidestep possible numerical issues
    fp32_outputs = _add_fp32_casts(main_block, crossing_vars)

    main_block.outputs.clear()
    main_block.set_outputs(fp32_outputs)
    PASS_REGISTRY["common::dead_code_elimination"](prog)
    return prog


def _make_second_chunk_prog(prog, op_idx):
    """ Build second chunk by rebuilding a pristine MIL Program from MLModel

    Mutates `prog` in place: every var crossing the incision point is turned
    into a program input (fp16 vars become fp32 placeholders followed by a
    cast back to fp16), then dead-code elimination removes the first chunk's
    subgraph and any inputs left without consumers.
    """
    block = prog.functions["main"]
    # fp32 I/O for ML Programs requires the iOS16 opset
    block.opset_version = ct.target.iOS16

    # First chunk outputs are second chunk inputs (e.g. skip connections)
    boundary_vars = _get_first_chunk_outputs(block, op_idx)

    # This op will not be included in this program. Its output var will be made into an input
    boundary_op = block.operations[op_idx]

    # Add all boundary ops as inputs
    with block:
        for var in boundary_vars:
            # Placeholder mirrors the crossing var; fp16 is widened to fp32 to
            # match the fp32 outputs emitted by the first chunk
            new_placeholder = Placeholder(
                sym_shape=var.shape,
                dtype=var.dtype if var.dtype != types.fp16 else types.fp32,
                name=var.name,
            )

            block._input_dict[
                new_placeholder.outputs[0].name] = new_placeholder.outputs[0]

            block.function_inputs = tuple(block._input_dict.values())
            new_var = None
            if var.dtype == types.fp16:
                # Cast the fp32 input back to fp16 right before its first use
                new_var = mb.cast(x=new_placeholder.outputs[0],
                                  dtype="fp16",
                                  before_op=var.op)
            else:
                new_var = new_placeholder.outputs[0]

            # Re-route every consumer after the incision to the new input var
            block.replace_uses_of_var_after_op(
                anchor_op=boundary_op,
                old_var=var,
                new_var=new_var,
            )

    # Drops the (now-disconnected) first-chunk subgraph
    PASS_REGISTRY["common::dead_code_elimination"](prog)

    # Remove any unused inputs
    new_input_dict = OrderedDict()
    for k, v in block._input_dict.items():
        if len(v.child_ops) > 0:
            new_input_dict[k] = v
    block._input_dict = new_input_dict
    block.function_inputs = tuple(block._input_dict.values())

    return prog


def main(args):
    """ Split an .mlpackage into two chunks of approximately equal weights size.

    Loads the model, bisects its MIL program by cumulative const-weight size,
    converts both halves back into mlprogram models, optionally verifies
    end-to-end output correctness, saves the chunks to `args.o`, and finally
    (if requested) removes the original package.

    Fixes vs. the original:
      * assert message and removal log were missing their f-prefix, so the
        path was never interpolated;
      * the original model was removed *before* the chunks were saved — a
        failed save could lose the only copy; removal now happens last.
    """
    os.makedirs(args.o, exist_ok=True)

    # Check filename extension
    mlpackage_name = os.path.basename(args.mlpackage_path)
    name, ext = os.path.splitext(mlpackage_name)
    assert ext == ".mlpackage", f"`--mlpackage-path` ({args.mlpackage_path}) is not an .mlpackage file"

    # Load CoreML model
    logger.info("Loading model from {}".format(args.mlpackage_path))
    start_ = time.time()
    model = ct.models.MLModel(
        args.mlpackage_path,
        compute_units=ct.ComputeUnit.CPU_ONLY,
    )
    logger.info(
        f"Loading {args.mlpackage_path} took {time.time() - start_:.1f} seconds"
    )

    # Load the MIL Program from MLModel
    prog = _load_prog_from_mlmodel(model)

    # Compute the incision point by bisecting the program based on weights size
    op_idx, first_chunk_weights_size, total_weights_size = _get_op_idx_split_location(
        prog)
    main_block = prog.functions["main"]
    incision_op = main_block.operations[op_idx]
    logger.info(f"{args.mlpackage_path} will be chunked into two pieces.")
    logger.info(
        f"The incision op: name={incision_op.name}, type={incision_op.op_type}, index={op_idx}/{len(main_block.operations)}"
    )
    logger.info(f"First chunk size = {first_chunk_weights_size:.2f} MB")
    logger.info(
        f"Second chunk size = {total_weights_size - first_chunk_weights_size:.2f} MB"
    )

    # Build first chunk (in-place modifies prog by declaring early exits and removing unused subgraph)
    prog_chunk1 = _make_first_chunk_prog(prog, op_idx)

    # Build the second chunk from a freshly loaded program (chunk 1 mutated `prog`)
    prog_chunk2 = _make_second_chunk_prog(_load_prog_from_mlmodel(model),
                                          op_idx)

    if not args.check_output_correctness:
        # Original model no longer needed in memory
        del model
        gc.collect()

    # Convert the MIL Program objects into MLModels
    logger.info("Converting the two programs")
    model_chunk1 = ct.convert(
        prog_chunk1,
        convert_to="mlprogram",
        compute_units=ct.ComputeUnit.CPU_ONLY,
        minimum_deployment_target=ct.target.iOS16,
    )
    del prog_chunk1
    gc.collect()
    logger.info("Conversion of first chunk done.")

    model_chunk2 = ct.convert(
        prog_chunk2,
        convert_to="mlprogram",
        compute_units=ct.ComputeUnit.CPU_ONLY,
        minimum_deployment_target=ct.target.iOS16,
    )
    del prog_chunk2
    gc.collect()
    logger.info("Conversion of second chunk done.")

    # Verify output correctness
    if args.check_output_correctness:
        logger.info("Verifying output correctness of chunks")
        _verify_output_correctness_of_chunks(
            full_model=model,
            first_chunk_model=model_chunk1,
            second_chunk_model=model_chunk2,
        )

    # Save the chunked models to disk BEFORE touching the original so a failed
    # save can never lose the only copy of the model
    out_path_chunk1 = os.path.join(args.o, name + "_chunk1.mlpackage")
    out_path_chunk2 = os.path.join(args.o, name + "_chunk2.mlpackage")
    model_chunk1.save(out_path_chunk1)
    model_chunk2.save(out_path_chunk2)
    logger.info(
        f"Saved chunks in {args.o} with the suffix _chunk1.mlpackage and _chunk2.mlpackage"
    )

    # Remove original (non-chunked) model if requested
    if args.remove_original:
        logger.info(
            f"Removing original (non-chunked) model at {args.mlpackage_path}")
        shutil.rmtree(args.mlpackage_path)

    logger.info("Done.")


if __name__ == "__main__":
    # Command-line entry point: parse arguments and run the chunking pipeline.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mlpackage-path",
        required=True,
        help=
        "Path to the mlpackage file to be split into two mlpackages of approximately same file size.",
    )
    parser.add_argument(
        "-o",
        required=True,
        help=
        "Path to output directory where the two model chunks should be saved.",
    )
    parser.add_argument(
        "--remove-original",
        action="store_true",
        help=
        "If specified, removes the original (non-chunked) model to avoid duplicating storage."
    )
    parser.add_argument(
        "--check-output-correctness",
        action="store_true",
        # Bug fix: the original had a stray comma between the two string
        # literals, turning `help` into a tuple; adjacent literals now
        # concatenate into a single string as intended.
        help=(
            "If specified, compares the outputs of original Core ML model with that of pipelined CoreML model chunks and reports PSNR in dB. "
            "Enabling this feature uses more memory. Disable it if your machine runs out of memory."
        ))

    args = parser.parse_args()
    main(args)
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#
# For licensing see accompanying LICENSE.md file.
# Copyright (C) 2022 Apple Inc. All Rights Reserved.
#

import coremltools as ct

import logging

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

import numpy as np

import os
import time


class CoreMLModel:
    """ Wrapper for running CoreML models using coremltools
    """

    def __init__(self, model_path, compute_unit):
        """
        Args:
            model_path: Path to a compiled Core ML model package (must end in ``.mlpackage``)
            compute_unit: Name of a ``coremltools.ComputeUnit`` member, e.g. "ALL" or "CPU_ONLY"
        """
        assert os.path.exists(model_path) and model_path.endswith(".mlpackage")

        logger.info(f"Loading {model_path}")

        start = time.time()
        self.model = ct.models.MLModel(
            model_path, compute_units=ct.ComputeUnit[compute_unit])
        load_time = time.time() - start
        logger.info(f"Done. Took {load_time:.1f} seconds.")

        # Loading through coremltools recompiles the model each time; warn the
        # user when this becomes noticeable
        if load_time > LOAD_TIME_INFO_MSG_TRIGGER:
            logger.info(
                "Loading a CoreML model through coremltools triggers compilation every time. "
                "The Swift package we provide uses precompiled Core ML models (.mlmodelc) to avoid compile-on-load."
            )

        # Maps Core ML multiArrayType.dataType enum values to numpy dtypes
        DTYPE_MAP = {
            65552: np.float16,
            65568: np.float32,
            131104: np.int32,
        }

        # Cache each declared input's expected shape and dtype so that
        # `_verify_inputs` can validate calls before prediction
        self.expected_inputs = {
            input_tensor.name: {
                "shape": tuple(input_tensor.type.multiArrayType.shape),
                "dtype": DTYPE_MAP[input_tensor.type.multiArrayType.dataType],
            }
            for input_tensor in self.model._spec.description.input
        }

    def _verify_inputs(self, **kwargs):
        """ Validate that kwargs match the model's input names, dtypes and shapes.

        Raises:
            ValueError: if an input name is not declared by the model
            TypeError: if a value is not an ndarray or has wrong dtype/shape
        """
        for k, v in kwargs.items():
            if k not in self.expected_inputs:
                # Bug fix: message previously lacked the f-prefix and printed
                # the literal "{k}" instead of the offending keyword
                raise ValueError(f"Received unexpected input kwarg: {k}")

            if not isinstance(v, np.ndarray):
                raise TypeError(
                    f"Expected numpy.ndarray, got {type(v)} for input: {k}")

            expected_dtype = self.expected_inputs[k]["dtype"]
            if v.dtype != expected_dtype:
                raise TypeError(
                    f"Expected dtype {expected_dtype}, got {v.dtype} for input: {k}"
                )

            expected_shape = self.expected_inputs[k]["shape"]
            if v.shape != expected_shape:
                raise TypeError(
                    f"Expected shape {expected_shape}, got {v.shape} for input: {k}"
                )

    def __call__(self, **kwargs):
        """ Run a prediction after validating the supplied inputs """
        self._verify_inputs(**kwargs)
        return self.model.predict(kwargs)


LOAD_TIME_INFO_MSG_TRIGGER = 10 # seconds


def _load_mlpackage(submodule_name, mlpackages_dir, model_version,
                    compute_unit):
    """ Load a Core ML (mlpackage) model from disk, as exported by torch2coreml.py

    Args:
        submodule_name: Pipeline component, e.g. "unet" or "text_encoder"
        mlpackages_dir: Directory containing the exported .mlpackage files
        model_version: Model version identifier ("/" is sanitized to "_")
        compute_unit: Name of a coremltools.ComputeUnit member

    Raises:
        FileNotFoundError: if the expected .mlpackage is not on disk
    """
    logger.info(f"Loading {submodule_name} mlpackage")

    package_name = (
        f"Stable_Diffusion_version_{model_version}_{submodule_name}.mlpackage"
    ).replace("/", "_")
    package_path = os.path.join(mlpackages_dir, package_name)

    if not os.path.exists(package_path):
        raise FileNotFoundError(
            f"{submodule_name} CoreML model doesn't exist at {package_path}")

    return CoreMLModel(package_path, compute_unit)

def get_available_compute_units():
    """ Names of all coremltools ComputeUnit members, as a tuple of strings """
    return tuple(ct.ComputeUnit._member_names_)
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#
# For licensing see accompanying LICENSE.md file.
# Copyright (C) 2022 Apple Inc. All Rights Reserved.
#

import torch
import torch.nn as nn


# Reference: https://github.com/apple/ml-ane-transformers/blob/main/ane_transformers/reference/layer_norm.py
class LayerNormANE(nn.Module):
    """ LayerNorm optimized for Apple Neural Engine (ANE) execution

    Note: This layer only supports normalization over the channel dim of a
    BC1S tensor. It expects `num_channels` as an argument and not
    `normalized_shape` which is used by `torch.nn.LayerNorm`.
    """

    def __init__(self,
                 num_channels,
                 clip_mag=None,
                 eps=1e-5,
                 elementwise_affine=True):
        """
        Args:
            num_channels: Number of channels (C) where the expected input data format is BC1S. S stands for sequence length.
            clip_mag: Optional float value to use for clamping the input range before layer norm is applied.
                      If specified, helps reduce risk of overflow.
            eps: Small value to avoid dividing by zero
            elementwise_affine: If true, adds learnable channel-wise shift (bias) and scale (weight) parameters
        """
        super().__init__()
        # Principle 1: Picking the Right Data Format
        # (machinelearning.apple.com/research/apple-neural-engine)
        # "BC1S" = batch, channels, 1, sequence -> rank 4
        self.expected_rank = len("BC1S")

        self.num_channels = num_channels
        self.eps = eps
        self.clip_mag = clip_mag
        self.elementwise_affine = elementwise_affine

        if self.elementwise_affine:
            self.weight = nn.Parameter(torch.Tensor(num_channels))
            self.bias = nn.Parameter(torch.Tensor(num_channels))

        self._reset_parameters()

    def _reset_parameters(self):
        # Identity affine transform at initialization
        if self.elementwise_affine:
            nn.init.ones_(self.weight)
            nn.init.zeros_(self.bias)

    def forward(self, inputs):
        rank = inputs.dim()

        # Migrate BSC-formatted inputs to BC1S, the layout most conducive to
        # the ANE (see Principle 1 referenced above)
        if rank == 3 and inputs.size(2) == self.num_channels:
            inputs = inputs.transpose(1, 2).unsqueeze(2)
            rank = inputs.dim()

        assert rank == self.expected_rank
        assert inputs.size(1) == self.num_channels

        # Optional in-place clamp to reduce overflow risk (fp16 on ANE)
        if self.clip_mag is not None:
            inputs.clamp_(-self.clip_mag, self.clip_mag)

        mean_c = inputs.mean(dim=1, keepdim=True)
        centered = inputs - mean_c
        variance = (centered * centered).mean(dim=1, keepdim=True)
        inv_std = (variance + self.eps).rsqrt()
        out = centered * inv_std

        if self.elementwise_affine:
            affine_shape = (1, self.num_channels, 1, 1)
            # Note the (x + bias) * weight ordering, kept from the reference
            # ANE-optimized implementation
            out = (out + self.bias.view(*affine_shape)
                   ) * self.weight.view(*affine_shape)

        return out

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
coremltools
diffusers[torch]
torch
transformers
scipy
37 changes: 37 additions & 0 deletions backends/swift_backend/ml-stable-diffusion-main/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from setuptools import setup, find_packages

from python_coreml_stable_diffusion._version import __version__

# Explicit encoding avoids a locale-dependent UnicodeDecodeError on systems
# whose default encoding is not UTF-8
with open('README.md', encoding='utf-8') as f:
    readme = f.read()

setup(
    name='python_coreml_stable_diffusion',
    version=__version__,
    url='https://github.com/apple/ml-stable-diffusion',
    description="Run Stable Diffusion on Apple Silicon with Core ML (Python and Swift)",
    long_description=readme,
    long_description_content_type='text/markdown',
    author='Apple Inc.',
    install_requires=[
        "coremltools>=6.1",
        "diffusers[torch]",
        "torch",
        "transformers",
        "scipy",
        "numpy<1.24",
    ],
    packages=find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Operating System :: MacOS :: MacOS X",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        # Bug fix: "Topic :: Artificial Intelligence" is not a valid PyPI
        # trove classifier; the nested Scientific/Engineering form is.
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering",
        "Topic :: Software Development",
    ],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. and The HuggingFace Team. All Rights Reserved.

import Accelerate
import CoreML

/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers DPMSolverMultistepScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py)
///
/// It uses the DPM-Solver++ algorithm: [code](https://github.com/LuChengTHU/dpm-solver) [paper](https://arxiv.org/abs/2211.01095).
/// Limitations:
/// - Only implemented for DPM-Solver++ algorithm (not DPM-Solver).
/// - Second order only.
/// - Assumes the model predicts epsilon.
/// - No dynamic thresholding.
/// - `midpoint` solver algorithm.
@available(iOS 16.2, macOS 13.1, *)
public final class DPMSolverMultistepScheduler: Scheduler {
    public let trainStepCount: Int
    public let inferenceStepCount: Int
    public let betas: [Float]
    public let alphas: [Float]
    public let alphasCumProd: [Float]
    public let timeSteps: [Int]

    /// VP-type diffusion coefficients, indexed by training timestep:
    /// alpha_t = sqrt(alphasCumProd), sigma_t = sqrt(1 - alphasCumProd),
    /// lambda_t = log(alpha_t) - log(sigma_t) (one-half log-SNR)
    public let alpha_t: [Float]
    public let sigma_t: [Float]
    public let lambda_t: [Float]

    public let solverOrder = 2
    /// How many lower-order (first-order) steps have been taken so far;
    /// the multistep solver needs at least one before using second order
    private(set) var lowerOrderStepped = 0

    /// Whether to use lower-order solvers in the final steps. Only valid for less than 15 inference steps.
    /// We empirically find this trick can stabilize the sampling of DPM-Solver, especially with 10 or fewer steps.
    public let useLowerOrderFinal = true

    // Stores solverOrder (2) items: the most recent converted model outputs
    private(set) var modelOutputs: [MLShapedArray<Float32>] = []

    /// Create a scheduler that uses a second order DPM-Solver++ algorithm.
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            // linspace over sqrt(beta), then square each element
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }

        self.alphas = betas.map({ 1.0 - $0 })
        // Running cumulative product: alphasCumProd[i] = alphas[0] * ... * alphas[i]
        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd

        // Currently we only support VP-type noise schedule
        self.alpha_t = vForce.sqrt(self.alphasCumProd)
        self.sigma_t = vForce.sqrt(vDSP.subtract([Float](repeating: 1, count: self.alphasCumProd.count), self.alphasCumProd))
        self.lambda_t = zip(self.alpha_t, self.sigma_t).map { α, σ in log(α) - log(σ) }

        // Evenly spaced training timesteps in descending order; the first
        // linspace point (0) is dropped so exactly `stepCount` steps remain
        self.timeSteps = linspace(0, Float(self.trainStepCount-1), stepCount+1).dropFirst().reversed().map { Int(round($0)) }
    }

    /// Convert the model output to the corresponding type the algorithm needs.
    /// This implementation is for second-order DPM-Solver++ assuming epsilon prediction.
    /// For epsilon prediction, the data estimate is x0 = (sample - sigma_t * eps) / alpha_t.
    func convertModelOutput(modelOutput: MLShapedArray<Float32>, timestep: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        assert(modelOutput.scalars.count == sample.scalars.count)
        let (alpha_t, sigma_t) = (self.alpha_t[timestep], self.sigma_t[timestep])

        // This could be optimized with a Metal kernel if we find we need to
        let x0_scalars = zip(modelOutput.scalars, sample.scalars).map { m, s in
            (s - m * sigma_t) / alpha_t
        }
        return MLShapedArray(scalars: x0_scalars, shape: modelOutput.shape)
    }

    /// One step for the first-order DPM-Solver (equivalent to DDIM).
    /// See https://arxiv.org/abs/2206.00927 for the detailed derivation.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func firstOrderUpdate(
        modelOutput: MLShapedArray<Float32>,
        timestep: Int,
        prevTimestep: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        // "p_" prefix: coefficient at the *previous* (destination) timestep
        let (p_lambda_t, lambda_s) = (Double(lambda_t[prevTimestep]), Double(lambda_t[timestep]))
        let p_alpha_t = Double(alpha_t[prevTimestep])
        let (p_sigma_t, sigma_s) = (Double(sigma_t[prevTimestep]), Double(sigma_t[timestep]))
        let h = p_lambda_t - lambda_s
        // x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output
        let x_t = weightedSum(
            [p_sigma_t / sigma_s, -p_alpha_t * (exp(-h) - 1)],
            [sample, modelOutput]
        )
        return x_t
    }

    /// One step for the second-order multistep DPM-Solver++ algorithm, using the midpoint method.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func secondOrderUpdate(
        modelOutputs: [MLShapedArray<Float32>],
        timesteps: [Int],
        prevTimestep t: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        // [back: k] indexes k-th element from the end (custom subscript);
        // s0/m0 are the most recent timestep/output, s1/m1 the one before
        let (s0, s1) = (timesteps[back: 1], timesteps[back: 2])
        let (m0, m1) = (modelOutputs[back: 1], modelOutputs[back: 2])
        let (p_lambda_t, lambda_s0, lambda_s1) = (Double(lambda_t[t]), Double(lambda_t[s0]), Double(lambda_t[s1]))
        let p_alpha_t = Double(alpha_t[t])
        let (p_sigma_t, sigma_s0) = (Double(sigma_t[t]), Double(sigma_t[s0]))
        let (h, h_0) = (p_lambda_t - lambda_s0, lambda_s0 - lambda_s1)
        let r0 = h_0 / h
        let D0 = m0

        // D1 = (1.0 / r0) * (m0 - m1)
        let D1 = weightedSum(
            [1/r0, -1/r0],
            [m0, m1]
        )

        // See https://arxiv.org/abs/2211.01095 for detailed derivations
        // x_t = (
        //      (sigma_t / sigma_s0) * sample
        //    - (alpha_t * (torch.exp(-h) - 1.0)) * D0
        //    - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1
        // )
        let x_t = weightedSum(
            [p_sigma_t/sigma_s0, -p_alpha_t * (exp(-h) - 1), -0.5 * p_alpha_t * (exp(-h) - 1)],
            [sample, D0, D1]
        )
        return x_t
    }

    public func step(output: MLShapedArray<Float32>, timeStep t: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        // NOTE(review): falls back to the last index when `t` is not in
        // `timeSteps` — confirm callers always pass scheduled timesteps
        let stepIndex = timeSteps.firstIndex(of: t) ?? timeSteps.count - 1
        let prevTimestep = stepIndex == timeSteps.count - 1 ? 0 : timeSteps[stepIndex + 1]

        // Use first-order updates for the very first step, and (for short
        // schedules) for the last one/two steps, for numerical stability
        let lowerOrderFinal = useLowerOrderFinal && stepIndex == timeSteps.count - 1 && timeSteps.count < 15
        let lowerOrderSecond = useLowerOrderFinal && stepIndex == timeSteps.count - 2 && timeSteps.count < 15
        let lowerOrder = lowerOrderStepped < 1 || lowerOrderFinal || lowerOrderSecond

        // Keep a rolling window of the last `solverOrder` converted outputs
        let modelOutput = convertModelOutput(modelOutput: output, timestep: t, sample: sample)
        if modelOutputs.count == solverOrder { modelOutputs.removeFirst() }
        modelOutputs.append(modelOutput)

        let prevSample: MLShapedArray<Float32>
        if lowerOrder {
            prevSample = firstOrderUpdate(modelOutput: modelOutput, timestep: t, prevTimestep: prevTimestep, sample: sample)
        } else {
            prevSample = secondOrderUpdate(
                modelOutputs: modelOutputs,
                timesteps: [timeSteps[stepIndex - 1], t],
                prevTimestep: prevTimestep,
                sample: sample
            )
        }
        if lowerOrderStepped < solverOrder {
            lowerOrderStepped += 1
        }

        return prevSample
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML
import Accelerate

/// A decoder model which produces RGB images from latent samples
@available(iOS 16.2, macOS 13.1, *)
public struct Decoder: ResourceManaging {

    /// VAE decoder model
    var model: ManagedMLModel

    /// Create decoder from Core ML model
    ///
    /// - Parameters:
    ///     - url: Location of compiled VAE decoder Core ML model
    ///     - configuration: configuration to be used when the model is loaded
    /// - Returns: A decoder that will lazily load its required resources when needed or requested
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
    }

    /// Ensure the model has been loaded into memory
    public func loadResources() throws {
        try model.loadResources()
    }

    /// Unload the underlying model to free up memory
    public func unloadResources() {
        model.unloadResources()
    }

    /// Batch decode latent samples into images
    ///
    /// - Parameters:
    ///   - latents: Batch of latent samples to decode
    /// - Returns: decoded images
    public func decode(_ latents: [MLShapedArray<Float32>]) throws -> [CGImage] {

        // Form batch inputs for model
        let inputs: [MLFeatureProvider] = try latents.map { sample in
            // Reference pipeline scales the latent samples before decoding,
            // so undo the scaling (divide by 0.18215) prior to the VAE decoder
            let sampleScaled = MLShapedArray<Float32>(
                scalars: sample.scalars.map { $0 / 0.18215 },
                shape: sample.shape)

            let dict = [inputName: MLMultiArray(sampleScaled)]
            return try MLDictionaryFeatureProvider(dictionary: dict)
        }
        let batch = MLArrayBatchProvider(array: inputs)

        // Batch predict with model
        let results = try model.perform { model in
            try model.predictions(fromBatch: batch)
        }

        // Transform the outputs to CGImages
        let images: [CGImage] = (0..<results.count).map { i in
            let result = results.features(at: i)
            // The decoder has a single output feature; take it by name
            let outputName = result.featureNames.first!
            let output = result.featureValue(for: outputName)!.multiArrayValue!

            return toRGBCGImage(MLShapedArray<Float32>(output))
        }

        return images
    }

    /// Name of the model's (single) input feature
    var inputName: String {
        try! model.perform { model in
            model.modelDescription.inputDescriptionsByName.first!.key
        }
    }

    typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
    typealias PixelBufferP8x3 = vImage.PixelBuffer<vImage.Planar8x3>
    typealias PixelBufferIFx3 = vImage.PixelBuffer<vImage.InterleavedFx3>
    typealias PixelBufferI8x3 = vImage.PixelBuffer<vImage.Interleaved8x3>

    /// Convert a decoded [1,3,H,W] float array in range [-1, 1] into an RGB CGImage
    func toRGBCGImage(_ array: MLShapedArray<Float32>) -> CGImage {

        // array is [N,C,H,W], where C==3
        let channelCount = array.shape[1]
        assert(channelCount == 3,
               "Decoding model output has \(channelCount) channels, expected 3")
        let height = array.shape[2]
        let width = array.shape[3]

        // Normalize each channel into a float between 0 and 1.0
        let floatChannels = (0..<channelCount).map { i in

            // Normalized channel output
            let cOut = PixelBufferPFx1(width: width, height:height)

            // Reference this channel in the array and normalize
            // (wraps the channel's memory in-place; strides[0]*4 is the row
            // byte count since scalars are 4-byte Float32)
            array[0][i].withUnsafeShapedBufferPointer { ptr, _, strides in
                let cIn = PixelBufferPFx1(data: .init(mutating: ptr.baseAddress!),
                                          width: width, height: height,
                                          byteCountPerRow: strides[0]*4)
                // Map [-1.0 1.0] -> [0.0 1.0]
                cIn.multiply(by: 0.5, preBias: 1.0, postBias: 0.0, destination: cOut)
            }
            return cOut
        }

        // Convert to interleaved and then to UInt8
        let floatImage = PixelBufferIFx3(planarBuffers: floatChannels)
        let uint8Image = PixelBufferI8x3(width: width, height: height)
        floatImage.convert(to:uint8Image) // maps [0.0 1.0] -> [0 255] and clips

        // Convert to uint8x3 to RGB CGImage (no alpha)
        let bitmapInfo = CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue)
        let cgImage = uint8Image.makeCGImage(cgImageFormat:
            .init(bitsPerComponent: 8,
                  bitsPerPixel: 3*8,
                  colorSpace: CGColorSpaceCreateDeviceRGB(),
                  bitmapInfo: bitmapInfo)!)!

        return cgImage
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML

/// A class to manage and gate access to a Core ML model
///
/// It will automatically load a model into memory when needed or requested
/// It allows one to request to unload the model from memory
@available(iOS 16.2, macOS 13.1, *)
public final class ManagedMLModel: ResourceManaging {

    /// Location of the compiled model on disk
    var modelURL: URL

    /// Configuration applied whenever the model is (re)loaded
    var configuration: MLModelConfiguration

    /// Backing storage for the lazily loaded model
    var loadedModel: MLModel?

    /// Serial queue that gates all access to `loadedModel`
    var queue: DispatchQueue

    /// Create a managed model given its location and desired loaded configuration
    ///
    /// - Parameters:
    ///     - url: The location of the model
    ///     - configuration: The configuration to be used when the model is loaded/used
    /// - Returns: A managed model that has not been loaded
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        modelURL = url
        self.configuration = configuration
        loadedModel = nil
        queue = DispatchQueue(label: "managed.\(url.lastPathComponent)")
    }

    /// Load the model into memory (no-op if already loaded)
    public func loadResources() throws {
        try queue.sync { try loadModel() }
    }

    /// Release the in-memory model, if any
    public func unloadResources() {
        queue.sync { loadedModel = nil }
    }

    /// Perform an operation with the managed model via a supplied closure.
    /// The model will be loaded and supplied to the closure and should only be
    /// used within the closure to ensure all resource management is synchronized
    ///
    /// - Parameters:
    ///     - body: Closure which performs an action on a loaded model
    /// - Returns: The result of the closure
    /// - Throws: An error if the model cannot be loaded or if the closure throws
    public func perform<R>(_ body: (MLModel) throws -> R) throws -> R {
        try queue.sync {
            try autoreleasepool {
                try loadModel()
                return try body(loadedModel!)
            }
        }
    }

    /// Instantiate the model if it is not already resident
    private func loadModel() throws {
        guard loadedModel == nil else { return }
        loadedModel = try MLModel(contentsOf: modelURL,
                                  configuration: configuration)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML

/// A random source consistent with NumPy
///
/// This implementation matches:
/// [NumPy's older randomkit.c](https://github.com/numpy/numpy/blob/v1.0/numpy/random/mtrand/randomkit.c)
///
@available(iOS 16.2, macOS 13.1, *)
struct NumPyRandomSource: RandomNumberGenerator {

    struct State {
        // Mersenne Twister 19937 state vector (624 words) and cursor
        var key = [UInt32](repeating: 0, count: 624)
        var pos: Int = 0
        // Box-Muller produces two gaussians per call; the spare is cached here
        var nextGauss: Double? = nil
    }

    var state: State

    /// Initialize with a random seed
    ///
    /// - Parameters
    ///     - seed: Seed for underlying Mersenne Twister 19937 generator
    /// - Returns random source
    init(seed: UInt32) {
        state = .init()
        var s = seed & 0xffffffff
        // Knuth-style initialization, matching NumPy's rk_seed
        for i in 0 ..< state.key.count {
            state.key[i] = s
            s = UInt32((UInt64(1812433253) * UInt64(s ^ (s >> 30)) + UInt64(i) + 1) & 0xffffffff)
        }
        // pos == key.count forces a twist on the first draw
        state.pos = state.key.count
        state.nextGauss = nil
    }

    /// Generate next UInt32 using fast 32bit Mersenne Twister
    mutating func nextUInt32() -> UInt32 {
        let n = 624
        let m = 397
        let matrixA: UInt64 = 0x9908b0df
        let upperMask: UInt32 = 0x80000000
        let lowerMask: UInt32 = 0x7fffffff

        var y: UInt32
        if state.pos == state.key.count {
            // Regenerate (twist) the whole state block of n words.
            // (UInt64(~(y & 1)) + 1) & matrixA == (y & 1 == 1 ? matrixA : 0),
            // computed branch-free via two's complement
            for i in 0 ..< (n - m) {
                y = (state.key[i] & upperMask) | (state.key[i + 1] & lowerMask)
                state.key[i] = state.key[i + m] ^ (y >> 1) ^ UInt32((UInt64(~(y & 1)) + 1) & matrixA)
            }
            for i in (n - m) ..< (n - 1) {
                y = (state.key[i] & upperMask) | (state.key[i + 1] & lowerMask)
                state.key[i] = state.key[i + (m - n)] ^ (y >> 1) ^ UInt32((UInt64(~(y & 1)) + 1) & matrixA)
            }
            y = (state.key[n - 1] & upperMask) | (state.key[0] & lowerMask)
            state.key[n - 1] = state.key[m - 1] ^ (y >> 1) ^ UInt32((UInt64(~(y & 1)) + 1) & matrixA)
            state.pos = 0
        }
        y = state.key[state.pos]
        state.pos += 1

        // Tempering transform (standard MT19937 constants)
        y ^= (y >> 11)
        y ^= (y << 7) & 0x9d2c5680
        y ^= (y << 15) & 0xefc60000
        y ^= (y >> 18)

        return y
    }

    /// RandomNumberGenerator conformance: two 32-bit draws form one UInt64
    mutating func next() -> UInt64 {
        let low = nextUInt32()
        let high = nextUInt32()
        return (UInt64(high) << 32) | UInt64(low)
    }

    /// Generate next random double value
    /// (53-bit resolution in [0, 1), matching NumPy's rk_double)
    mutating func nextDouble() -> Double {
        let a = Double(nextUInt32() >> 5)
        let b = Double(nextUInt32() >> 6)
        return (a * 67108864.0 + b) / 9007199254740992.0
    }

    /// Generate next random value from a standard normal
    mutating func nextGauss() -> Double {
        // Return the cached second value from the previous Box-Muller pair
        if let nextGauss = state.nextGauss {
            state.nextGauss = nil
            return nextGauss
        }
        // Rejection-sample a point inside the unit circle (polar method)
        var x1, x2, r2: Double
        repeat {
            x1 = 2.0 * nextDouble() - 1.0
            x2 = 2.0 * nextDouble() - 1.0
            r2 = x1 * x1 + x2 * x2
        } while r2 >= 1.0 || r2 == 0.0

        // Box-Muller transform
        let f = sqrt(-2.0 * log(r2) / r2)
        state.nextGauss = f * x1
        return f * x2
    }

    /// Generates a random value from a normal distribution with given mean and standard deviation.
    mutating func nextNormal(mean: Double = 0.0, stdev: Double = 1.0) -> Double {
        nextGauss() * stdev + mean
    }

    /// Generates an array of random values from a normal distribution with given mean and standard deviation.
    mutating func normalArray(count: Int, mean: Double = 0.0, stdev: Double = 1.0) -> [Double] {
        (0 ..< count).map { _ in nextNormal(mean: mean, stdev: stdev) }
    }

    /// Generate a shaped array with scalars from a normal distribution with given mean and standard deviation.
    mutating func normalShapedArray(_ shape: [Int], mean: Double = 0.0, stdev: Double = 1.0) -> MLShapedArray<Double> {
        let count = shape.reduce(1, *)
        return .init(scalars: normalArray(count: count, mean: mean, stdev: stdev), shape: shape)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

/// Protocol for managing internal resources
public protocol ResourceManaging {

    /// Request resources to be loaded and ready if possible
    func loadResources() throws

    /// Request resources to be unloaded / removed from memory if possible
    func unloadResources()
}

extension ResourceManaging {
    /// Request resources are pre-warmed by loading and then immediately unloading,
    /// so that subsequent loads are faster (e.g. after OS-level caching)
    func prewarmResources() throws {
        try loadResources()
        unloadResources()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML
import Accelerate

/// Image safety checking model
@available(iOS 16.2, macOS 13.1, *)
public struct SafetyChecker: ResourceManaging {

    /// Safety checking Core ML model
    var model: ManagedMLModel

    /// Creates safety checker
    ///
    /// - Parameters:
    ///     - url: Location of compiled safety checking Core ML model
    ///     - configuration: configuration to be used when the model is loaded
    /// - Returns: A safety checker that will lazily load its required resources when needed or requested
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
    }

    /// Ensure the model has been loaded into memory
    public func loadResources() throws {
        try model.loadResources()
    }

    /// Unload the underlying model to free up memory
    public func unloadResources() {
        model.unloadResources()
    }

    typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
    typealias PixelBufferP8x1 = vImage.PixelBuffer<vImage.Planar8>
    typealias PixelBufferPFx3 = vImage.PixelBuffer<vImage.PlanarFx3>
    typealias PixelBufferP8x3 = vImage.PixelBuffer<vImage.Planar8x3>
    typealias PixelBufferIFx3 = vImage.PixelBuffer<vImage.InterleavedFx3>
    typealias PixelBufferI8x3 = vImage.PixelBuffer<vImage.Interleaved8x3>
    typealias PixelBufferI8x4 = vImage.PixelBuffer<vImage.Interleaved8x4>

    enum SafetyCheckError: Error {
        case imageResizeFailure
        case imageToFloatFailure
        case modelInputFailure
        case unexpectedModelOutput
    }

    /// Check if image is safe
    ///
    /// - Parameters:
    ///     - image: Image to check
    /// - Returns: Whether the model considers the image to be safe
    public func isSafe(_ image: CGImage) throws -> Bool {

        let inputName = "clip_input"
        let adjustmentName = "adjustment"
        let imagesNames = "images"

        let inputInfo = try model.perform { model in
            model.modelDescription.inputDescriptionsByName
        }
        let inputShape = inputInfo[inputName]!.multiArrayConstraint!.shape

        // NOTE(review): reads shape[2] as width and shape[3] as height;
        // confirm this matches the model's declared input layout
        let width = inputShape[2].intValue
        let height = inputShape[3].intValue

        let resizedImage = try resizeToRGBA(image, width: width, height: height)

        let bufferP8x3 = try getRGBPlanes(of: resizedImage)

        let arrayPFx3 = normalizeToFloatShapedArray(bufferP8x3)

        guard let input = try? MLDictionaryFeatureProvider(
            dictionary:[
                // Input that is analyzed for safety
                inputName : MLMultiArray(arrayPFx3),
                // No adjustment, use default threshold
                adjustmentName : MLMultiArray(MLShapedArray<Float32>(scalars: [0], shape: [1])),
                // Supplying dummy images to be filtered (will be ignored)
                imagesNames : MLMultiArray(shape:[1, 512, 512, 3], dataType: .float16)
            ]
        ) else {
            throw SafetyCheckError.modelInputFailure
        }

        let result = try model.perform { model in
            try model.prediction(from: input)
        }

        let output = result.featureValue(for: "has_nsfw_concepts")

        guard let unsafe = output?.multiArrayValue?[0].boolValue else {
            throw SafetyCheckError.unexpectedModelOutput
        }

        return !unsafe
    }

    /// Draw the image into a new RGBA bitmap context of the requested size
    func resizeToRGBA(_ image: CGImage,
                      width: Int, height: Int) throws -> CGImage {

        guard let context = CGContext(
            data: nil,
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: width*4,
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue) else {
            throw SafetyCheckError.imageResizeFailure
        }

        context.interpolationQuality = .high
        context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
        guard let resizedImage = context.makeImage() else {
            throw SafetyCheckError.imageResizeFailure
        }

        return resizedImage
    }

    /// Split an RGBA image into three planar 8-bit channels (alpha dropped)
    func getRGBPlanes(of rgbaImage: CGImage) throws -> PixelBufferP8x3 {
        // Reference as interleaved 8 bit vImage PixelBuffer
        var emptyFormat = vImage_CGImageFormat()
        guard let bufferI8x4 = try? PixelBufferI8x4(
            cgImage: rgbaImage,
            cgImageFormat:&emptyFormat) else {
            throw SafetyCheckError.imageToFloatFailure
        }

        // Drop the alpha channel, keeping RGB
        let bufferI8x3 = PixelBufferI8x3(width: rgbaImage.width, height:rgbaImage.height)
        bufferI8x4.convert(to: bufferI8x3, channelOrdering: .RGBA)

        // De-interleave into 8-bit planes
        return PixelBufferP8x3(interleavedBuffer: bufferI8x3)
    }

    /// Standardize each channel with CLIP's per-channel mean/std statistics
    func normalizeToFloatShapedArray(_ bufferP8x3: PixelBufferP8x3) -> MLShapedArray<Float32> {
        let width = bufferP8x3.width
        let height = bufferP8x3.height

        // Per-channel normalization statistics (CLIP image preprocessing)
        let means = [0.485, 0.456, 0.406] as [Float]
        let stds  = [0.229, 0.224, 0.225] as [Float]

        // Convert to normalized float 1x3xWxH input (planar)
        let arrayPFx3 = MLShapedArray<Float32>(repeating: 0.0, shape: [1, 3, width, height])
        for c in 0..<3 {
            // Wrap the channel's backing memory so vImage writes in place
            arrayPFx3[0][c].withUnsafeShapedBufferPointer { ptr, _, strides in
                let floatChannel = PixelBufferPFx1(data: .init(mutating: ptr.baseAddress!),
                                                   width: width, height: height,
                                                   byteCountPerRow: strides[0]*4)

                bufferP8x3.withUnsafePixelBuffer(at: c) { uint8Channel in
                    uint8Channel.convert(to: floatChannel) // maps [0 255] -> [0 1]
                    // (x - mean) / std, computed as (x + preBias) * scale
                    floatChannel.multiply(by: 1.0/stds[c],
                                          preBias: -means[c],
                                          postBias: 0.0,
                                          destination: floatChannel)
                }
            }
        }
        return arrayPFx3
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation

/// A utility for timing events and tracking time statistics
///
/// Typical usage
/// ```
/// let timer: SampleTimer
///
/// for i in 0...<iterationCount {
/// timer.start()
/// doStuff()
/// timer.stop()
/// }
///
/// print(String(format: "mean: %.2f, var: %.2f",
/// timer.mean, timer.variance))
/// ```
@available(iOS 16.2, macOS 13.1, *)
public final class SampleTimer: Codable {
    var startTime: CFAbsoluteTime?
    var sum: Double = 0.0
    var sumOfSquares: Double = 0.0
    var count = 0
    var samples: [Double] = []

    public init() {}

    /// Start a sample, noting the current time
    public func start() {
        startTime = CFAbsoluteTimeGetCurrent()
    }

    /// Stop a sample and record the elapsed time
    /// (returns 0 if `start()` was never called)
    @discardableResult public func stop() -> Double {
        guard let startTime = startTime else {
            return 0
        }

        let elapsed = CFAbsoluteTimeGetCurrent() - startTime
        sum += elapsed
        sumOfSquares += elapsed * elapsed
        count += 1
        samples.append(elapsed)
        return elapsed
    }

    /// Mean of all sampled times (0 if no samples, instead of NaN)
    public var mean: Double {
        guard count > 0 else { return 0.0 }
        return sum / Double(count)
    }

    /// Unbiased sample variance of all sampled times.
    ///
    /// Bug fix: previous formula was `sumOfSquares / (count - 1) - mean²`,
    /// which mixes the population and sample estimators and overestimates by
    /// `mean² / (count - 1)`. The correct unbiased form is
    /// `(Σx² − n·mean²) / (n − 1)`; clamped at 0 to absorb rounding error.
    public var variance: Double {
        guard count > 1 else {
            return 0.0
        }
        let n = Double(count)
        return max(0.0, (sumOfSquares - n * mean * mean) / (n - 1))
    }

    /// Standard deviation of all sampled times
    public var stdev: Double { variance.squareRoot() }

    /// Median of all sampled times (0 if no samples, instead of trapping)
    public var median: Double {
        guard !samples.isEmpty else { return 0.0 }
        let sorted = samples.sorted()
        let (q, r) = sorted.count.quotientAndRemainder(dividingBy: 2)
        if r == 0 {
            // Even count: average the two middle samples
            return (sorted[q] + sorted[q - 1]) / 2.0
        } else {
            return sorted[q]
        }
    }

    public var allSamples: [Double] {
        samples
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML

@available(iOS 16.2, macOS 13.1, *)
public protocol Scheduler {
    /// Number of diffusion steps performed during training
    var trainStepCount: Int { get }

    /// Number of inference steps to be performed
    var inferenceStepCount: Int { get }

    /// Training diffusion time steps indexed by inference time step
    var timeSteps: [Int] { get }

    /// Schedule of betas which controls the amount of noise added at each timestep
    var betas: [Float] { get }

    /// 1 - betas
    var alphas: [Float] { get }

    /// Cached cumulative product of alphas
    var alphasCumProd: [Float] { get }

    /// Standard deviation of the initial noise distribution
    var initNoiseSigma: Float { get }

    /// Compute a de-noised image sample and step scheduler state
    ///
    /// - Parameters:
    ///   - output: The predicted residual noise output of learned diffusion model
    ///   - timeStep: The current time step in the diffusion chain
    ///   - sample: The current input sample to the diffusion model
    /// - Returns: Predicted de-noised sample at the previous time step
    /// - Postcondition: The scheduler state is updated.
    ///   The state holds the current sample and history of model output noise residuals
    func step(
        output: MLShapedArray<Float32>,
        timeStep t: Int,
        sample s: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32>
}

@available(iOS 16.2, macOS 13.1, *)
public extension Scheduler {
    /// Default initial noise standard deviation (unit sigma)
    var initNoiseSigma: Float { 1 }
}

@available(iOS 16.2, macOS 13.1, *)
public extension Scheduler {
    /// Compute weighted sum of shaped arrays of equal shapes
    ///
    /// - Parameters:
    ///   - weights: The weights each array is multiplied by
    ///   - values: The arrays to be weighted and summed
    /// - Returns: sum_i weights[i]*values[i]
    /// - Precondition: `weights` is non-empty, `values.count == weights.count`,
    ///   and all shaped arrays hold the same number of scalars.
    func weightedSum(_ weights: [Double], _ values: [MLShapedArray<Float32>]) -> MLShapedArray<Float32> {
        // Generalized: the previous assertion demanded at least two terms,
        // but the accumulation below is equally valid for a single term.
        assert(!weights.isEmpty && values.count == weights.count)
        assert(values.allSatisfy({ $0.scalarCount == values.first!.scalarCount }))

        // Seed the accumulator with the first weighted term
        var scalars = values[0].scalars.map { $0 * Float(weights[0]) }

        // Accumulate the remaining weighted terms in place
        for next in 1 ..< values.count {
            let w = Float(weights[next])
            let nextScalars = values[next].scalars
            for i in 0 ..< scalars.count {
                scalars[i] += w * nextScalars[i]
            }
        }
        return MLShapedArray(scalars: scalars, shape: values[0].shape)
    }
}

/// How to map a beta range to a sequence of betas to step over
@available(iOS 16.2, macOS 13.1, *)
public enum BetaSchedule {
    /// Linear stepping between start and end
    case linear
    /// Steps using linspace(sqrt(start),sqrt(end))^2
    /// (the schedule used by Stable Diffusion checkpoints)
    case scaledLinear
}


/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers PNDMScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py)
///
/// This scheduler uses the pseudo linear multi-step (PLMS) method only, skipping pseudo Runge-Kutta (PRK) steps
@available(iOS 16.2, macOS 13.1, *)
public final class PNDMScheduler: Scheduler {
    public let trainStepCount: Int
    public let inferenceStepCount: Int
    public let betas: [Float]
    public let alphas: [Float]
    public let alphasCumProd: [Float]
    public let timeSteps: [Int]

    // Internal state
    // counter: number of step() calls performed so far
    var counter: Int
    // ets: history of recent noise residuals (at most 4 kept) for the
    // linear multi-step extrapolation
    var ets: [MLShapedArray<Float32>]
    // currentSample: sample saved between the two half-steps of the PLMS warm-up
    var currentSample: MLShapedArray<Float32>?

    /// Create a scheduler that uses a pseudo linear multi-step (PLMS) method
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }
        self.alphas = betas.map({ 1.0 - $0 })
        // Running cumulative product: alphasCumProd[i] = alphas[0] * ... * alphas[i]
        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd
        let stepsOffset = 1 // For stable diffusion
        // Integer division is intentional here (matches the diffusers reference)
        let stepRatio = Float(trainStepCount / stepCount )
        let forwardSteps = (0..<stepCount).map {
            Int((Float($0) * stepRatio).rounded()) + stepsOffset
        }

        // Reversed schedule with the second-to-last forward step duplicated;
        // the duplicate supplies the extra half-step of the PLMS warm-up
        var timeSteps: [Int] = []
        timeSteps.append(contentsOf: forwardSteps.dropLast(1))
        timeSteps.append(timeSteps.last!)
        timeSteps.append(forwardSteps.last!)
        timeSteps.reverse()

        self.timeSteps = timeSteps
        self.counter = 0
        self.ets = []
        self.currentSample = nil
    }

    /// Compute a de-noised image sample and step scheduler state
    ///
    /// - Parameters:
    ///   - output: The predicted residual noise output of learned diffusion model
    ///   - timeStep: The current time step in the diffusion chain
    ///   - sample: The current input sample to the diffusion model
    /// - Returns: Predicted de-noised sample at the previous time step
    /// - Postcondition: The scheduler state is updated.
    ///   The state holds the current sample and history of model output noise residuals
    public func step(
        output: MLShapedArray<Float32>,
        timeStep t: Int,
        sample s: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {

        var timeStep = t
        let stepInc = (trainStepCount / inferenceStepCount)
        var prevStep = timeStep - stepInc
        var modelOutput = output
        var sample = s

        if counter != 1 {
            // Keep only the 3 most recent residuals before appending,
            // so at most 4 are ever held for the 4th-order formula below
            if ets.count > 3 {
                ets = Array(ets[(ets.count - 3)..<ets.count])
            }
            ets.append(output)
        } else {
            // Second call of the warm-up: redo the same timestep
            // (no new residual is recorded)
            prevStep = timeStep
            timeStep = timeStep + stepInc
        }

        // Select the multi-step extrapolation order based on how much
        // residual history is available (1st through 4th order PLMS)
        if ets.count == 1 && counter == 0 {
            modelOutput = output
            currentSample = sample
        } else if ets.count == 1 && counter == 1 {
            // Heun-like second half of the warm-up; restore the saved sample
            modelOutput = weightedSum(
                [1.0/2.0, 1.0/2.0],
                [output, ets[back: 1]]
            )
            sample = currentSample!
            currentSample = nil
        } else if ets.count == 2 {
            modelOutput = weightedSum(
                [3.0/2.0, -1.0/2.0],
                [ets[back: 1], ets[back: 2]]
            )
        } else if ets.count == 3 {
            modelOutput = weightedSum(
                [23.0/12.0, -16.0/12.0, 5.0/12.0],
                [ets[back: 1], ets[back: 2], ets[back: 3]]
            )
        } else {
            // 4th-order Adams-Bashforth coefficients
            modelOutput = weightedSum(
                [55.0/24.0, -59.0/24.0, 37/24.0, -9/24.0],
                [ets[back: 1], ets[back: 2], ets[back: 3], ets[back: 4]]
            )
        }

        let prevSample = previousSample(sample, timeStep, prevStep, modelOutput)
        counter += 1
        return prevSample
    }

    /// Compute sample (denoised image) at previous step given a current time step
    ///
    /// - Parameters:
    ///   - sample: The current input to the model x_t
    ///   - timeStep: The current time step t
    ///   - prevStep: The previous time step t−δ
    ///   - modelOutput: Predicted noise residual the current time step e_θ(x_t, t)
    /// - Returns: Computes previous sample x_(t−δ)
    func previousSample(
        _ sample: MLShapedArray<Float32>,
        _ timeStep: Int,
        _ prevStep: Int,
        _ modelOutput: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {

        // Compute x_(t−δ) using formula (9) from
        // "Pseudo Numerical Methods for Diffusion Models on Manifolds",
        // Luping Liu, Yi Ren, Zhijie Lin & Zhou Zhao.
        // ICLR 2022
        //
        // Notation:
        //
        // alphaProdt       α_t
        // alphaProdtPrev   α_(t−δ)
        // betaProdt        (1 - α_t)
        // betaProdtPrev    (1 - α_(t−δ))
        // Note: prevStep is clamped to 0 for the final step of the chain
        let alphaProdt = alphasCumProd[timeStep]
        let alphaProdtPrev = alphasCumProd[max(0,prevStep)]
        let betaProdt = 1 - alphaProdt
        let betaProdtPrev = 1 - alphaProdtPrev

        // sampleCoeff = (α_(t−δ) - α_t) divided by
        // denominator of x_t in formula (9) and plus 1
        // Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) =
        // sqrt(α_(t−δ)) / sqrt(α_t))
        let sampleCoeff = sqrt(alphaProdtPrev / alphaProdt)

        // Denominator of e_θ(x_t, t) in formula (9)
        let modelOutputDenomCoeff = alphaProdt * sqrt(betaProdtPrev)
        + sqrt(alphaProdt * betaProdt * alphaProdtPrev)

        // full formula (9)
        let modelCoeff = -(alphaProdtPrev - alphaProdt)/modelOutputDenomCoeff
        let prevSample = weightedSum(
            [Double(sampleCoeff), Double(modelCoeff)],
            [sample, modelOutput]
        )

        return prevSample
    }
}

/// Evenly spaced floats between specified interval
///
/// - Parameters:
///   - start: Start of the interval
///   - end: End of the interval
///   - count: The number of floats to return between [*start*, *end*]
/// - Returns: Float array with *count* elements evenly spaced between at *start* and *end*.
///   A count of 1 yields `[start]`; a non-positive count yields an empty array.
///   (Previously count == 1 divided by zero and produced NaN values.)
func linspace(_ start: Float, _ end: Float, _ count: Int) -> [Float] {
    guard count > 1 else {
        return count == 1 ? [start] : []
    }
    let scale = (end - start) / Float(count - 1)
    return (0..<count).map { Float($0)*scale + start }
}

extension Collection {
    /// Indexes the collection from its end: `self[back: 1]` is the last
    /// element, `self[back: 2]` the one before it, and so on.
    public subscript(back i: Int) -> Element {
        let position = index(endIndex, offsetBy: -i)
        return self[position]
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML

@available(iOS 16.2, macOS 13.1, *)
public extension StableDiffusionPipeline {

    /// Well-known locations of each model and tokenizer resource
    /// inside a resource directory
    struct ResourceURLs {

        public let textEncoderURL: URL
        public let unetURL: URL
        public let unetChunk1URL: URL
        public let unetChunk2URL: URL
        public let decoderURL: URL
        public let safetyCheckerURL: URL
        public let vocabURL: URL
        public let mergesURL: URL

        /// Derives every expected resource URL from `baseURL` by fixed file name
        public init(resourcesAt baseURL: URL) {
            textEncoderURL = baseURL.appending(path: "TextEncoder.mlmodelc")
            unetURL = baseURL.appending(path: "Unet.mlmodelc")
            unetChunk1URL = baseURL.appending(path: "UnetChunk1.mlmodelc")
            unetChunk2URL = baseURL.appending(path: "UnetChunk2.mlmodelc")
            decoderURL = baseURL.appending(path: "VAEDecoder.mlmodelc")
            safetyCheckerURL = baseURL.appending(path: "SafetyChecker.mlmodelc")
            vocabURL = baseURL.appending(path: "vocab.json")
            mergesURL = baseURL.appending(path: "merges.txt")
        }
    }

    /// Create stable diffusion pipeline using model resources at a
    /// specified URL
    ///
    /// - Parameters:
    ///   - baseURL: URL pointing to directory holding all model
    ///              and tokenization resources
    ///   - configuration: The configuration to load model resources with
    ///   - disableSafety: Load time disable of safety to save memory
    ///   - reduceMemory: Setup pipeline in reduced memory mode
    /// - Throws: Errors from reading/parsing the tokenizer merges or vocabulary files
    /// - Returns:
    ///  Pipeline ready for image generation if all necessary resources loaded
    init(resourcesAt baseURL: URL,
         configuration config: MLModelConfiguration = .init(),
         disableSafety: Bool = false,
         reduceMemory: Bool = false) throws {

        /// Expect URL of each resource
        let urls = ResourceURLs(resourcesAt: baseURL)

        // Text tokenizer and encoder
        let tokenizer = try BPETokenizer(mergesAt: urls.mergesURL, vocabularyAt: urls.vocabURL)
        let textEncoder = TextEncoder(tokenizer: tokenizer,
                                      modelAt: urls.textEncoderURL,
                                      configuration: config)

        // Unet model
        // Prefer the chunked form (used on memory-constrained devices) when
        // both chunk directories are present; otherwise load the single model
        let unet: Unet
        if FileManager.default.fileExists(atPath: urls.unetChunk1URL.path) &&
            FileManager.default.fileExists(atPath: urls.unetChunk2URL.path) {
            unet = Unet(chunksAt: [urls.unetChunk1URL, urls.unetChunk2URL],
                        configuration: config)
        } else {
            unet = Unet(modelAt: urls.unetURL, configuration: config)
        }

        // Image Decoder
        let decoder = Decoder(modelAt: urls.decoderURL, configuration: config)

        // Optional safety checker
        // Only attached when enabled AND its compiled model exists on disk
        var safetyChecker: SafetyChecker? = nil
        if !disableSafety &&
            FileManager.default.fileExists(atPath: urls.safetyCheckerURL.path) {
            safetyChecker = SafetyChecker(modelAt: urls.safetyCheckerURL, configuration: config)
        }

        // Construct pipeline
        self.init(textEncoder: textEncoder,
                  unet: unet,
                  decoder: decoder,
                  safetyChecker: safetyChecker,
                  reduceMemory: reduceMemory)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,388 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML
import Accelerate
import CoreGraphics
import Dispatch


/// Schedulers compatible with StableDiffusionPipeline
///
/// Each case maps to a concrete `Scheduler` implementation constructed
/// per-image inside `generateImages`.
public enum StableDiffusionScheduler {
    /// Scheduler that uses a pseudo-linear multi-step (PLMS) method
    case pndmScheduler
    /// Scheduler that uses a second order DPM-Solver++ algorithm
    case dpmSolverMultistepScheduler
}

/// A pipeline used to generate image samples from text input using stable diffusion
///
/// This implementation matches:
/// [Hugging Face Diffusers Pipeline](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py)
@available(iOS 16.2, macOS 13.1, *)
public struct StableDiffusionPipeline: ResourceManaging {

    /// Model to generate embeddings for tokenized input text
    var textEncoder: TextEncoder

    /// Model used to predict noise residuals given an input, diffusion time step, and conditional embedding
    var unet: Unet

    /// Model used to generate final image from latent diffusion process
    var decoder: Decoder

    /// Optional model for checking safety of generated image
    var safetyChecker: SafetyChecker? = nil

    /// Reports whether this pipeline can perform safety checks
    public var canSafetyCheck: Bool {
        safetyChecker != nil
    }

    /// Option to reduce memory during image generation
    ///
    /// If true, the pipeline will lazily load TextEncoder, Unet, Decoder, and SafetyChecker
    /// when needed and aggressively unload their resources after
    ///
    /// This will increase latency in favor of reducing memory
    var reduceMemory: Bool = false

    /// Creates a pipeline using the specified models and tokenizer
    ///
    /// - Parameters:
    ///   - textEncoder: Model for encoding tokenized text
    ///   - unet: Model for noise prediction on latent samples
    ///   - decoder: Model for decoding latent sample to image
    ///   - safetyChecker: Optional model for checking safety of generated images
    ///   - reduceMemory: Option to enable reduced memory mode
    /// - Returns: Pipeline ready for image generation
    public init(textEncoder: TextEncoder,
                unet: Unet,
                decoder: Decoder,
                safetyChecker: SafetyChecker? = nil,
                reduceMemory: Bool = false) {
        self.textEncoder = textEncoder
        self.unet = unet
        self.decoder = decoder
        self.safetyChecker = safetyChecker
        self.reduceMemory = reduceMemory
    }

    /// Load required resources for this pipeline
    ///
    /// If reducedMemory is true this will instead call prewarmResources
    /// and let the pipeline lazily load resources as needed
    public func loadResources() throws {
        if reduceMemory {
            try prewarmResources()
        } else {
            try textEncoder.loadResources()
            try unet.loadResources()
            try decoder.loadResources()
            try safetyChecker?.loadResources()
        }
    }

    /// Unload the underlying resources to free up memory
    public func unloadResources() {
        textEncoder.unloadResources()
        unet.unloadResources()
        decoder.unloadResources()
        safetyChecker?.unloadResources()
    }

    /// Prewarm resources one at a time to bound peak memory.
    ///
    /// The "sdbk ..." lines are a machine-readable progress protocol emitted
    /// for the host process that spawned this backend — their exact text is
    /// load-bearing and must not be altered.
    public func prewarmResources() throws {
        try textEncoder.prewarmResources()
        print("sdbk mlpr 25")
        print("sdbk mlms done 25 of 100.0")
        try unet.prewarmResources()
        print("sdbk mlpr 50")
        print("sdbk mlms done 75 of 100.0")
        try decoder.prewarmResources()
        print("Decoder prewarmed")
        print("sdbk mlpr 100")
        print("sdbk mlms done 100 of 100.0")
        try safetyChecker?.prewarmResources()
        print("SafetyChecker prewarmed")
    }

    /// Text to image generation using stable diffusion
    ///
    /// - Parameters:
    ///   - prompt: Text prompt to guide sampling
    ///   - negativePrompt: Negative text prompt to guide sampling
    ///   - imageCount: Number of samples/images to generate for the input prompt
    ///   - stepCount: Number of inference steps to perform
    ///   - seed: Random seed for the latent noise generator
    ///   - guidanceScale: Controls the influence of the text prompt on sampling process (0=random images)
    ///   - input_image: Reserved for image-to-image generation; currently unused
    ///   - mask_image: Reserved for inpainting; currently unused
    ///   - input_image_strength: Reserved for image-to-image generation; currently unused
    ///   - disableSafety: Safety checks are only performed if `self.canSafetyCheck && !disableSafety`
    ///   - scheduler: Which denoising scheduler to use
    ///   - progressHandler: Callback to perform after each step, stops on receiving false response
    /// - Returns: An array of `imageCount` optional images.
    ///            The images will be nil if safety checks were performed and found the result to be un-safe.
    ///            An empty array is returned when generation was stopped early (by the
    ///            progress handler or by a "b2s t2im __stop__" line on stdin).
    public func generateImages(
        prompt: String,
        negativePrompt: String = "",
        imageCount: Int = 1,
        stepCount: Int = 50,
        seed: UInt32 = 0,
        guidanceScale: Float = 7.5,
        input_image: CGImage? = nil,
        mask_image: CGImage? = nil,
        input_image_strength: Float = 0.5,
        disableSafety: Bool = false,
        scheduler: StableDiffusionScheduler = .pndmScheduler,
        progressHandler: (Progress) -> Bool = { _ in true }
    ) throws -> [CGImage?] {

        // Encode the input prompt and negative prompt
        let promptEmbedding = try textEncoder.encode(prompt)
        let negativePromptEmbedding = try textEncoder.encode(negativePrompt)

        // Watch stdin for an out-of-band stop request from the host process.
        let stdinQueue = DispatchQueue(label: "my.serial.queue")
        var inputCharacters: [CChar] = []

        let stdinSource = DispatchSource.makeReadSource(fileDescriptor: STDIN_FILENO, queue: stdinQueue)
        stdinSource.setEventHandler(handler: {
            var c = CChar()
            if read(STDIN_FILENO, &c, 1) == 1 {
                inputCharacters.append(c)
            }
        })
        stdinSource.resume()
        // FIX: cancel the read source on every exit path. Previously each call
        // to generateImages leaked a resumed dispatch source, leaving multiple
        // live handlers competing for bytes on stdin across calls.
        defer { stdinSource.cancel() }

        // Return next buffered input character, or `nil` if there is none.
        func getch() -> CChar? {
            return stdinQueue.sync {
                inputCharacters.isEmpty ? nil : inputCharacters.remove(at: 0)
            }
        }

        // Consume buffered characters up to a newline. Non-blocking: returns
        // whatever has arrived so far (possibly an empty string).
        func gets() -> String {
            var input = ""
            while let c = getch() {
                if c == 10 {
                    break
                }
                input.append(Character(UnicodeScalar(UInt8(c))))
            }
            return input
        }

        if reduceMemory {
            textEncoder.unloadResources()
        }

        // Convert to Unet hidden state representation
        // Concatenate the prompt and negative prompt embeddings
        let concatEmbedding = MLShapedArray<Float32>(
            concatenating: [negativePromptEmbedding, promptEmbedding],
            alongAxis: 0
        )

        let hiddenStates = toHiddenStates(concatEmbedding)

        /// Setup one scheduler per image; each keeps its own multi-step state
        let schedulers: [Scheduler] = (0..<imageCount).map { _ in
            switch scheduler {
            case .pndmScheduler: return PNDMScheduler(stepCount: stepCount)
            case .dpmSolverMultistepScheduler: return DPMSolverMultistepScheduler(stepCount: stepCount)
            }
        }
        let stdev = schedulers[0].initNoiseSigma

        // Generate random latent samples from specified seed
        var latents = generateLatentSamples(imageCount, stdev: stdev, seed: seed)

        // De-noising loop
        for (step, t) in schedulers[0].timeSteps.enumerated() {
            // Host protocol: a complete "b2s t2im __stop__" line aborts generation
            let input = gets()
            if input == "b2s t2im __stop__" {
                return []
            }

            // Expand the latents for classifier-free guidance
            // and input to the Unet noise prediction model
            let latentUnetInput = latents.map {
                MLShapedArray<Float32>(concatenating: [$0, $0], alongAxis: 0)
            }

            // Predict noise residuals from latent samples
            // and current time step conditioned on hidden states
            var noise = try unet.predictNoise(
                latents: latentUnetInput,
                timeStep: t,
                hiddenStates: hiddenStates
            )

            noise = performGuidance(noise, guidanceScale)

            // Have the scheduler compute the previous (t-1) latent
            // sample given the predicted noise and current sample
            for i in 0..<imageCount {
                latents[i] = schedulers[i].step(
                    output: noise[i],
                    timeStep: t,
                    sample: latents[i]
                )
            }

            // Report progress; a false return stops generation early
            let progress = Progress(
                pipeline: self,
                prompt: prompt,
                step: step,
                stepCount: stepCount,
                currentLatentSamples: latents,
                isSafetyEnabled: canSafetyCheck && !disableSafety
            )
            if !progressHandler(progress) {
                // Stop if requested by handler
                return []
            }
        }

        if reduceMemory {
            unet.unloadResources()
        }

        // Decode the latent samples to images
        return try decodeToImages(latents, disableSafety: disableSafety)
    }

    /// Draw `count` independent latent samples from a seeded
    /// NumPy-compatible Gaussian source.
    ///
    /// - NOTE(review): `input_image`/`mask_image` from `generateImages` are not
    ///   yet threaded through here — the image-to-image path (encode the input
    ///   image, add noise at the requested strength) is still TODO per the
    ///   author's reference notes that previously lived in this function.
    func generateLatentSamples(_ count: Int, stdev: Float, seed: UInt32) -> [MLShapedArray<Float32>] {
        var sampleShape = unet.latentSampleShape
        sampleShape[0] = 1

        let samples = (0..<count).map { i in
            // Per-image seed derivation. `&+` wraps rather than trapping when
            // `seed` is near UInt32.max (the previous `+` could crash); values
            // are unchanged whenever the old code did not overflow.
            let seed = seed &+ UInt32(1234 * i) % UInt32.max
            var random = NumPyRandomSource(seed: seed)
            return MLShapedArray<Float32>(
                converting: random.normalShapedArray(sampleShape, mean: 0.0, stdev: Double(stdev)))
        }
        return samples
    }

    /// Transpose a [B, S, D] embedding into the Unet hidden-state
    /// layout [B, D, 1, S]
    func toHiddenStates(_ embedding: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        // Unoptimized manual transpose [0, 2, None, 1]
        // e.g. From [2, 77, 768] to [2, 768, 1, 77]
        let fromShape = embedding.shape
        let stateShape = [fromShape[0], fromShape[2], 1, fromShape[1]]
        var states = MLShapedArray<Float32>(repeating: 0.0, shape: stateShape)
        for i0 in 0..<fromShape[0] {
            for i1 in 0..<fromShape[1] {
                for i2 in 0..<fromShape[2] {
                    states[scalarAt: i0, i2, 0, i1] = embedding[scalarAt: i0, i1, i2]
                }
            }
        }
        return states
    }

    /// Apply classifier-free guidance to each noise prediction in the batch
    func performGuidance(_ noise: [MLShapedArray<Float32>], _ guidanceScale: Float) -> [MLShapedArray<Float32>] {
        noise.map { performGuidance($0, guidanceScale) }
    }

    /// Classifier-free guidance for a single prediction:
    /// unconditioned + guidanceScale * (text-conditioned − unconditioned)
    func performGuidance(_ noise: MLShapedArray<Float32>, _ guidanceScale: Float) -> MLShapedArray<Float32> {

        // noise[0] is the unconditional (negative-prompt) half of the batch,
        // noise[1] the text-conditioned half
        let blankNoiseScalars = noise[0].scalars
        let textNoiseScalars = noise[1].scalars

        var resultScalars = blankNoiseScalars

        for i in 0..<resultScalars.count {
            // unconditioned + guidance*(text - unconditioned)
            resultScalars[i] += guidanceScale * (textNoiseScalars[i] - blankNoiseScalars[i])
        }

        // The guided result collapses the batch dimension back to 1
        var shape = noise.shape
        shape[0] = 1
        return MLShapedArray<Float32>(scalars: resultScalars, shape: shape)
    }

    /// Decode latent samples into images, applying the optional safety checker
    ///
    /// - Returns: One optional image per latent; nil where the safety checker
    ///            flagged the decoded image
    func decodeToImages(_ latents: [MLShapedArray<Float32>],
                        disableSafety: Bool) throws -> [CGImage?] {

        let images = try decoder.decode(latents)
        if reduceMemory {
            decoder.unloadResources()
        }

        // If safety is disabled return what was decoded
        if disableSafety {
            return images
        }

        // If there is no safety checker return what was decoded
        guard let safetyChecker = safetyChecker else {
            return images
        }

        // Otherwise change images which are not safe to nil
        let safeImages = try images.map { image in
            try safetyChecker.isSafe(image) ? image : nil
        }

        if reduceMemory {
            safetyChecker.unloadResources()
        }

        return safeImages
    }

}

@available(iOS 16.2, macOS 13.1, *)
extension StableDiffusionPipeline {
    /// Sampling progress details
    public struct Progress {
        public let pipeline: StableDiffusionPipeline
        public let prompt: String
        // Zero-based index of the de-noising step just completed
        public let step: Int
        public let stepCount: Int
        public let currentLatentSamples: [MLShapedArray<Float32>]
        public let isSafetyEnabled: Bool
        /// Decodes the in-progress latents for live preview.
        ///
        /// - NOTE(review): `try!` traps the process if decoding throws;
        ///   a throwing accessor would change the public interface, so the
        ///   trap is retained here — confirm decode failures are impossible
        ///   for in-range latents before relying on this in production.
        public var currentImages: [CGImage?] {
            try! pipeline.decodeToImages(
                currentLatentSamples,
                disableSafety: !isSafetyEnabled)
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML

/// A model for encoding text
@available(iOS 16.2, macOS 13.1, *)
public struct TextEncoder: ResourceManaging {

    /// Text tokenizer
    var tokenizer: BPETokenizer

    /// Embedding model
    var model: ManagedMLModel

    /// Creates text encoder which embeds a tokenized string
    ///
    /// - Parameters:
    ///   - tokenizer: Tokenizer for input text
    ///   - url: Location of compiled text encoding Core ML model
    ///   - configuration: configuration to be used when the model is loaded
    /// - Returns: A text encoder that will lazily load its required resources when needed or requested
    public init(tokenizer: BPETokenizer,
                modelAt url: URL,
                configuration: MLModelConfiguration) {
        self.tokenizer = tokenizer
        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
    }

    /// Ensure the model has been loaded into memory
    public func loadResources() throws {
        try model.loadResources()
    }

    /// Unload the underlying model to free up memory
    public func unloadResources() {
        model.unloadResources()
    }

    /// Encode input text/string
    ///
    /// - Parameters:
    ///   - text: Input text to be tokenized and then embedded
    /// - Returns: Embedding representing the input text
    /// - Throws: Errors from the underlying Core ML prediction
    public func encode(_ text: String) throws -> MLShapedArray<Float32> {

        // Get models expected input length
        let inputLength = inputShape.last!

        // Tokenize, padding to the expected length
        var (tokens, ids) = tokenizer.tokenize(input: text, minCount: inputLength)

        // Truncate if necessary
        if ids.count > inputLength {
            tokens = tokens.dropLast(tokens.count - inputLength)
            ids = ids.dropLast(ids.count - inputLength)
            let truncated = tokenizer.decode(tokens: tokens)
            print("Needed to truncate input '\(text)' to '\(truncated)'")
        }

        // Use the model to generate the embedding
        return try encode(ids: ids)
    }

    /// Prediction queue
    /// - NOTE(review): declared but not referenced anywhere in this type;
    ///   verify whether serialized prediction was intended before removing.
    let queue = DispatchQueue(label: "textencoder.predict")

    /// Run the encoder model over padded token ids and return the
    /// last hidden state as a shaped array
    func encode(ids: [Int]) throws -> MLShapedArray<Float32> {
        let inputName = inputDescription.name
        let inputShape = inputShape

        // Token ids are fed as Float32 scalars shaped to the model's input
        // constraint; `encode(_:)` guarantees `ids.count` matches via
        // padding/truncation above.
        let floatIds = ids.map { Float32($0) }
        let inputArray = MLShapedArray<Float32>(scalars: floatIds, shape: inputShape)

        // FIX: was `try!` — this function already throws, so propagate the
        // error to the caller instead of trapping the process.
        let inputFeatures = try MLDictionaryFeatureProvider(
            dictionary: [inputName: MLMultiArray(inputArray)])

        let result = try model.perform { model in
            try model.prediction(from: inputFeatures)
        }

        // The encoder model is expected to expose a "last_hidden_state" output
        let embeddingFeature = result.featureValue(for: "last_hidden_state")
        return MLShapedArray<Float32>(converting: embeddingFeature!.multiArrayValue!)
    }

    // First (and only) input description of the underlying model
    var inputDescription: MLFeatureDescription {
        try! model.perform { model in
            model.modelDescription.inputDescriptionsByName.first!.value
        }
    }

    // Expected shape of the model's token-id input
    var inputShape: [Int] {
        inputDescription.multiArrayConstraint!.shape.map { $0.intValue }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation
import CoreML

/// U-Net noise prediction model for stable diffusion
@available(iOS 16.2, macOS 13.1, *)
public struct Unet: ResourceManaging {

    /// Model used to predict noise residuals given an input, diffusion time step, and conditional embedding
    ///
    /// It can be in the form of a single model or multiple stages
    var models: [ManagedMLModel]

    /// Creates a U-Net noise prediction model
    ///
    /// - Parameters:
    ///   - url: Location of single U-Net compiled Core ML model
    ///   - configuration: Configuration to be used when the model is loaded
    /// - Returns: U-net model that will lazily load its required resources when needed or requested
    public init(modelAt url: URL,
                configuration: MLModelConfiguration) {
        self.models = [ManagedMLModel(modelAt: url, configuration: configuration)]
    }

    /// Creates a U-Net noise prediction model
    ///
    /// - Parameters:
    ///   - urls: Location of chunked U-Net via urls to each compiled chunk
    ///   - configuration: Configuration to be used when the model is loaded
    /// - Returns: U-net model that will lazily load its required resources when needed or requested
    public init(chunksAt urls: [URL],
                configuration: MLModelConfiguration) {
        self.models = urls.map { ManagedMLModel(modelAt: $0, configuration: configuration) }
    }

    /// Load resources.
    public func loadResources() throws {
        for model in models {
            try model.loadResources()
        }
    }

    /// Unload the underlying model to free up memory
    public func unloadResources() {
        for model in models {
            model.unloadResources()
        }
    }

    /// Pre-warm resources
    public func prewarmResources() throws {
        // Override default to pre-warm each model
        // (load then immediately unload, one chunk at a time,
        // to bound peak memory)
        for model in models {
            try model.loadResources()
            model.unloadResources()
        }
    }

    // Feature description of the "sample" (latent) input of the first stage
    var latentSampleDescription: MLFeatureDescription {
        try! models.first!.perform { model in
            model.modelDescription.inputDescriptionsByName["sample"]!
        }
    }

    /// The expected shape of the models latent sample input
    public var latentSampleShape: [Int] {
        latentSampleDescription.multiArrayConstraint!.shape.map { $0.intValue }
    }

    /// Batch prediction noise from latent samples
    ///
    /// - Parameters:
    ///   - latents: Batch of latent samples in an array
    ///   - timeStep: Current diffusion timestep
    ///   - hiddenStates: Hidden state to condition on
    /// - Returns: Array of predicted noise residuals
    func predictNoise(
        latents: [MLShapedArray<Float32>],
        timeStep: Int,
        hiddenStates: MLShapedArray<Float32>
    ) throws -> [MLShapedArray<Float32>] {

        // Match time step batch dimension to the model / latent samples
        // (batch of 2: each latent arrives duplicated for classifier-free guidance)
        let t = MLShapedArray<Float32>(scalars:[Float(timeStep), Float(timeStep)],shape:[2])

        // Form batch input to model
        let inputs = try latents.map {
            let dict: [String: Any] = [
                "sample" : MLMultiArray($0),
                "timestep" : MLMultiArray(t),
                "encoder_hidden_states": MLMultiArray(hiddenStates)
            ]
            return try MLDictionaryFeatureProvider(dictionary: dict)
        }
        let batch = MLArrayBatchProvider(array: inputs)

        // Make predictions
        let results = try predictions(from: batch)

        // Pull out the results in Float32 format
        let noise = (0..<results.count).map { i in

            let result = results.features(at: i)
            let outputName = result.featureNames.first!

            let outputNoise = result.featureValue(for: outputName)!.multiArrayValue!

            // To conform to this func return type make sure we return float32
            // Use the fact that the concatenating constructor for MLMultiArray
            // can do type conversion:
            let fp32Noise = MLMultiArray(
                concatenating: [outputNoise],
                axis: 0,
                dataType: .float32
            )
            return MLShapedArray<Float32>(fp32Noise)
        }

        return noise
    }

    /// Run the batch through every stage of the (possibly chunked) model,
    /// feeding each stage's outputs — merged with the original inputs —
    /// into the next stage
    func predictions(from batch: MLBatchProvider) throws -> MLBatchProvider {

        var results = try models.first!.perform { model in
            try model.predictions(fromBatch: batch)
        }

        if models.count == 1 {
            return results
        }

        // Manual pipeline batch prediction
        let inputs = batch.arrayOfFeatureValueDictionaries
        for stage in models.dropFirst() {

            // Combine the original inputs with the outputs of the last stage
            // (stage outputs win on key collisions)
            let next = try results.arrayOfFeatureValueDictionaries
                .enumerated().map { (index, dict) in
                    let nextDict = dict.merging(inputs[index]) { (out, _) in out }
                    return try MLDictionaryFeatureProvider(dictionary: nextDict)
                }
            let nextBatch = MLArrayBatchProvider(array: next)

            // Predict
            results = try stage.perform { model in
                try model.predictions(fromBatch: nextBatch)
            }
        }

        return results
    }
}

extension MLFeatureProvider {
    /// All features of this provider collected into a dictionary
    /// keyed by feature name.
    var featureValueDictionary: [String : MLFeatureValue] {
        var dictionary: [String : MLFeatureValue] = [:]
        for name in featureNames {
            dictionary[name] = featureValue(for: name)
        }
        return dictionary
    }
}

extension MLBatchProvider {
    /// Flattens the batch into one feature-value dictionary per element,
    /// preserving batch order.
    var arrayOfFeatureValueDictionaries: [[String : MLFeatureValue]] {
        var dictionaries: [[String : MLFeatureValue]] = []
        dictionaries.reserveCapacity(count)
        for index in 0..<count {
            dictionaries.append(features(at: index).featureValueDictionary)
        }
        return dictionaries
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation

@available(iOS 16.2, macOS 13.1, *)
extension BPETokenizer {
    enum FileReadError: Error {
        case invalidMergeFileLine(Int)
    }

    /// Read vocab.json file at URL into a dictionary mapping a String to its Int token id
    static func readVocabulary(url: URL) throws -> [String: Int] {
        let data = try Data(contentsOf: url)
        let decoder = JSONDecoder()
        return try decoder.decode([String: Int].self, from: data)
    }

    /// Read merges.txt file at URL into a dictionary mapping bigrams to the
    /// line number/rank/priority. Lines beginning with '#' are skipped;
    /// a line without exactly two tokens throws with its 1-based line number.
    static func readMerges(url: URL) throws -> [TokenPair: Int] {
        let text = try String(contentsOf: url)

        var pairs: [(TokenPair, Int)] = []
        for (lineNumber, line) in text.split(separator: "\n").enumerated() {
            guard !line.hasPrefix("#") else { continue }
            let tokens = line.split(separator: " ")
            guard tokens.count == 2 else {
                throw FileReadError.invalidMergeFileLine(lineNumber + 1)
            }
            pairs.append((TokenPair(String(tokens[0]), String(tokens[1])), lineNumber))
        }
        // uniqueKeysWithValues preserves the original trap-on-duplicate behavior
        return Dictionary(uniqueKeysWithValues: pairs)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import Foundation

/// A tokenizer based on byte pair encoding.
@available(iOS 16.2, macOS 13.1, *)
public struct BPETokenizer {
    /// A dictionary that maps pairs of tokens to the rank/order of the merge.
    let merges: [TokenPair : Int]

    /// A dictionary that maps token strings to their integer identifiers.
    let vocabulary: [String: Int]

    /// The start token.
    let startToken: String = "<|startoftext|>"

    /// The end token.
    let endToken: String = "<|endoftext|>"

    /// The token used for padding
    let padToken: String = "<|endoftext|>"

    /// The unknown token.
    let unknownToken: String = "<|endoftext|>"

    /// Identifier of `unknownToken`, falling back to 0 if it is absent from the vocabulary.
    var unknownTokenID: Int {
        vocabulary[unknownToken, default: 0]
    }

    /// Creates a tokenizer.
    ///
    /// - Parameters:
    ///   - merges: A dictionary that maps pairs of tokens to the rank/order of the merge.
    ///   - vocabulary: A dictionary that maps tokens to identifiers.
    public init(merges: [TokenPair: Int], vocabulary: [String: Int]) {
        self.merges = merges
        self.vocabulary = vocabulary
    }

    /// Creates a tokenizer by loading merges and vocabulary from URLs.
    ///
    /// - Parameters:
    ///   - mergesURL: The URL of a text file containing merges.
    ///   - vocabularyURL: The URL of a JSON file containing the vocabulary.
    /// - Throws: Any error raised while reading or decoding either file.
    public init(mergesAt mergesURL: URL, vocabularyAt vocabularyURL: URL) throws {
        self.merges = try Self.readMerges(url: mergesURL)
        // Fix: propagate read/decode failures instead of crashing (`try!`) on a
        // bad vocabulary file — this init is already throwing.
        self.vocabulary = try Self.readVocabulary(url: vocabularyURL)
    }

    /// Tokenizes an input string.
    ///
    /// - Parameters:
    ///   - input: A string.
    ///   - minCount: The minimum number of tokens to return (output is padded
    ///     with `padToken` up to this count when provided).
    /// - Returns: An array of tokens and an array of token identifiers.
    public func tokenize(input: String, minCount: Int? = nil) -> (tokens: [String], tokenIDs: [Int]) {
        var tokens: [String] = []

        tokens.append(startToken)
        tokens.append(contentsOf: encode(input: input))
        tokens.append(endToken)

        // Pad if there was a min length specified
        if let minLen = minCount, minLen > tokens.count {
            tokens.append(contentsOf: repeatElement(padToken, count: minLen - tokens.count))
        }

        let ids = tokens.map({ vocabulary[$0, default: unknownTokenID] })
        return (tokens: tokens, tokenIDs: ids)
    }

    /// Returns the token identifier for a token, or nil if unknown.
    public func tokenID(for token: String) -> Int? {
        vocabulary[token]
    }

    /// Returns the token for a token identifier (linear scan of the vocabulary).
    public func token(id: Int) -> String? {
        vocabulary.first(where: { $0.value == id })?.key
    }

    /// Decodes a sequence of tokens into a fully formed string,
    /// replacing word terminators with spaces and stripping the start/end markers.
    public func decode(tokens: [String]) -> String {
        String(tokens.joined())
            .replacingOccurrences(of: "</w>", with: " ")
            .replacingOccurrences(of: startToken, with: "")
            .replacingOccurrences(of: endToken, with: "")
    }

    /// Encode an input string to a sequence of tokens.
    /// Input is lowercased, trimmed, and split on single spaces before per-word encoding.
    func encode(input: String) -> [String] {
        let normalized = input.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        let words = normalized.split(separator: " ")
        return words.flatMap({ encode(word: $0) })
    }

    /// Encode a single word into a sequence of tokens by repeatedly applying
    /// the lowest-ranked merge until no known bigram remains.
    func encode(word: Substring) -> [String] {
        var tokens = word.map { String($0) }
        if let last = tokens.indices.last {
            // The final character carries the end-of-word marker.
            tokens[last] = tokens[last] + "</w>"
        }

        while true {
            let pairs = pairs(for: tokens)
            let canMerge = pairs.filter { merges[$0] != nil }

            if canMerge.isEmpty {
                break
            }

            // If multiple merges are found, use the one with the lowest rank
            let shouldMerge = canMerge.min { merges[$0]! < merges[$1]! }!
            tokens = update(tokens, merging: shouldMerge)
        }
        return tokens
    }

    /// Get the set of adjacent pairs / bigrams from a sequence of tokens
    func pairs(for tokens: [String]) -> Set<TokenPair> {
        guard tokens.count > 1 else {
            return Set()
        }

        var pairs = Set<TokenPair>(minimumCapacity: tokens.count - 1)
        var prev = tokens.first!
        for current in tokens.dropFirst() {
            pairs.insert(TokenPair(prev, current))
            prev = current
        }
        return pairs
    }

    /// Update the sequence of tokens by greedily merging instances of a specific bigram
    func update(_ tokens: [String], merging bigram: TokenPair) -> [String] {
        guard tokens.count > 1 else {
            // Fix: nothing can merge in a 0/1-token sequence — return the input
            // unchanged rather than an empty array.
            return tokens
        }

        var newTokens = [String]()
        newTokens.reserveCapacity(tokens.count - 1)

        var index = 0
        while index < tokens.count {
            let remainingTokens = tokens[index...]
            if let startMatchIndex = remainingTokens.firstIndex(of: bigram.first) {
                // Found a possible match, append everything before it
                newTokens.append(contentsOf: tokens[index..<startMatchIndex])

                // Fix: guard on `startMatchIndex` (not `index`) so we never read
                // past the end when the match sits on the final token.
                if startMatchIndex < tokens.count - 1 && tokens[startMatchIndex + 1] == bigram.second {
                    // Full match, merge
                    newTokens.append(bigram.first + bigram.second)
                    index = startMatchIndex + 2
                } else {
                    // Only matched the first, no merge
                    newTokens.append(bigram.first)
                    index = startMatchIndex + 1
                }
            } else {
                // Didn't find any more matches, append the rest unmerged
                newTokens.append(contentsOf: remainingTokens)
                break
            }
        }
        return newTokens
    }
}

@available(iOS 16.2, macOS 13.1, *)
extension BPETokenizer {

    /// A hashable tuple of strings: an ordered bigram of adjacent tokens,
    /// used as the key type of the merges table.
    public struct TokenPair: Hashable {
        /// Leading token of the pair.
        let first: String
        /// Trailing token of the pair.
        let second: String

        /// Creates a pair from its two tokens, in order.
        init(_ first: String, _ second: String) {
            self.first = first
            self.second = second
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import ArgumentParser
import CoreGraphics
import CoreML
import Foundation
import StableDiffusion
import UniformTypeIdentifiers

/// Command-line entry point: generates images from a text prompt using a
/// CoreML Stable Diffusion pipeline loaded from a resource directory.
@available(iOS 16.2, macOS 13.1, *)
struct StableDiffusionSample: ParsableCommand {

    static let configuration = CommandConfiguration(
        abstract: "Run stable diffusion to generate images guided by a text prompt",
        version: "0.1"
    )

    @Argument(help: "Input string prompt")
    var prompt: String

    @Option(help: "Input string negative prompt")
    var negativePrompt: String = ""

    @Option(
        help: ArgumentHelp(
            "Path to stable diffusion resources.",
            discussion: "The resource directory should contain\n" +
                " - *compiled* models: {TextEncoder,Unet,VAEDecoder}.mlmodelc\n" +
                " - tokenizer info: vocab.json, merges.txt",
            valueName: "directory-path"
        )
    )
    var resourcePath: String = "./"

    @Option(help: "Number of images to sample / generate")
    var imageCount: Int = 1

    @Option(help: "Number of diffusion steps to perform")
    var stepCount: Int = 50

    @Option(
        help: ArgumentHelp(
            "How often to save samples at intermediate steps",
            discussion: "Set to 0 to only save the final sample"
        )
    )
    var saveEvery: Int = 0

    @Option(help: "Output path")
    var outputPath: String = "./"

    @Option(help: "Random seed")
    var seed: UInt32 = 93

    @Option(help: "Controls the influence of the text prompt on sampling process (0=random images)")
    var guidanceScale: Float = 7.5

    @Option(help: "Compute units to load model with {all,cpuOnly,cpuAndGPU,cpuAndNeuralEngine}")
    var computeUnits: ComputeUnits = .all

    @Option(help: "Scheduler to use, one of {pndm, dpmpp}")
    var scheduler: SchedulerOption = .pndm

    @Flag(help: "Disable safety checking")
    var disableSafety: Bool = false

    @Flag(help: "Reduce memory usage")
    var reduceMemory: Bool = false

    /// Validates the resource directory, builds and loads the pipeline, runs
    /// sampling with per-step progress reporting, and saves the final images.
    /// - Throws: `RunError.resources` for a missing resource path, plus any
    ///   pipeline load/generation or image-saving errors.
    mutating func run() throws {
        guard FileManager.default.fileExists(atPath: resourcePath) else {
            throw RunError.resources("Resource path does not exist \(resourcePath)")
        }

        let config = MLModelConfiguration()
        config.computeUnits = computeUnits.asMLComputeUnits
        let resourceURL = URL(filePath: resourcePath)

        log("Loading resources and creating pipeline\n")
        log("(Note: This can take a while the first time using these resources)\n")
        let pipeline = try StableDiffusionPipeline(resourcesAt: resourceURL,
                                                   configuration: config,
                                                   disableSafety: disableSafety,
                                                   reduceMemory: reduceMemory)
        try pipeline.loadResources()

        log("Sampling ...\n")
        let sampleTimer = SampleTimer()
        sampleTimer.start()

        let images = try pipeline.generateImages(
            prompt: prompt,
            negativePrompt: negativePrompt,
            imageCount: imageCount,
            stepCount: stepCount,
            seed: seed,
            guidanceScale: guidanceScale,
            scheduler: scheduler.stableDiffusionScheduler
        ) { progress in
            // Pause the timer while reporting so logging doesn't skew step timings.
            sampleTimer.stop()
            handleProgress(progress,sampleTimer)
            if progress.stepCount != progress.step {
                sampleTimer.start()
            }
            // Returning true continues generation (never cancels from the CLI).
            return true
        }

        _ = try saveImages(images, logNames: true)
    }

    /// Prints a single-line progress report (step counter plus step/sec stats)
    /// and saves intermediate images every `saveEvery` steps when enabled.
    func handleProgress(
        _ progress: StableDiffusionPipeline.Progress,
        _ sampleTimer: SampleTimer
    ) {
        // ANSI: move cursor up one line and clear it, so progress overwrites in place.
        log("\u{1B}[1A\u{1B}[K")
        log("Step \(progress.step) of \(progress.stepCount) ")
        log(" [")
        log(String(format: "mean: %.2f, ", 1.0/sampleTimer.mean))
        log(String(format: "median: %.2f, ", 1.0/sampleTimer.median))
        // Fix: avoid force-unwrapping `allSamples.last` — skip the stat when no
        // sample has been recorded yet instead of crashing.
        if let lastSample = sampleTimer.allSamples.last {
            log(String(format: "last %.2f", 1.0/lastSample))
        }
        log("] step/sec")

        if saveEvery > 0, progress.step % saveEvery == 0 {
            let saveCount = (try? saveImages(progress.currentImages, step: progress.step)) ?? 0
            log(" saved \(saveCount) image\(saveCount != 1 ? "s" : "")")
        }
        log("\n")
    }

    /// Writes the given images as PNGs under `outputPath`.
    ///
    /// - Parameters:
    ///   - images: Generated images; nil entries failed the safety check and are skipped.
    ///   - step: Optional diffusion step used in the file name for intermediate saves.
    ///   - logNames: Whether to log each saved/skipped image.
    /// - Returns: The number of images actually written.
    /// - Throws: `RunError.saving` if a destination can't be created or finalized.
    func saveImages(
        _ images: [CGImage?],
        step: Int? = nil,
        logNames: Bool = false
    ) throws -> Int {
        let url = URL(filePath: outputPath)
        var saved = 0
        for i in 0 ..< images.count {

            guard let image = images[i] else {
                if logNames {
                    log("Image \(i) failed safety check and was not saved")
                }
                continue
            }

            let name = imageName(i, step: step)
            let fileURL = url.appending(path:name)

            guard let dest = CGImageDestinationCreateWithURL(fileURL as CFURL, UTType.png.identifier as CFString, 1, nil) else {
                throw RunError.saving("Failed to create destination for \(fileURL)")
            }
            CGImageDestinationAddImage(dest, image, nil)
            if !CGImageDestinationFinalize(dest) {
                throw RunError.saving("Failed to save \(fileURL)")
            }
            if logNames {
                log("Saved \(name)\n")
            }
            saved += 1
        }
        return saved
    }

    /// Builds a file name of the form `<prompt>[.sample].<seed>[.step|.final].png`,
    /// truncating the prompt and replacing spaces with underscores.
    func imageName(_ sample: Int, step: Int? = nil) -> String {
        let fileCharLimit = 75
        var name = prompt.prefix(fileCharLimit).replacingOccurrences(of: " ", with: "_")
        if imageCount != 1 {
            name += ".\(sample)"
        }

        name += ".\(seed)"

        if let step = step {
            name += ".\(step)"
        } else {
            name += ".final"
        }
        name += ".png"
        return name
    }

    /// Prints without a trailing newline so progress lines can be composed piecewise.
    func log(_ str: String, term: String = "") {
        print(str, terminator: term)
    }
}

/// Errors surfaced to the user by the CLI run.
enum RunError: Error {
    /// The resource directory is missing (message includes the offending path).
    case resources(String)
    /// An image could not be written to disk (message includes the file URL).
    case saving(String)
}

/// Command-line choice of CoreML compute units.
@available(iOS 16.2, macOS 13.1, *)
enum ComputeUnits: String, ExpressibleByArgument, CaseIterable {
    case all, cpuAndGPU, cpuOnly, cpuAndNeuralEngine
    /// The `MLComputeUnits` value corresponding to this option.
    var asMLComputeUnits: MLComputeUnits {
        switch self {
        case .all: return .all
        case .cpuAndGPU: return .cpuAndGPU
        case .cpuOnly: return .cpuOnly
        case .cpuAndNeuralEngine: return .cpuAndNeuralEngine
        }
    }
}

/// Command-line choice of diffusion scheduler.
@available(iOS 16.2, macOS 13.1, *)
enum SchedulerOption: String, ExpressibleByArgument {
    case pndm, dpmpp
    /// Maps the CLI flag to the pipeline's scheduler type
    /// (dpmpp selects the DPM-Solver++ multistep scheduler).
    var stableDiffusionScheduler: StableDiffusionScheduler {
        switch self {
        case .pndm: return .pndmScheduler
        case .dpmpp: return .dpmSolverMultistepScheduler
        }
    }
}

// Entry point: the CoreML Stable Diffusion pipeline requires iOS 16.2 / macOS 13.1,
// so only invoke ArgumentParser's main() on supported OS versions.
if #available(iOS 16.2, macOS 13.1, *) {
    StableDiffusionSample.main()
} else {
    print("Unsupported OS")
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import XCTest
import CoreML
@testable import StableDiffusion

@available(iOS 16.2, macOS 13.1, *)
final class StableDiffusionTests: XCTestCase {

    /// URL of the bundled BPE vocabulary file (vocab.json).
    var vocabFileInBundleURL: URL {
        guard let url = Bundle.module.url(forResource: "vocab", withExtension: "json") else {
            fatalError("BPE tokenizer vocabulary file is missing from bundle")
        }
        return url
    }

    /// URL of the bundled BPE merges file (merges.txt).
    var mergesFileInBundleURL: URL {
        guard let url = Bundle.module.url(forResource: "merges", withExtension: "txt") else {
            fatalError("BPE tokenizer merges file is missing from bundle")
        }
        return url
    }

    /// Verifies that tokenizing known prompts reproduces the reference token ids.
    func testBPETokenizer() throws {

        let tokenizer = try BPETokenizer(mergesAt: mergesFileInBundleURL, vocabularyAt: vocabFileInBundleURL)

        // Tokenizes `prompt`, logs actual vs expected, and asserts the ids match.
        func testPrompt(prompt: String, expectedIds: [Int]) {
            let (tokens, ids) = tokenizer.tokenize(input: prompt)

            print("Tokens = \(tokens)\n")
            print("Expected tokens = \(expectedIds.map({ tokenizer.token(id: $0) }))")
            print("ids = \(ids)\n")
            print("Expected Ids = \(expectedIds)\n")

            XCTAssertEqual(ids,expectedIds)
        }

        testPrompt(prompt: "a photo of an astronaut riding a horse on mars",
                   expectedIds: [49406, 320, 1125, 539, 550, 18376, 6765, 320, 4558, 525, 7496, 49407])

        testPrompt(prompt: "Apple CoreML developer tools on a Macbook Air are fast",
                   expectedIds: [49406, 3055, 19622, 5780, 10929, 5771, 525, 320, 20617,
                                 1922, 631, 1953, 49407])
    }

    /// Verifies the seeded Gaussian sampler against values produced by NumPy.
    func test_randomNormalValues_matchNumPyRandom() {
        var rng = NumPyRandomSource(seed: 12345)
        let draws = rng.normalArray(count: 10_000)

        // numpy.random.seed(12345); print(numpy.random.randn(10000)[-5:])
        let expected = [-0.86285345, 2.15229409, -0.00670556, -1.21472309, 0.65498866]

        for (actual, reference) in zip(draws.suffix(5), expected) {
            XCTAssertEqual(actual, reference, accuracy: .ulpOfOne.squareRoot())
        }
    }
}
Empty file.

Large diffs are not rendered by default.

45 changes: 42 additions & 3 deletions electron_app/src/App.vue
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
</transition>
</div>
<ApplicationFrame ref="app_frame" v-else title="DiffusionBee - Stable Diffusion App"
:app_state="app_state"

@menu_item_click_about="show_about"
@menu_item_click_help="open_url('https://diffusionbee.com/')"
@@ -104,7 +105,7 @@
<script>
import { bind_app_component } from "./py_vue_bridge.js"
import { send_to_py } from "./py_vue_bridge.js"
import { send_to_py, send_to_swift } from "./py_vue_bridge.js"
import {native_confirm, native_alert } from "./native_functions_vue_bridge.js"
import StableDiffusion from "./StableDiffusion.vue"
import SplashScreen from './components_bare/SplashScreen.vue'
@@ -146,7 +147,6 @@ export default
this.stable_diffusion = this.$refs.stable_diffusion;
bind_app_component(this);
send_to_py("strt");
if( require('../package.json').is_dev || require('../package.json').build_number.includes("dev") )
alert("Not checking for updates.")
@@ -176,10 +176,49 @@ export default
if(!data.custom_models){
data.custom_models = {}
}
if (!data.selected_model) {
data.selected_model = ""
}
if( data ){
Vue.set(this.app_state , 'app_data' , data)
}
let custom_models = window.ipcRenderer.sendSync('list_custom_models');
Vue.set(this.app_state.app_data , 'custom_models' , {})
let macos_version = window.ipcRenderer.sendSync('get_macos_version');
console.log("macOS version: " + macos_version);
for (let i = 0; i < custom_models.length; i++) {
const model_name = custom_models[i].name;
const model_path = custom_models[i].path;
if( model_name.endsWith(".tdict") ){
if( !data.custom_models[model_name.slice(0,-6)] ){
Vue.set(this.app_state.app_data.custom_models , model_name.slice(0,-6) , {
name : model_name.slice(0,-6),
orig_path : model_path,
is_coreml : false})
}
}
else{
// macos 13.1 required for coreml
if (macos_version >= 22.2){
if( !data.custom_models[model_name] ){
Vue.set(this.app_state.app_data.custom_models , model_name + " [CoreML ]", {
name : model_name,
orig_path : model_path,
is_coreml : true})
}
}
}
}
if (data.selected_model.endsWith(" [CoreML ]")) {
send_to_swift("strt");
}
else {
send_to_py("strt");
}
},
40 changes: 29 additions & 11 deletions electron_app/src/StableDiffusion.vue
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
</template>
<script>
import { send_to_py } from "./py_vue_bridge.js"
import { send_to_py, send_to_swift } from "./py_vue_bridge.js"
import {get_tokens} from './clip_tokeniser/clip_encoder.js'
import {compute_time_remaining} from "./utils.js"
const moment = require('moment')
@@ -94,13 +94,11 @@ export default {
// }
// }
if(msg_code == "nwim"){
if (this.$parent.app_state.app_data.settings.notification_sound == true) {
notification_sound.play();
}
let impath = msg.substring(5).trim()
if(this.attached_cbs){
if(this.attached_cbs.on_img)
if(this.attached_cbs.on_img){
this.attached_cbs.on_img(impath);
}
}
}
@@ -124,6 +122,7 @@ export default {
if(msg_code == "mltl"){
let p = (msg.substring(5).trim());
this.model_loading_title = p;
this.is_backend_loaded = false;
}
@@ -163,18 +162,27 @@ export default {
}
}
}
if (msg_code == "igws") {
if (this.$parent.app_state.app_data.settings.notification_sound == true) {
notification_sound.play();
}
}
} ,
interupt(){
send_to_py("t2im __stop__")
interupt(is_coreml = false){
if(is_coreml){
send_to_swift("t2im __stop__")
}
else{
send_to_py("t2im __stop__")
}
this.attached_cbs = undefined;
},
text_to_img(prompt_params, callbacks, generated_by){
text_to_img(prompt_params, callbacks, generated_by, is_coreml){
if(!this.is_input_avail)
return;
let tokens = [49406].concat((get_tokens(prompt_params.prompt))).concat([49407])
@@ -196,16 +204,26 @@ export default {
if(prompt_params.negative_prompt){
prompt_params.negative_prompt = remove_non_ascii(prompt_params.negative_prompt)
}
}
// console.log(this.$parent.app_state.app_data.settings.live_render);
if (this.$parent.app_state.app_data.settings.live_render) {
prompt_params.save_every = 1;
}
this.last_iter_t = Date.now()
this.generated_by = generated_by;
this.attached_cbs = callbacks;
this.generation_state_msg = ""
this.remaining_times = ""
this.iter_times = []
this.nb_its = prompt_params.ddim_steps||25
send_to_py("t2im " + JSON.stringify(prompt_params))
if (is_coreml) {
send_to_swift("t2im " + JSON.stringify(prompt_params))
} else {
send_to_py("t2im " + JSON.stringify(prompt_params))
}
}
},
73 changes: 61 additions & 12 deletions electron_app/src/bridge.js
Original file line number Diff line number Diff line change
@@ -8,30 +8,38 @@ var is_app_closing = false;

var last_few_err = ""

function start_bridge() {
var change_backend = false;

let script_path = process.env.PY_SCRIPT || "./src/fake_backend.py";

function start_bridge(bin_path = null) {

console.log("starting briddddd")
const fs = require('fs')

let script_path = process.env.PY_SCRIPT || "./src/fake_backend.py";
let bin_path = process.env.BIN_PATH;
if(bin_path && (fs.existsSync(script_path))){
python = require('child_process').spawn( bin_path );
}
else if (fs.existsSync(script_path)) {
if (bin_path && (fs.existsSync(bin_path))) {
change_backend = true;
if (python) python.kill();
python = require('child_process').spawn(bin_path);
} else if (fs.existsSync(script_path)) {
change_backend = true;
if (python) python.kill();
console.log("using python, script path: " + script_path)
python = require('child_process').spawn('python3', [script_path]);
}
else{
} else {
change_backend = true;
if (python) python.kill();
const path = require('path');
let backend_path = path.join(path.dirname(__dirname), 'core' , 'diffusionbee_backend' );
python = require('child_process').spawn( backend_path );
let backend_path = path.join(path.dirname(__dirname), 'core', 'diffusionbee_backend');
python = require('child_process').spawn(backend_path);
}


python.stdin.setEncoding('utf-8');

python.stdout.on('data', function(data) {
console.log("Python response: ", data.toString('utf8'));
change_backend = false;


if(! data.toString().includes("sdbk ")){
@@ -72,6 +80,7 @@ function start_bridge() {
});

python.on('close', (code) => {
if (change_backend) return;
// if( code != 0 )
// {
// dialog.showMessageBox("Backend quit unexpectedly")
@@ -97,11 +106,30 @@ function start_bridge() {
}


// Dispatch one line received from the backend child process.
// Messages carry a 4-char opcode prefix: "py2b" -> app message forwarded to
// on_msg_from_py, "adlg" -> append to the log via add_log.
function on_msg_recieve(msg) { // on new msg from python

    const opcode = msg.substring(0, 4);
    if (opcode == "py2b") {
        on_msg_from_py(msg.substring(5))
    } else if (opcode == "adlg") {
        add_log(msg.substring(5))
    } else {
        // Fix: `alert` does not exist in the Electron main process, so the old
        // code threw a ReferenceError here. Log the unknown message instead.
        console.error("received unknown message " + msg.toString())
    }

}


var use = "python"

ipcMain.on('to_python_sync', (event, arg) => {
if (use != "python") {
start_bridge();
use = "python"
}
if (python) {
event.returnValue = "ok";
// console("sending to py from main " + arg )
python.stdin.write("b2py " + arg.toString() + "\n")
console.log(arg.toString())

} else {
console.log("Python not binded yet!");
@@ -118,7 +146,28 @@ ipcMain.on('to_python_async', (event, arg) => {



// Renderer -> Swift backend, synchronous. If the currently running backend is
// not the swift one, restarts the bridge with the CoreML swift binary first.
ipcMain.on('to_swift_sync', (event, arg) => {
    if (use != "swift") {
        start_bridge("./src/Debug/swiftbackend_diffusionbee");
        use = "swift"
    }
    // NOTE(review): `python` holds whichever backend child process is active —
    // here it is the swift process despite the variable name.
    if (python) {
        event.returnValue = "ok";
        // "b2s " prefix marks bridge-to-swift traffic on the child's stdin.
        python.stdin.write("b2s " + arg.toString() + "\n")
        console.log(arg.toString())

    } else {
        console.log("Python not binded yet!");
        event.returnValue = "not_ok";
    }
})


// Renderer -> Swift backend, fire-and-forget. Assumes the bridge child process
// is already running; the message is silently dropped otherwise.
ipcMain.on('to_swift_async', (event, arg) => {
    if (python) {
        python.stdin.write("b2s " + arg.toString() + "\n")
    }
})



29 changes: 28 additions & 1 deletion electron_app/src/components/Img2Img.vue
Original file line number Diff line number Diff line change
@@ -150,6 +150,14 @@ export default {
seed = Number(this.seed);
else
seed = Math.floor(Math.random() * 100000);
this.selected_model = this.app_state.app_data.selected_model;
const is_coreml = this.selected_model != "Default" && this.app_state.app_data.custom_models[this.selected_model] ? this.app_state.app_data.custom_models[this.selected_model].is_coreml : false;
console.log("is_coreml", is_coreml);
if (is_coreml) {
Vue.$toast.default('CoreML model for img2img not yet available, please select a different model')
return;
}
if(this.prompt.trim() == ""){
Vue.$toast.default('You need to enter a prompt')
@@ -211,6 +219,16 @@ export default {
let callbacks = {
on_img(img_path){
let exist = false;
for (let i = 0; i < that.generated_images.length; i++) {
const element = that.generated_images[i];
let element_without_suffix = element.split('?')[0];
if (element_without_suffix == img_path) {
exist = true;
break;
}
}
if (!exist) {
that.generated_images.push(img_path);
let p = {
@@ -228,7 +246,16 @@ export default {
that.app_state.app_data.history[history_key].imgs.push(img_path)
console.log(that.app_state.app_data.history)
} else {
const img_index = that.generated_images.findIndex((element) => {
let element_without_suffix = element.split('?')[0];
return element_without_suffix == img_path;
});
this.update_img(img_index, img_path);
}
},
update_img(index, img_path) {
Vue.set(that.generated_images, index, img_path + "?v=" + Math.random());
},
on_progress(p){
that.done_percentage = p;
116 changes: 86 additions & 30 deletions electron_app/src/components/ImgGenerate.vue
Original file line number Diff line number Diff line change
@@ -182,8 +182,9 @@ export default {
num_imgs : this.num_imgs ,
batch_size : this.batch_size
}
if(this.selected_model && this.selected_model != "Default" && this.app_state.app_data.custom_models[this.selected_model] ){
this.selected_model = this.app_state.app_data.selected_model;
if (this.selected_model && this.selected_model != "Default" && this.app_state.app_data.custom_models[this.selected_model]) {
params.model_id = -1;
params.custom_model_path = this.app_state.app_data.custom_models[this.selected_model].path;
}
@@ -206,42 +207,97 @@ export default {
let history_key = Math.random();
let callbacks = {
on_img(img_path){
that.generated_images.push(img_path);
if(!(that.app_state.app_data.history[history_key])){
let p = {
"prompt":that.prompt , "seed": seed, "img_w":that.img_w , "img_h":that.img_h , "key":history_key , "imgs" : [],
"guidence_scale" : that.guidence_scale , "dif_steps" : that.dif_steps
on_img(img_path) {
let exist = false;
for (let i = 0; i < that.generated_images.length; i++) {
const element = that.generated_images[i];
let element_without_suffix = element.split('?')[0];
if (element_without_suffix == img_path) {
exist = true;
break;
}
if(that.stable_diffusion.model_version)
p['model_version'] = that.stable_diffusion.model_version;
if(that.is_negative_prompt_avail)
p['negative_prompt'] = that.negative_prompt;
Vue.set(that.app_state.app_data.history, history_key , p);
}
that.app_state.app_data.history[history_key].imgs.push(img_path)
console.log(that.app_state.app_data.history)
if (!exist) {
that.generated_images.push(img_path);
if (!(that.app_state.app_data.history[history_key])) {
let p = {
"prompt": that.prompt, "seed": seed, "img_w": that.img_w, "img_h": that.img_h, "key": history_key, "imgs": [],
"guidence_scale": that.guidence_scale, "dif_steps": that.dif_steps
}
if (that.stable_diffusion.model_version)
p['model_version'] = that.stable_diffusion.model_version;
if (that.is_negative_prompt_avail)
p['negative_prompt'] = that.negative_prompt;
Vue.set(that.app_state.app_data.history, history_key, p);
}
that.app_state.app_data.history[history_key].imgs.push(img_path);
} else {
const img_index = that.generated_images.findIndex((element) => {
let element_without_suffix = element.split('?')[0];
return element_without_suffix == img_path;
});
this.update_img(img_index, img_path);
}
return;
},
update_img(index, img_path) {
Vue.set(that.generated_images, index, img_path + "?v=" + Math.random());
},
on_progress(p ){
that.done_percentage = p;
on_progress(p) {
that.done_percentage = p;
},
on_err(err){
on_err(err) {
that.backend_error = err;
},
dragElement(elmnt) {
var pos1 = 0, pos2 = 0, pos3 = 0, pos4 = 0;
if (document.getElementById(elmnt.id + "header")) {
// if present, the header is where you move the DIV from:
document.getElementById(elmnt.id + "header").onmousedown = dragMouseDown;
} else {
// otherwise, move the DIV from anywhere inside the DIV:
elmnt.onmousedown = dragMouseDown;
}
function dragMouseDown(e) {
e = e || window.event;
e.preventDefault();
// get the mouse cursor position at startup:
pos3 = e.clientX;
pos4 = e.clientY;
document.onmouseup = closeDragElement;
// call a function whenever the cursor moves:
document.onmousemove = elementDrag;
}
function elementDrag(e) {
e = e || window.event;
e.preventDefault();
// calculate the new cursor position:
pos1 = pos3 - e.clientX;
pos2 = pos4 - e.clientY;
pos3 = e.clientX;
pos4 = e.clientY;
// set the element's new position:
elmnt.style.top = (elmnt.offsetTop - pos2) + "px";
elmnt.style.left = (elmnt.offsetLeft - pos1) + "px";
}
function closeDragElement() {
// stop moving when mouse button is released:
document.onmouseup = null;
document.onmousemove = null;
}
}
}
this.is_stopping = false;
if(this.stable_diffusion)
this.stable_diffusion.text_to_img(params, callbacks, 'txt2img');
} ,
const is_coreml = this.selected_model != "Default" && this.app_state.app_data.custom_models[this.selected_model] ? this.app_state.app_data.custom_models[this.selected_model].is_coreml : false;
if (this.stable_diffusion)
this.stable_diffusion.text_to_img(params, callbacks, 'txt2img', is_coreml);
},
open_arthub(){
@@ -250,7 +306,8 @@ export default {
stop_generation(){
this.is_stopping = true;
this.stable_diffusion.interupt();
const is_coreml = this.selected_model != "Default" && this.app_state.app_data.custom_models[this.selected_model] ? this.app_state.app_data.custom_models[this.selected_model].is_coreml : false;
this.stable_diffusion.interupt(is_coreml);
},
add_style(tag){
@@ -338,5 +395,4 @@ export default {
}
}
</style>
29 changes: 24 additions & 5 deletions electron_app/src/components/Settings.vue
Original file line number Diff line number Diff line change
@@ -19,6 +19,19 @@
</label>
</div>
</div>
<div class="setting_box">
<div class="settings_left">
<h3>Live render preview</h3>
<p>To display a preview of the image as it is being rendered, at each step of the generation process</p>
</div>
<hr>
<div style="float:right;margin-right: 9px;align-self: center;" >
<label class="switch">
<input type="checkbox" v-model="app_state.app_data.settings.live_render">
<span class="toggle round"></span>
</label>
</div>
</div>
<hr>
<!--
<div class="setting_box">
@@ -41,9 +54,10 @@
<!-- <br> -->
<hr>
<div v-for="model in Object.values(this.app_state.app_data.custom_models)" :key="model.name">
<div class="l_button" @click="delete_model(model.name)" style="float:right">Remove</div>
<div class="l_button" @click="delete_model(model.name, model.is_coreml)" style="float:right">Remove</div>
<p> Name : {{model.name}} </p>
<p> Path : {{model.orig_path}} </p>
<span v-if="model.is_coreml" style="color:white;background:#02b3b6;border-radius:10px;padding:2px 5px 2px 5px;font-size:12px;">CoreML Model</span>
<hr>
</div>
</div>
@@ -73,10 +87,15 @@ export default {
};
},
methods: {
delete_model(k){
let model_path = this.app_state.app_data.custom_models[k].path;
window.ipcRenderer.sendSync('delete_file', model_path );
Vue.delete( this.app_state.app_data.custom_models , k );
delete_model(k, coreml){
let model_path = coreml ? this.app_state.app_data.custom_models[k+" [CoreML ]"].orig_path : this.app_state.app_data.custom_models[k].orig_path;
if (coreml) {
window.ipcRenderer.sendSync('delete_dir', model_path );
Vue.delete( this.app_state.app_data.custom_models , k+" [CoreML ]" );
} else {
window.ipcRenderer.sendSync('delete_file', model_path );
Vue.delete( this.app_state.app_data.custom_models , k );
}
},
add_model(){
let that = this;
11 changes: 11 additions & 0 deletions electron_app/src/components_bare/ApplicationFrame.vue
Original file line number Diff line number Diff line change
@@ -91,12 +91,14 @@
</div>
</template>
<script>
import { send_to_py, send_to_swift } from "../py_vue_bridge.js"
export default {
name: 'ApplicationFrame',
components: {},
props: {
title: String,
app_state: Object,
},
data: function() {
@@ -116,6 +118,15 @@ export default {
methods: {
selectTab(tab) {
this.selected_tab = tab;
if (tab == "inpainting" || tab == "outpainting")
return send_to_py("start");
if (this.app_state.app_data.selected_model.endsWith(" [CoreML ]")) {
send_to_swift("start");
}
else {
send_to_py("start");
}
},
detect_windows_os(){
17 changes: 14 additions & 3 deletions electron_app/src/components_bare/SDOptionsDropdown.vue
Original file line number Diff line number Diff line change
@@ -236,9 +236,9 @@
stroke="#A2A3AA" stroke-width="2" stroke-linecap="round"
stroke-linejoin="round" />
</svg>

<b-form-select v-model="options_model_values.selected_model"
:options="['Default'].concat(Object.keys(options_model_values.app_state.app_data.custom_models))"
<b-form-select v-model="options_model_values.app_state.app_data.selected_model"
:options="['Default'].concat(Object.keys(options_model_values.app_state.app_data.custom_models).map((model_name) => model_name))"
@change="setModel"
required></b-form-select>
</div>
</div>
@@ -270,6 +270,8 @@
</div>
</template>
<script>
import { send_to_py, send_to_swift } from "../py_vue_bridge.js"
import Vue from "vue"
export default {
name: 'SDOptionsDropdown',
props: {
@@ -291,6 +293,15 @@ export default {
return {};
},
methods: {
setModel(e) {
const model = e;
if ( model != "Default" && this.options_model_values.app_state.app_data.custom_models[model].is_coreml){
send_to_swift("start");
} else{
send_to_py("start");
}
Vue.set(this.options_model_values.app_state.app_data, 'selected_model', model);
},
SetStrength(e) {
var value = (e.target.value - e.target.min) / (e.target.max - e.target.min) * 100
e.target.style.background = 'linear-gradient(to right, var(--slider-progress) 0%, var(--slider-progress) ' + value + '%, var(--slider-progress_end) ' + value + '%, var(--slider-progress_end) 100%)'
51 changes: 48 additions & 3 deletions electron_app/src/native_functions.js
Original file line number Diff line number Diff line change
@@ -143,6 +143,12 @@ ipcMain.on('save_file', (event, arg) => {
})


// Synchronously report whether a path exists on disk. Only the first
// "||"-separated field of `arg` is treated as the path.
ipcMain.on('file_exist', (event, arg) => {
    const targetPath = arg.split("||")[0];
    event.returnValue = require('fs').existsSync(targetPath);
});




@@ -176,6 +182,18 @@ ipcMain.on('get_instance_id', (event, arg) => {

})

// Return the Darwin kernel version as a float (e.g. 22.2 for macOS 13.1),
// or -1 on non-macOS platforms. Callers compare this against 22.2 to gate
// CoreML model support.
ipcMain.on('get_macos_version', (event, arg) => {
    let version = -1;
    if (process.platform == 'darwin') {
        const os = require('os');
        const osVersion = os.release();
        // NOTE(review): parseFloat("22.10.0") yields 22.1, so a two-digit minor
        // release would compare lower than 22.2 — verify before a 22.10 kernel ships.
        version = parseFloat(osVersion);
    }

    event.returnValue = version;
})



ipcMain.on('unfreeze_win', (event, arg) => {

@@ -419,6 +437,18 @@ ipcMain.on('delete_file', (event, fpath) => {

})

// Recursively delete a directory (used to remove CoreML model bundles).
// Returns true on success, false on any failure.
ipcMain.on('delete_dir', (event, fpath) => {
    const fs = require('fs');
    try{
        // Fix: fs.rmdirSync with {recursive: true} is deprecated in Node;
        // fs.rmSync is the supported replacement with the same behavior here.
        fs.rmSync(fpath, { recursive: true });
        console.log("deleted")
        event.returnValue = true;
    } catch {
        console.log("err in deleting")
        event.returnValue = false;
    }

})

function run_realesrgan(input_path , cb ){
const path = require('path');
@@ -511,22 +541,37 @@ ipcMain.handle('run_realesrgan', async (event, arg) => {




// Enumerate installed models: plain .tdict checkpoints plus compiled CoreML
// model directories. Returns [{name, path}, ...] with .tdict entries first.
ipcMain.on('list_custom_models', (event, arg) => {
    const path = require('path');
    const fs = require('fs');
    const homedir = require('os').homedir();

    const tdictDir = path.join(homedir, ".diffusionbee", "custom_models");
    const coremlDir = path.join(homedir, ".diffusionbee", "coreml_models");

    // Make sure both model directories exist before scanning.
    for (const dir of [tdictDir, coremlDir]) {
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }
    }

    const models = [];

    // Plain checkpoints: every non-directory entry ending in ".tdict".
    for (const entry of fs.readdirSync(tdictDir, { withFileTypes: true })) {
        if (!entry.isDirectory() && entry.name.endsWith('.tdict')) {
            models.push({ name: entry.name, path: path.join(tdictDir, entry.name) });
        }
    }

    // CoreML models: a directory qualifies only when every required compiled
    // sub-model is present inside it.
    const requiredParts = ['TextEncoder.mlmodelc', 'Unet.mlmodelc', 'UnetChunk1.mlmodelc', 'UnetChunk2.mlmodelc', 'VAEDecoder.mlmodelc'];
    for (const entry of fs.readdirSync(coremlDir, { withFileTypes: true })) {
        if (!entry.isDirectory()) {
            continue;
        }
        const modelDirPath = path.join(coremlDir, entry.name);
        const contents = fs.readdirSync(modelDirPath);
        if (requiredParts.every((part) => contents.includes(part))) {
            models.push({ name: entry.name, path: modelDirPath });
        }
    }
    event.returnValue = models
})




console.log("native functions imported")


8 changes: 7 additions & 1 deletion electron_app/src/py_vue_bridge.js
Original file line number Diff line number Diff line change
@@ -92,6 +92,12 @@ function send_to_py_async() {

}

// Send a message to the Swift CoreML backend via the main process (blocking
// until the main process acknowledges).
function send_to_swift(msg) {
    window.ipcRenderer.sendSync('to_swift_sync', msg)
}

// Async variant — intentionally a no-op for now (parallels send_to_py_async).
function send_to_swift_async() {}



export { send_to_py, bind_app_component, send_to_py_async }
export { send_to_py, send_to_swift, bind_app_component, send_to_py_async, send_to_swift_async }