Basics
./code/Makefile
CXXFLAGS := -std=c++17
CXXFLAGS += -I /Users/fangjun/Downloads/onnxruntime-osx-x86_64-1.18.0/include
LDFLAGS := -L /Users/fangjun/Downloads/onnxruntime-osx-x86_64-1.18.0/lib
LDFLAGS += -l onnxruntime
LDFLAGS += -Wl,-rpath,/Users/fangjun/Downloads/onnxruntime-osx-x86_64-1.18.0/lib

main: main.cc c-api-test.cc cpp-api-test.cc ./custom-op.cc ./custom-op-2.cc ./custom-op-3.cc
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)
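Note: the -Wl,-rpath line embeds the ONNX Runtime lib directory into the resulting binary, so the dynamic loader can find libonnxruntime at run time without setting DYLD_LIBRARY_PATH (or LD_LIBRARY_PATH on Linux). Adjust the three hard-coded paths to wherever you unpacked the onnxruntime release.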
./code/main.cc
#include <iostream>

#include "onnxruntime_cxx_api.h"  // NOLINT

void TestCApi();
void TestCppApi();
void TestCustomModel();
void TestCustomModel2();
void TestCustomModel3();

int main() {
  TestCApi();
  TestCppApi();

  std::cout << "---test custom model---\n";
  TestCustomModel();

  std::cout << "---test custom model2---\n";
  TestCustomModel2();

  std::cout << "---test custom model3---\n";
  TestCustomModel3();

  std::cout << "ORT_API_VERSION: " << ORT_API_VERSION << "\n";
  return 0;
}
/* Sample output. The stray leading spaces come from printing each tensor
element followed by "\n ":

GetVersionString(): 1.18.0
Available providers: CoreMLExecutionProvider, CPUExecutionProvider
allocator name: Cpu
---test custom model---
110
 220
 330
 ---test custom model2---
11.5
 2.5
 3.5
 44.5
 ---test custom model3---
11
 22
 ORT_API_VERSION: 18
*/
./code/c-api-test.cc
1#include "onnxruntime_c_api.h" // NOLINT
2#include <cassert>
3#include <stdio.h>
4
5static void TestOrtStatus() {
6 const OrtApiBase *api_base = OrtGetApiBase();
7 const OrtApi *api = api_base->GetApi(ORT_API_VERSION);
8 OrtErrorCode code = ORT_OK;
9 const char *msg = "this is a message";
10
11 OrtStatus *status = api->CreateStatus(code, msg);
12 assert(api->GetErrorCode(status) == code);
13
14 const char *msg2 = api->GetErrorMessage(status);
15 assert(strcmp(msg, msg2) == 0);
16
17 // status addr: 0x600001e54040, msg2 addr: 0x600001e54044
18 fprintf(stderr, "status addr: %p, msg2 addr: %p\n", status, msg2);
19
20 // note that sizeof(code) is 4 in my test
21 assert((intptr_t)status + sizeof(code) == (intptr_t)msg2);
22
23 // we have to free the status to avoid memory leak
24 api->ReleaseStatus(status);
25}
26
27static void TestOrtApiBase() {
28
29 // OrtApiBase only has two method
30 const OrtApiBase *api_base = OrtGetApiBase();
31 fprintf(stderr, "GetVersionString(): %s\n", api_base->GetVersionString());
32
33 const OrtApi *api = api_base->GetApi(ORT_API_VERSION);
34 fprintf(stderr, "OrtApi: %p\n", api);
35
36 const char *info = api->GetBuildInfoString();
37 fprintf(stderr, "info: %s\n", info);
38}
39
40void TestCApi() {
41 TestOrtApiBase();
42 TestOrtStatus();
43}
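In the test above we create an OrtStatus ourselves. In real code, OrtStatus is what every fallible OrtApi function returns: NULL means success, and a non-NULL pointer carries the error code and message and must be released by the caller. Below is a minimal sketch of that check-and-release pattern; the helper name CheckStatus is ours, but CreateEnv and ReleaseEnv are real C API calls.

#include <stdio.h>
#include <stdlib.h>

#include "onnxruntime_c_api.h"  // NOLINT

// Hypothetical helper: print the error and abort if a call failed.
static void CheckStatus(const OrtApi *api, OrtStatus *status) {
  if (status != NULL) {  // NULL means success
    fprintf(stderr, "error %d: %s\n", api->GetErrorCode(status),
            api->GetErrorMessage(status));
    api->ReleaseStatus(status);  // the caller owns the returned status
    exit(-1);
  }
}

void TestStatusPattern() {
  const OrtApi *api = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  OrtEnv *env = NULL;
  CheckStatus(api, api->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "demo", &env));

  // ... use env ...

  api->ReleaseEnv(env);
}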
./code/cpp-api-test.cc
1#include "onnxruntime_cxx_api.h" // NOLINT
2#include <assert.h>
3#include <iostream>
4#include <sstream>
5
6static void TestOrtGetApi() {
7 const OrtApi &api = Ort::GetApi(); // it returns a const reference
8
9 std::string version = Ort::GetVersionString();
10 std::cout << "version: " << version << "\n";
11}
12
13static void PrintAvailableProviders() {
14 std::vector<std::string> providers = Ort::GetAvailableProviders();
15 std::ostringstream os;
16 os << "Available providers: ";
17 std::string sep = "";
18 for (const auto &p : providers) {
19 os << sep << p;
20 sep = ", ";
21 }
22 std::cout << os.str() << "\n";
23}
24
25static void TestCreateTensorFromBuffer() {
26 std::vector<int32_t> v = {1, 2, 3, 4, 5, 6};
27 std::array<int64_t, 2> shape = {2, 3};
28 auto memory_info =
29 Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
30
31 Ort::Value x = Ort::Value::CreateTensor<int32_t>(
32 memory_info, v.data(), v.size(), shape.data(), shape.size());
33
34 // memory is shared between x and v
35 int32_t *p = x.GetTensorMutableData<int32_t>();
36 p[0] = 10;
37 assert(v[0] == 10);
38
39 v[1] = 20;
40 assert(p[1] == 20);
41}
42
43static void TestCreateTensor() {
44 Ort::AllocatorWithDefaultOptions allocator;
45
46 std::array<int64_t, 2> shape = {2, 3};
47 auto memory_info =
48 Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
49
50 Ort::Value x =
51 Ort::Value::CreateTensor<int32_t>(allocator, shape.data(), shape.size());
52 assert(x.IsTensor());
53 assert(x.HasValue());
54 Ort::TypeInfo type_info = x.GetTypeInfo();
55 auto tensor_type_and_shape_info = type_info.GetTensorTypeAndShapeInfo();
56 assert(tensor_type_and_shape_info.GetElementCount() == 2 * 3);
57 assert(tensor_type_and_shape_info.GetDimensionsCount() == 2);
58 std::vector<int64_t> x_shape = tensor_type_and_shape_info.GetShape();
59 assert(x_shape.size() == shape.size());
60 assert(x_shape[0] == shape[0]);
61 assert(x_shape[1] == shape[1]);
62
63 assert(tensor_type_and_shape_info.GetElementType() ==
64 ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
65
66 Ort::ConstMemoryInfo memory_info2 = x.GetTensorMemoryInfo();
67 std::cout << "allocator name: " << memory_info2.GetAllocatorName() << "\n";
68}
69
70static void TestDataType() {
71 static_assert(Ort::TypeToTensorType<float>::type ==
72 ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT);
73
74 static_assert(Ort::TypeToTensorType<double>::type ==
75 ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE);
76
77 static_assert(Ort::TypeToTensorType<int8_t>::type ==
78 ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8);
79 static_assert(Ort::TypeToTensorType<int16_t>::type ==
80 ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16);
81 static_assert(Ort::TypeToTensorType<int32_t>::type ==
82 ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
83 static_assert(Ort::TypeToTensorType<int64_t>::type ==
84 ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64);
85 static_assert(Ort::TypeToTensorType<uint8_t>::type ==
86 ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8);
87 static_assert(Ort::TypeToTensorType<uint16_t>::type ==
88 ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16);
89 static_assert(Ort::TypeToTensorType<uint32_t>::type ==
90 ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32);
91 static_assert(Ort::TypeToTensorType<uint64_t>::type ==
92 ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64);
93 static_assert(Ort::TypeToTensorType<bool>::type ==
94 ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL);
95}
96
97void TestCppApi() {
98 TestOrtGetApi();
99 PrintAvailableProviders();
100 TestCreateTensorFromBuffer();
101 TestCreateTensor();
102 TestDataType();
103}
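Unlike the C API, the C++ wrapper converts non-NULL OrtStatus returns into exceptions, so instead of checking a status after every call you catch Ort::Exception. A minimal sketch, assuming a model path that does not exist so the load fails:

#include <iostream>

#include "onnxruntime_cxx_api.h"  // NOLINT

void TestException() {
  Ort::Env env;
  Ort::SessionOptions opts;
  try {
    // loading a non-existent model makes the underlying C API return an
    // error status, which the C++ wrapper rethrows as Ort::Exception
    Ort::Session sess(env, "./does-not-exist.onnx", opts);
  } catch (const Ort::Exception &e) {
    std::cout << "code: " << e.GetOrtErrorCode() << ", msg: " << e.what()
              << "\n";
  }
}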
./code/custom-op.cc
/*
references:
https://onnxruntime.ai/docs/reference/operators/add-custom-op.html
*/
#include <array>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_lite_custom_op.h"

static void KernelOne(const Ort::Custom::Tensor<float> &X,
                      const Ort::Custom::Tensor<float> &Y,
                      Ort::Custom::Tensor<float> &Z) {
  auto input_shape = X.Shape();
  auto x_raw = X.Data();
  auto y_raw = Y.Data();
  auto z_raw = Z.Allocate(input_shape);
  for (int64_t i = 0; i < Z.NumberOfElement(); ++i) {
    z_raw[i] = x_raw[i] + y_raw[i];
  }
}

static Ort::CustomOpDomain TestCustomOp() {
  Ort::CustomOpDomain v1_domain{"com.k2fsa.org"};
  // Please make sure that custom_op_one has the same lifetime as the
  // consuming session.
  //
  // Here we use a static variable so it is never released.
  // In practice, we can make it a member variable of a class.
  static std::unique_ptr<Ort::Custom::OrtLiteCustomOp> custom_op_one{
      Ort::Custom::CreateLiteCustomOp("CustomOpOne", "CPUExecutionProvider",
                                      KernelOne)};
  v1_domain.Add(custom_op_one.get());

  return v1_domain;
}

void TestCustomModel() {
  Ort::Env env;
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  Ort::CustomOpDomain v1_domain = TestCustomOp();

  // register the custom op domain before creating the session
  sess_opts.Add(v1_domain);

  std::unique_ptr<Ort::Session> sess =
      std::make_unique<Ort::Session>(env, "./e.onnx", sess_opts);

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  std::vector<float> x = {10, 20, 30};
  std::vector<float> y = {100, 200, 300};

  std::array<int64_t, 1> shape = {3};

  Ort::Value x_tensor = Ort::Value::CreateTensor(
      memory_info, x.data(), x.size(), shape.data(), shape.size());

  Ort::Value y_tensor = Ort::Value::CreateTensor(
      memory_info, y.data(), y.size(), shape.data(), shape.size());

  std::vector<Ort::Value> inputs;
  inputs.push_back(std::move(x_tensor));
  inputs.push_back(std::move(y_tensor));

  // the input/output names must match the exported graph; see e.txt below
  std::vector<const char *> input_names = {"l_x_", "l_y_"};
  std::vector<const char *> output_names = {"my_add_op"};
  auto out = sess->Run({}, input_names.data(), inputs.data(), inputs.size(),
                       output_names.data(), output_names.size());
  const float *p = out[0].GetTensorData<float>();
  for (int i = 0; i < 3; ++i) {
    std::cout << p[i] << "\n ";
  }
}
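The names "l_x_", "l_y_" and "my_add_op" are hard-coded above to match the exported graph; they can also be queried from the session at run time. A sketch of that, assuming the same session object (the helper name PrintIONames is ours):

#include <iostream>

#include "onnxruntime_cxx_api.h"  // NOLINT

void PrintIONames(Ort::Session &sess) {
  Ort::AllocatorWithDefaultOptions allocator;
  for (size_t i = 0; i != sess.GetInputCount(); ++i) {
    // GetInputNameAllocated returns a smart pointer that frees the
    // string with the given allocator when it goes out of scope
    auto name = sess.GetInputNameAllocated(i, allocator);
    std::cout << "input " << i << ": " << name.get() << "\n";
  }
  for (size_t i = 0; i != sess.GetOutputCount(); ++i) {
    auto name = sess.GetOutputNameAllocated(i, allocator);
    std::cout << "output " << i << ": " << name.get() << "\n";
  }
}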
./code/e.py
#!/usr/bin/env python3

from torch._custom_op import impl as custom_op
import torch
import onnx
import onnxscript
from onnxscript import opset18

import warnings

warnings.filterwarnings("ignore")


@custom_op.custom_op("mylibrary::my_add_op")
def my_add_op(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Since we register mylibrary::my_add_op, the function name must be
    # my_add_op; otherwise, an error is thrown when this script is run
    pass


@my_add_op.impl_abstract()
def my_add_op_impl_abstract_any_name_is_ok(x, y):
    return torch.empty_like(x)


@my_add_op.impl("cpu")
def my_add_op_impl_any_name_is_ok(x, y):
    # the eager CPU implementation; it must accept the same arguments
    # as my_add_op
    return x + y


class CustomFoo(torch.nn.Module):
    def forward(self, x, y):
        return my_add_op(x, y)


custom_opset = onnxscript.values.Opset(domain="com.k2fsa.org", version=1)


@onnxscript.script(custom_opset)
def custom_my_add(x, y):
    return custom_opset.CustomOpOne(x, y)


def main():
    torch._dynamo.allow_in_graph(my_add_op)
    x = torch.randn(3)
    y = torch.randn(3)
    custom_model = CustomFoo()
    onnx_registry = torch.onnx.OnnxRegistry()
    onnx_registry.register_op(
        namespace="mylibrary", op_name="my_add_op", overload="default", function=custom_my_add
    )

    export_options = torch.onnx.ExportOptions(onnx_registry=onnx_registry)
    onnx_program = torch.onnx.dynamo_export(
        custom_model, x, y, export_options=export_options
    )
    onnx_program.save("./e.onnx")
    with open('e.txt', 'w') as f:
        f.write(str(onnx_program.model_proto))
    onnx_model = onnx.load("e.onnx")
    onnx.checker.check_model(onnx_model)


if __name__ == '__main__':
    main()
./code/e.txt
ir_version: 8
opset_import {
  domain: "com.k2fsa.org"
  version: 1
}
opset_import {
  domain: ""
  version: 18
}
opset_import {
  domain: "pkg.onnxscript.torch_lib.common"
  version: 1
}
producer_name: "pytorch"
producer_version: "2.4.0"
graph {
  node {
    input: "l_x_"
    input: "l_y_"
    output: "my_add_op"
    name: "custom_my_add_0_n0"
    op_type: "CustomOpOne"
    domain: "com.k2fsa.org"
  }
  name: "main_graph"
  input {
    name: "l_x_"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
        }
      }
    }
  }
  input {
    name: "l_y_"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
        }
      }
    }
  }
  output {
    name: "my_add_op"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
        }
      }
    }
  }
}
./code/custom-op-2.cc
/*
references:
https://onnxruntime.ai/docs/reference/operators/add-custom-op.html
*/
#include <array>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_lite_custom_op.h"

static void KernelOne(const Ort::Custom::Tensor<float> &X,
                      const Ort::Custom::Tensor<float> &Y,
                      Ort::Custom::Tensor<float> &Z) {
  auto input_shape = X.Shape();
  auto x_raw = X.Data();
  auto y_raw = Y.Data();
  auto z_raw = Z.Allocate(input_shape);
  for (int64_t i = 0; i < Z.NumberOfElement(); ++i) {
    z_raw[i] = x_raw[i] + y_raw[i];
  }
}

static Ort::CustomOpDomain TestCustomOp2() {
  Ort::CustomOpDomain v1_domain{"com.k2fsa.org"};
  // Please make sure that custom_op_one has the same lifetime as the
  // consuming session.
  //
  // Here we use a static variable so it is never released.
  // In practice, we can make it a member variable of a class.
  static std::unique_ptr<Ort::Custom::OrtLiteCustomOp> custom_op_one{
      Ort::Custom::CreateLiteCustomOp("CustomOpOne2", "CPUExecutionProvider",
                                      KernelOne)};
  v1_domain.Add(custom_op_one.get());

  return v1_domain;
}

void TestCustomModel2() {
  Ort::Env env;
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  Ort::CustomOpDomain v1_domain = TestCustomOp2();

  // register the custom op domain before creating the session
  sess_opts.Add(v1_domain);

  std::unique_ptr<Ort::Session> sess =
      std::make_unique<Ort::Session>(env, "./f.onnx", sess_opts);

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  // the model computes relu(x) + foo, where foo is [1.5, 2.5, 3.5, 4.5]
  std::vector<float> x = {10, -20, -30, 40};

  std::array<int64_t, 1> shape = {4};

  Ort::Value x_tensor = Ort::Value::CreateTensor(
      memory_info, x.data(), x.size(), shape.data(), shape.size());

  std::vector<Ort::Value> inputs;
  inputs.push_back(std::move(x_tensor));

  std::vector<const char *> input_names = {"l_x_"};
  std::vector<const char *> output_names = {"my_add_op2"};
  auto out = sess->Run({}, input_names.data(), inputs.data(), inputs.size(),
                       output_names.data(), output_names.size());
  const float *p = out[0].GetTensorData<float>();
  for (int i = 0; i < 4; ++i) {
    std::cout << p[i] << "\n ";
  }
}
./code/f.py
#!/usr/bin/env python3

from torch._custom_op import impl as custom_op
import torch
import onnx
import onnxscript
from onnxscript import opset18

import warnings

warnings.filterwarnings("ignore")


@custom_op.custom_op("mylibrary::my_add_op2")
def my_add_op2(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Since we register mylibrary::my_add_op2, the function name must be
    # my_add_op2; otherwise, an error is thrown when this script is run
    pass


@my_add_op2.impl_abstract()
def my_add_op_impl_abstract_any_name_is_ok(x, y):
    return torch.empty_like(x)


@my_add_op2.impl("cpu")
def my_add_op_impl_any_name_is_ok(x, y):
    # the eager CPU implementation: relu(x) + y
    x = torch.nn.functional.relu(x)
    return x + y


class CustomFoo(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_parameter(
            'foo', torch.nn.Parameter(torch.tensor([1.5, 2.5, 3.5, 4.5]))
        )
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.relu(x)
        return my_add_op2(x, self.foo)


custom_opset = onnxscript.values.Opset(domain="com.k2fsa.org", version=1)


@onnxscript.script(custom_opset)
def custom_my_add(x, y):
    return custom_opset.CustomOpOne2(x, y)


@torch.no_grad()
def main():
    torch._dynamo.allow_in_graph(my_add_op2)
    x = torch.randn(4)
    custom_model = CustomFoo()
    onnx_registry = torch.onnx.OnnxRegistry()
    onnx_registry.register_op(
        namespace="mylibrary", op_name="my_add_op2", overload="default", function=custom_my_add
    )

    export_options = torch.onnx.ExportOptions(onnx_registry=onnx_registry)
    onnx_program = torch.onnx.dynamo_export(
        custom_model, x, export_options=export_options
    )
    onnx_program.save("./f.onnx")
    with open('f.txt', 'w') as f:
        f.write(str(onnx_program.model_proto))
    onnx_model = onnx.load("f.onnx")
    onnx.checker.check_model(onnx_model)


if __name__ == '__main__':
    main()
./code/f.txt
ir_version: 8
opset_import {
  domain: "pkg.onnxscript.torch_lib"
  version: 1
}
opset_import {
  domain: "pkg.torch.2.4.0+cpu"
  version: 1
}
opset_import {
  domain: "com.k2fsa.org"
  version: 1
}
opset_import {
  domain: ""
  version: 18
}
opset_import {
  domain: "pkg.onnxscript.torch_lib.common"
  version: 1
}
producer_name: "pytorch"
producer_version: "2.4.0"
graph {
  node {
    input: "l_x_"
    output: "relu_1"
    name: "torch_nn_modules_activation_ReLU_relu_1_0_aten_relu_0_n0"
    op_type: "Relu"
  }
  node {
    input: "relu_1"
    input: "foo"
    output: "my_add_op2"
    name: "custom_my_add_1_n0"
    op_type: "CustomOpOne2"
    domain: "com.k2fsa.org"
  }
  name: "main_graph"
  initializer {
    dims: 4
    data_type: 1
    name: "foo"
    raw_data: "\000\000\300?\000\000 @\000\000`@\000\000\220@"
  }
  input {
    name: "l_x_"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 4
          }
        }
      }
    }
  }
  output {
    name: "my_add_op2"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 4
          }
        }
      }
    }
  }
  value_info {
    name: "relu_1"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 4
          }
        }
      }
    }
  }
}
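In the initializer above, raw_data stores the little-endian IEEE-754 bytes of the four float32 values of foo: "\000\000\300?" is 0x3fc00000, i.e. 1.5, and so on for 2.5, 3.5 and 4.5. A quick standalone sketch of how such bytes decode, with the bytes hard-coded from the dump above:

#include <cstring>
#include <iostream>

int main() {
  // the raw_data bytes of "foo" from f.txt (little-endian float32)
  const unsigned char raw[] = {0x00, 0x00, 0xc0, 0x3f,   // 1.5
                               0x00, 0x00, 0x20, 0x40,   // 2.5
                               0x00, 0x00, 0x60, 0x40,   // 3.5
                               0x00, 0x00, 0x90, 0x40};  // 4.5
  float f[4];
  std::memcpy(f, raw, sizeof(f));  // reinterpret the bytes as float32
  for (float v : f) {
    std::cout << v << "\n";  // prints 1.5 2.5 3.5 4.5
  }
  return 0;
}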
./code/custom-op-3.cc
/*
references:
https://onnxruntime.ai/docs/reference/operators/add-custom-op.html
*/
#include <array>
#include <cstdint>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

#include "onnxruntime_lite_custom_op.h"

static void KernelOne(const Ort::Custom::Tensor<uint8_t> &X,
                      const Ort::Custom::Tensor<float> &scale_tensor,
                      Ort::Custom::Tensor<float> &Y) {
  auto input_shape = X.Shape();
  auto x_raw = X.Data();
  auto scale = scale_tensor.Data()[0];
  auto y_raw = Y.Allocate(input_shape);
  for (int64_t i = 0; i < Y.NumberOfElement(); ++i) {
    // scale each uint8 number while converting it to float
    y_raw[i] = x_raw[i] * scale;
  }
}

static Ort::CustomOpDomain TestCustomOp3() {
  Ort::CustomOpDomain v1_domain{"com.k2fsa.org"};
  // Please make sure that custom_op_one has the same lifetime as the
  // consuming session.
  //
  // Here we use a static variable so it is never released.
  // In practice, we can make it a member variable of a class.
  static std::unique_ptr<Ort::Custom::OrtLiteCustomOp> custom_op_one{
      Ort::Custom::CreateLiteCustomOp("MyCast", "CPUExecutionProvider",
                                      KernelOne)};
  v1_domain.Add(custom_op_one.get());

  return v1_domain;
}

void TestCustomModel3() {
  Ort::Env env;
  Ort::SessionOptions sess_opts;
  sess_opts.SetIntraOpNumThreads(1);
  sess_opts.SetInterOpNumThreads(1);

  Ort::CustomOpDomain v1_domain = TestCustomOp3();

  // register the custom op domain before creating the session
  sess_opts.Add(v1_domain);

  std::unique_ptr<Ort::Session> sess =
      std::make_unique<Ort::Session>(env, "./g.onnx", sess_opts);

  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

  // the model computes x + MyCast(foo, scale), where foo is [10, 20]
  std::vector<float> x = {1, 2};

  std::array<int64_t, 1> shape = {2};

  Ort::Value x_tensor = Ort::Value::CreateTensor(
      memory_info, x.data(), x.size(), shape.data(), shape.size());

  std::vector<Ort::Value> inputs;
  inputs.push_back(std::move(x_tensor));

  std::vector<const char *> input_names = {"l_x_"};
  std::vector<const char *> output_names = {"add"};
  auto out = sess->Run({}, input_names.data(), inputs.data(), inputs.size(),
                       output_names.data(), output_names.size());
  const float *p = out[0].GetTensorData<float>();
  for (int i = 0; i < 2; ++i) {
    std::cout << p[i] << "\n ";
  }
}
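Since g.onnx mixes a uint8 initializer with float inputs, it is worth double-checking what the session actually returns. A sketch that inspects an output tensor's element type and shape with the same C++ API used above (the helper name PrintOutputInfo is ours):

#include <iostream>

#include "onnxruntime_cxx_api.h"  // NOLINT

void PrintOutputInfo(const Ort::Value &out) {
  auto info = out.GetTensorTypeAndShapeInfo();
  // for the "add" output of g.onnx this should print element type 1
  // (ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) and shape [2]
  std::cout << "element type: " << info.GetElementType() << "\n";
  std::cout << "shape:";
  for (int64_t d : info.GetShape()) {
    std::cout << " " << d;
  }
  std::cout << "\n";
}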
./code/g.py
#!/usr/bin/env python3

from torch._custom_op import impl as custom_op
import torch
import onnx
import onnxscript
from onnxscript import opset18

import warnings

warnings.filterwarnings("ignore")


@custom_op.custom_op("mylibrary::my_cast")
def my_cast(x: torch.Tensor, scale: float = 0.25) -> torch.Tensor:
    # Since we register mylibrary::my_cast, the function name must be
    # my_cast; otherwise, an error is thrown when this script is run
    return x.to(torch.float32)


@my_cast.impl_abstract()
def my_cast_impl_abstract_any_name_is_ok(x, scale: float = 0.25):
    return x.to(torch.float32)


@my_cast.impl("cpu")
def my_cast_impl_any_name_is_ok(x, scale: float = 0.25):
    # the eager CPU implementation only casts; scale is ignored here
    return x.to(torch.float32)


class CustomFoo(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer('foo', torch.tensor([10, 20], dtype=torch.uint8))
        self.scale = 0.125

    def forward(self, x):
        return x + my_cast(self.foo, self.scale)


custom_opset = onnxscript.values.Opset(domain="com.k2fsa.org", version=1)


@onnxscript.script(custom_opset, default_opset=opset18)
def custom_my_cast(x, scale: float = 0.5):
    return custom_opset.MyCast(x, scale)


@torch.no_grad()
def main():
    torch._dynamo.allow_in_graph(my_cast)
    x = torch.randn(2)
    custom_model = CustomFoo()
    onnx_registry = torch.onnx.OnnxRegistry()
    onnx_registry.register_op(
        namespace="mylibrary", op_name="my_cast", overload="default", function=custom_my_cast
    )

    export_options = torch.onnx.ExportOptions(onnx_registry=onnx_registry)
    onnx_program = torch.onnx.dynamo_export(
        custom_model, x, export_options=export_options
    )
    onnx_program.save("./g.onnx")
    with open('g.txt', 'w') as f:
        f.write(str(onnx_program.model_proto))
    onnx_model = onnx.load("g.onnx")
    onnx.checker.check_model(onnx_model)


if __name__ == '__main__':
    main()
./code/g.txt
ir_version: 8
opset_import {
  domain: "com.k2fsa.org"
  version: 1
}
opset_import {
  domain: "pkg.onnxscript.torch_lib"
  version: 1
}
opset_import {
  domain: ""
  version: 18
}
opset_import {
  domain: "pkg.onnxscript.torch_lib.common"
  version: 1
}
producer_name: "pytorch"
producer_version: "2.4.0"
graph {
  node {
    output: "custom_my_cast_0_scale"
    name: "custom_my_cast_0_n0"
    op_type: "Constant"
    attribute {
      name: "value_float"
      type: FLOAT
      f: 0.125
    }
  }
  node {
    input: "foo"
    input: "custom_my_cast_0_scale"
    output: "my_cast"
    name: "custom_my_cast_0_n1"
    op_type: "MyCast"
    domain: "com.k2fsa.org"
  }
  node {
    input: "l_x_"
    input: "my_cast"
    output: "add"
    name: "aten_add_1"
    op_type: "aten_add"
    domain: "pkg.onnxscript.torch_lib"
    attribute {
      name: "alpha"
      type: FLOAT
      f: 1
    }
  }
  name: "main_graph"
  initializer {
    dims: 2
    data_type: 2
    name: "foo"
    raw_data: "\n\024"
  }
  input {
    name: "l_x_"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
  output {
    name: "add"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
  value_info {
    name: "custom_my_cast_0_scale"
    type {
      tensor_type {
        elem_type: 1
        shape {
        }
      }
    }
  }
  value_info {
    name: "my_cast"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
  value_info {
    name: "pkg.onnxscript.torch_lib::aten_add/self"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
  value_info {
    name: "pkg.onnxscript.torch_lib::aten_add/other"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
  value_info {
    name: "pkg.onnxscript.torch_lib::aten_add/alpha"
    type {
      tensor_type {
        elem_type: 1
        shape {
        }
      }
    }
  }
  value_info {
    name: "pkg.onnxscript.torch_lib::aten_add/other_1"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
  value_info {
    name: "pkg.onnxscript.torch_lib::aten_add/return_val"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 2
          }
        }
      }
    }
  }
}
functions {
  name: "aten_add"
  input: "self"
  input: "other"
  output: "return_val"
  attribute_proto {
    name: "alpha"
    type: FLOAT
    f: 1
  }
  node {
    output: "alpha"
    name: "n0"
    op_type: "Constant"
    attribute {
      name: "value_float"
      ref_attr_name: "alpha"
      type: FLOAT
    }
  }
  node {
    input: "other"
    input: "alpha"
    output: "other_1"
    name: "n2"
    op_type: "Mul"
  }
  node {
    input: "self"
    input: "other_1"
    output: "return_val"
    name: "n3"
    op_type: "Add"
  }
  doc_string: "add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"
  opset_import {
    domain: ""
    version: 18
  }
  domain: "pkg.onnxscript.torch_lib"
}