From 978ffc2c3240441c9df3049ead021017512a42d1 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Fri, 13 Dec 2024 11:35:09 +0000 Subject: [PATCH] 2024-12-13 nightly release (b4bc713e2698664d0fa534a2be01ac5a06a92b8e) --- .../scripts/generate_binary_build_matrix.py | 10 +- .github/workflows/build-test-linux.yml | 2 +- .../workflows/build-test-tensorrt-linux.yml | 4 +- .../workflows/build-test-tensorrt-windows.yml | 4 +- .github/workflows/build-test-windows.yml | 2 +- .github/workflows/docker_builder.yml | 1 + MODULE.bazel | 12 +- README.md | 2 +- dev_dep_versions.yml | 2 +- docker/README.md | 4 +- .../classtorch__tensorrt_1_1DataType.html | 4 +- ...rch__tensorrt_1_1Device_1_1DeviceType.html | 4 +- .../classtorch__tensorrt_1_1TensorFormat.html | 4 +- ...ensorrt_1_1ptq_1_1Int8CacheCalibrator.html | 4 +- ...ch__tensorrt_1_1ptq_1_1Int8Calibrator.html | 4 +- ...8h_1a18d295a837ac71add5578860b55e5502.html | 4 +- ...8h_1a282fd3c0b1c3a215148ae372070e1268.html | 4 +- ...8h_1a31398a6d4d27e28817afb0f0139e909e.html | 4 +- ...8h_1a35703561b26b1a9d2738ad7d58b27827.html | 4 +- ...8h_1abd1465eb38256d3f22cc1426b23d516b.html | 4 +- ...8h_1abe87b341f562fd1cf40b7672e4d759da.html | 4 +- ...8h_1ad19939408f7be171a74a89928b36eb59.html | 4 +- ...8h_1adad592a7b1b7eed529cdf6acd584c883.html | 4 +- docs/_cpp_api/dir_cpp.html | 4 +- docs/_cpp_api/dir_cpp_include.html | 4 +- .../dir_cpp_include_torch_tensorrt.html | 4 +- ...8h_1a130f65408ad8cbaee060f05e8db69558.html | 4 +- ...8h_1a3fbe5d72e4fc624dbd038853079620eb.html | 4 +- ..._cpp_include_torch_tensorrt_logging.h.html | 4 +- ...e_cpp_include_torch_tensorrt_macros.h.html | 4 +- ...file_cpp_include_torch_tensorrt_ptq.h.html | 4 +- ...clude_torch_tensorrt_torch_tensorrt.h.html | 4 +- ...8h_1a0593f776f469c20469e2f729fc7861a3.html | 4 +- ...8h_1a0c012cb374addd90eb1f42eaec570650.html | 4 +- ...8h_1a56e110feaaba2c3fd44bd201fd21a76a.html | 4 +- ...8h_1a7cb50492421ea9de4e3db895819df6f2.html | 4 +- ...8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html | 4 +- ...8h_1ad2efd47b6c3689e58ccc595680579ae5.html | 4 +- ...8h_1af8f3443813315af7901903d25dd495cc.html | 4 +- ...8h_1a226e3c83379d1012cde8578c1c86b16c.html | 4 +- ...8h_1a6186e305f47c1d94b6130ef6c7f7e178.html | 4 +- ...8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html | 4 +- ...8h_1a6e19490a08fb1553c9dd347a5ae79db9.html | 4 +- ...8h_1a81f9783517335dda877d8cfcf38987c9.html | 4 +- ...8h_1ac4ab8313ae72c2c899ea31548b528528.html | 4 +- ...8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html | 4 +- ...8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html | 4 +- ...8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html | 4 +- docs/_cpp_api/namespace_torch_tensorrt.html | 4 +- .../namespace_torch_tensorrt__logging.html | 4 +- .../namespace_torch_tensorrt__ptq.html | 4 +- ...namespace_torch_tensorrt__torchscript.html | 4 +- ..._cpp_include_torch_tensorrt_logging.h.html | 4 +- ...e_cpp_include_torch_tensorrt_macros.h.html | 4 +- ...file_cpp_include_torch_tensorrt_ptq.h.html | 4 +- ...clude_torch_tensorrt_torch_tensorrt.h.html | 4 +- .../structtorch__tensorrt_1_1Device.html | 4 +- .../structtorch__tensorrt_1_1GraphInputs.html | 4 +- .../structtorch__tensorrt_1_1Input.html | 4 +- ...ensorrt_1_1torchscript_1_1CompileSpec.html | 4 +- docs/_cpp_api/torch_tensort_cpp.html | 6 +- docs/_cpp_api/unabridged_orphan.html | 4 +- .../engine_caching_bert_example.ipynb | 2 +- .../engine_caching_example.py | 8 +- .../mutable_torchtrt_module_example.py | 4 +- .../engine_caching_example.ipynb | 8 +- .../mutable_torchtrt_module_example.ipynb | 4 +- .../_rendered_examples_jupyter.zip | Bin 130938 -> 130965 
bytes .../_rendered_examples_python.zip | Bin 99859 -> 99886 bytes .../refit_engine_example.py | 4 +- .../refit_engine_example.ipynb | 4 +- .../engine_caching_bert_example.py | 2 +- docs/_modules/index.html | 4 +- docs/_modules/torch_tensorrt/_Device.html | 4 +- docs/_modules/torch_tensorrt/_Input.html | 4 +- docs/_modules/torch_tensorrt/_compile.html | 4 +- docs/_modules/torch_tensorrt/_enums.html | 8 +- .../torch_tensorrt/dynamo/_compiler.html | 157 ++++- .../torch_tensorrt/dynamo/_exporter.html | 4 +- .../torch_tensorrt/dynamo/_refit.html | 49 +- .../torch_tensorrt/dynamo/_settings.html | 20 +- .../torch_tensorrt/dynamo/_tracer.html | 4 +- .../runtime/_MutableTorchTensorRTModule.html | 14 +- .../runtime/_PythonTorchTensorRTModule.html | 7 +- .../dynamo/runtime/_TorchTensorRTModule.html | 5 +- docs/_modules/torch_tensorrt/fx/fx2trt.html | 4 +- .../torch_tensorrt/fx/input_tensor_spec.html | 4 +- docs/_modules/torch_tensorrt/fx/lower.html | 4 +- .../torch_tensorrt/fx/trt_module.html | 4 +- docs/_modules/torch_tensorrt/logging.html | 4 +- .../runtime/_multi_device_safe_mode.html | 4 +- .../torch_tensorrt/ts/_compile_spec.html | 4 +- .../_modules/torch_tensorrt/ts/_compiler.html | 4 +- docs/_modules/torch_tensorrt/ts/ptq.html | 4 +- .../engine_caching_bert_example.rst.txt | 2 +- .../dynamo/engine_caching_example.rst.txt | 8 +- .../mutable_torchtrt_module_example.rst.txt | 4 +- .../dynamo/refit_engine_example.rst.txt | 4 +- docs/_static/documentation_options.js | 2 +- docs/cli/torchtrtc.html | 4 +- docs/contributors/conversion.html | 4 +- docs/contributors/dynamo_converters.html | 4 +- docs/contributors/lowering.html | 4 +- docs/contributors/partitioning.html | 4 +- docs/contributors/phases.html | 4 +- docs/contributors/runtime.html | 4 +- docs/contributors/system_overview.html | 4 +- docs/contributors/ts_converters.html | 4 +- docs/contributors/useful_links.html | 4 +- .../writing_dynamo_aten_lowering_passes.html | 4 +- docs/dynamo/dynamo_export.html | 4 +- docs/dynamo/torch_compile.html | 10 +- docs/fx/getting_started_with_fx_path.html | 4 +- docs/genindex.html | 4 +- docs/getting_started/installation.html | 4 +- docs/getting_started/jetpack.html | 4 +- docs/getting_started/quick_start.html | 4 +- docs/index.html | 4 +- docs/indices/supported_ops.html | 4 +- docs/objects.inv | Bin 32968 -> 32968 bytes docs/py-modindex.html | 4 +- docs/py_api/dynamo.html | 16 +- docs/py_api/fx.html | 4 +- docs/py_api/logging.html | 4 +- docs/py_api/ptq.html | 4 +- docs/py_api/runtime.html | 9 +- docs/py_api/torch_tensorrt.html | 10 +- docs/py_api/ts.html | 6 +- docs/search.html | 4 +- docs/searchindex.js | 2 +- docs/sg_execution_times.html | 4 +- .../pytorch-sphinx-theme/docs/changelog.html | 4 +- .../docs/configuring.html | 4 +- .../pytorch-sphinx-theme/docs/demo/api.html | 4 +- .../pytorch-sphinx-theme/docs/demo/demo.html | 6 +- .../docs/demo/lists_tables.html | 4 +- .../pytorch-sphinx-theme/docs/demo/long.html | 4 +- .../docs/demo/structure.html | 4 +- docs/src/pytorch-sphinx-theme/docs/index.html | 4 +- .../pytorch-sphinx-theme/docs/installing.html | 4 +- ...creating_torchscript_module_in_python.html | 4 +- docs/ts/getting_started_with_cpp_api.html | 4 +- docs/ts/getting_started_with_python_api.html | 4 +- docs/ts/ptq.html | 4 +- .../ts/torchscript_frontend_from_pytorch.html | 4 +- .../dynamo/converter_overloading.html | 4 +- ...cross_runtime_compilation_for_windows.html | 4 +- .../dynamo/custom_kernel_plugins.html | 4 +- .../dynamo/engine_caching_bert_example.html | 6 +- 
.../dynamo/engine_caching_example.html | 12 +- .../_rendered_examples/dynamo/index.html | 4 +- .../mutable_torchtrt_module_example.html | 8 +- .../dynamo/refit_engine_example.html | 8 +- .../dynamo/torch_compile_advanced_usage.html | 4 +- .../dynamo/torch_compile_resnet_example.html | 4 +- .../torch_compile_stable_diffusion.html | 4 +- .../torch_compile_transformers_example.html | 4 +- .../dynamo/torch_export_cudagraphs.html | 4 +- .../dynamo/torch_export_gpt2.html | 4 +- .../dynamo/torch_export_llama2.html | 4 +- .../_rendered_examples/dynamo/vgg16_ptq.html | 4 +- .../dynamo/weight_streaming_example.html | 4 +- docs/tutorials/_rendered_examples/index.html | 4 +- .../_rendered_examples/triton/index.html | 4 +- docs/tutorials/notebooks.html | 4 +- .../serving_torch_tensorrt_with_triton.html | 4 +- docs/user_guide/dynamic_shapes.html | 4 +- docs/user_guide/mixed_precision.html | 4 +- docs/user_guide/runtime.html | 4 +- docs/user_guide/saving_models.html | 4 +- docs/user_guide/torch_tensorrt_explained.html | 4 +- docs/user_guide/using_dla.html | 4 +- .../dynamo/engine_caching_bert_example.py | 2 +- examples/dynamo/engine_caching_example.py | 8 +- .../dynamo/mutable_torchtrt_module_example.py | 4 +- examples/dynamo/refit_engine_example.py | 4 +- packaging/pre_build_script.sh | 14 +- packaging/pre_build_script_windows.sh | 16 +- py/ci/Dockerfile.ci | 2 +- py/torch_tensorrt/_enums.py | 4 +- py/torch_tensorrt/dynamo/_compiler.py | 153 ++++- py/torch_tensorrt/dynamo/_defaults.py | 6 +- py/torch_tensorrt/dynamo/_refit.py | 45 +- py/torch_tensorrt/dynamo/_settings.py | 16 +- py/torch_tensorrt/dynamo/backend/backends.py | 4 + .../dynamo/conversion/_TRTInterpreter.py | 242 +++++--- .../dynamo/conversion/aten_ops_converters.py | 102 ++-- .../conversion/impl/normalization/ops.py | 261 ++------ .../dynamo/lowering/passes/view_to_reshape.py | 12 +- .../runtime/_MutableTorchTensorRTModule.py | 10 +- .../runtime/_PythonTorchTensorRTModule.py | 3 +- .../dynamo/runtime/_TorchTensorRTModule.py | 1 + .../dynamo/runtime/register_fake_class.py | 21 +- py/torch_tensorrt/dynamo/utils.py | 22 +- pyproject.toml | 10 +- tests/py/dynamo/conversion/harness.py | 13 +- tests/py/dynamo/conversion/test_chunk_aten.py | 187 ------ .../py/dynamo/conversion/test_cumsum_aten.py | 8 +- .../conversion/test_embedding_bag_aten.py | 8 +- .../dynamo/conversion/test_group_norm_aten.py | 157 +---- tests/py/dynamo/models/test_engine_cache.py | 528 +++++++++++++++- tests/py/dynamo/models/test_model_refit.py | 41 +- tests/py/dynamo/models/test_reexport.py | 1 - .../models/test_weight_stripped_engine.py | 564 ++++++++++++++++++ .../runtime/test_mutable_torchtrt_module.py | 16 +- toolchains/ci_workspaces/MODULE.bazel.tmpl | 12 +- uv.lock | 269 ++++----- 207 files changed, 2311 insertions(+), 1421 deletions(-) delete mode 100644 tests/py/dynamo/conversion/test_chunk_aten.py create mode 100644 tests/py/dynamo/models/test_weight_stripped_engine.py diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index 4ba7e0faeb..26bb447b4f 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -152,10 +152,10 @@ def initialize_globals(channel: str, build_python_only: bool) -> None: "12.4": "pytorch/manylinux2_28-builder:cuda12.4", "12.6": "pytorch/manylinux2_28-builder:cuda12.6", **{ - gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}" + gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}" for gpu_arch in ROCM_ARCHES }, - CPU: 
"pytorch/manylinux-builder:cpu", + CPU: "pytorch/manylinux2_28-builder:cpu", XPU: "pytorch/manylinux2_28-builder:xpu", # TODO: Migrate CUDA_AARCH64 image to manylinux2_28_aarch64-builder:cuda12.4 CPU_AARCH64: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64", @@ -163,7 +163,7 @@ def initialize_globals(channel: str, build_python_only: bool) -> None: } LIBTORCH_CONTAINER_IMAGES = { **{ - (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:cuda{gpu_arch}" + (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:cuda{gpu_arch}" for gpu_arch in CUDA_ARCHES }, **{ @@ -171,14 +171,14 @@ def initialize_globals(channel: str, build_python_only: bool) -> None: for gpu_arch in CUDA_ARCHES }, **{ - (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:rocm{gpu_arch}" + (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:rocm{gpu_arch}" for gpu_arch in ROCM_ARCHES }, **{ (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}" for gpu_arch in ROCM_ARCHES }, - (CPU, PRE_CXX11_ABI): "pytorch/manylinux-builder:cpu", + (CPU, PRE_CXX11_ABI): "pytorch/manylinux2_28-builder:cpu", (CPU, CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu", } diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml index 72d7e21b5c..b0a487bb79 100644 --- a/.github/workflows/build-test-linux.yml +++ b/.github/workflows/build-test-linux.yml @@ -137,7 +137,7 @@ jobs: export CI_BUILD=1 pushd . cd tests/py/dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ popd tests-py-dynamo-fe: diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml index cfad7274dc..625ffe9a31 100644 --- a/.github/workflows/build-test-tensorrt-linux.yml +++ b/.github/workflows/build-test-tensorrt-linux.yml @@ -129,7 +129,7 @@ jobs: export CI_BUILD=1 pushd . cd tests/py/dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ popd tests-py-dynamo-fe: @@ -314,4 +314,4 @@ jobs: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file + cancel-in-progress: true diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml index d2be9febd7..fe812e1b9d 100644 --- a/.github/workflows/build-test-tensorrt-windows.yml +++ b/.github/workflows/build-test-tensorrt-windows.yml @@ -132,7 +132,7 @@ jobs: export CI_BUILD=1 pushd . 
cd tests/py/dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ popd tests-py-dynamo-fe: @@ -298,4 +298,4 @@ jobs: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} - cancel-in-progress: true \ No newline at end of file + cancel-in-progress: true diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index c2b05d8994..c227d14a0f 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -119,7 +119,7 @@ jobs: export CI_BUILD=1 pushd . cd tests/py/dynamo - python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 8 conversion/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ popd tests-py-dynamo-fe: diff --git a/.github/workflows/docker_builder.yml b/.github/workflows/docker_builder.yml index 3059b4da71..a978d82b6a 100644 --- a/.github/workflows/docker_builder.yml +++ b/.github/workflows/docker_builder.yml @@ -7,6 +7,7 @@ on: - main - nightly - release/* + workflow_dispatch: # If pushes to main are made in rapid succession, # cancel existing docker builds and use newer commits diff --git a/MODULE.bazel b/MODULE.bazel index add7821fcb..b7f52be76f 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -79,20 +79,20 @@ http_archive( http_archive( name = "tensorrt", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "adff1cd5abe5d87013806172351e58fd024e5bf0fc61d49ef4b84cd38ed99081", - strip_prefix = "TensorRT-10.3.0.26", + sha256 = "33d3c2f3f4c84dc7991a4337a6fde9ed33f5c8e5c4f03ac2eb6b994a382b03a0", + strip_prefix = "TensorRT-10.6.0.26", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz", ], ) http_archive( name = "tensorrt_win", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "2bb4bcb79e8c33575816d874b0512ea28c302af1c06ee6d224da71aa182f75e0", - strip_prefix = "TensorRT-10.3.0.26", + sha256 = "6c6d92c108a1b3368423e8f69f08d31269830f1e4c9da43b37ba34a176797254", + strip_prefix = "TensorRT-10.6.0.26", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip", ], ) diff --git a/README.md b/README.md index 1e6716f950..5ee8dd4df7 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ These are the following dependencies used to verify the testcases. 
Torch-TensorR - Bazel 6.3.2 - Libtorch 2.5.0.dev (latest nightly) (built with CUDA 12.4) - CUDA 12.4 -- TensorRT 10.3.0.26 +- TensorRT 10.6.0.26 ## Deprecation Policy diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index 3b23c49da3..527b83936e 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,2 +1,2 @@ __cuda_version__: "12.4" -__tensorrt_version__: "10.3.0" +__tensorrt_version__: ">=10.3.0,<=10.6.0" diff --git a/docker/README.md b/docker/README.md index 3d44f45b74..7435973b1a 100644 --- a/docker/README.md +++ b/docker/README.md @@ -17,14 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch ### Instructions -- The example below uses TensorRT 10.3.0.26 +- The example below uses TensorRT 10.6.0.26 - See dependencies for a list of current default dependencies. > From root of Torch-TensorRT repo Build: ``` -DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=10.3.0 -f docker/Dockerfile -t torch_tensorrt:latest . +DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=10.6.0 -f docker/Dockerfile -t torch_tensorrt:latest . ``` Run: diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html index 7f441d0a48..f02996b3e6 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html @@ -10,7 +10,7 @@ - Class DataType — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Class DataType — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html b/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html index 5ba16b36e7..0396a49f4d 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html @@ -10,7 +10,7 @@ - Class Device::DeviceType — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Class Device::DeviceType — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html b/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html index 071b72f722..586828c431 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html @@ -10,7 +10,7 @@ - Class TensorFormat — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Class TensorFormat — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html index 3bcf4278e3..1d9635c536 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html @@ -10,7 +10,7 @@ - Template Class Int8CacheCalibrator — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Template Class Int8CacheCalibrator — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html index e34fa08118..24dee94807 100644 --- a/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html +++ b/docs/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html @@ -10,7 +10,7 @@ - Template Class Int8Calibrator — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Template Class Int8Calibrator — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html b/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html index f8ed97222c..496fc0bccf 100644 --- a/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html +++ b/docs/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html @@ -10,7 +10,7 @@ - Define STR — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define STR — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html b/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html index c257e3cf43..224f17950a 100644 --- a/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html +++ b/docs/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_PATCH_VERSION — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define TORCH_TENSORRT_PATCH_VERSION — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html b/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html index 16a1442fd1..42a8d5e240 100644 --- a/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html +++ b/docs/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_MAJOR_VERSION — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define TORCH_TENSORRT_MAJOR_VERSION — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html b/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html index e20db8ab73..9499a8e6d4 100644 --- a/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html +++ b/docs/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_MINOR_VERSION — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define TORCH_TENSORRT_MINOR_VERSION — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html b/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html index 620ca0a39b..56a56f3cb3 100644 --- a/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html +++ b/docs/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html @@ -10,7 +10,7 @@ - Define TORCHTRT_API — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define TORCHTRT_API — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html b/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html index 5d3c22d77c..efa3d152a0 100644 --- a/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html +++ b/docs/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html @@ -10,7 +10,7 @@ - Define XSTR — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define XSTR — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html b/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html index 4e9476d8f7..e8978400ea 100644 --- a/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html +++ b/docs/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html @@ -10,7 +10,7 @@ - Define TORCHTRT_HIDDEN — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define TORCHTRT_HIDDEN — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html b/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html index f511d8b41b..4455899e87 100644 --- a/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html +++ b/docs/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html @@ -10,7 +10,7 @@ - Define TORCH_TENSORRT_VERSION — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Define TORCH_TENSORRT_VERSION — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/dir_cpp.html b/docs/_cpp_api/dir_cpp.html index 5e70673ad8..e2de367d1f 100644 --- a/docs/_cpp_api/dir_cpp.html +++ b/docs/_cpp_api/dir_cpp.html @@ -10,7 +10,7 @@ - Directory cpp — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Directory cpp — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/dir_cpp_include.html b/docs/_cpp_api/dir_cpp_include.html index da4c5edbea..5badeae2da 100644 --- a/docs/_cpp_api/dir_cpp_include.html +++ b/docs/_cpp_api/dir_cpp_include.html @@ -10,7 +10,7 @@ - Directory include — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Directory include — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html b/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html index 23f15e796d..cbf2fbd924 100644 --- a/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html +++ b/docs/_cpp_api/dir_cpp_include_torch_tensorrt.html @@ -10,7 +10,7 @@ - Directory torch_tensorrt — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Directory torch_tensorrt — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.html b/docs/_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.html index f8f0882dae..123e3ee161 100644 --- a/docs/_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.html +++ b/docs/_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.html @@ -10,7 +10,7 @@ - Enum Level — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Enum Level — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.html b/docs/_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.html index 318ec372f7..f2db2a4b52 100644 --- a/docs/_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.html +++ b/docs/_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.html @@ -10,7 +10,7 @@ - Enum EngineCapability — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Enum EngineCapability — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html index 5a15ec4a4e..c96509181e 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html @@ -10,7 +10,7 @@ - File logging.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + File logging.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html index eef75be22d..4b46c2cb31 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html @@ -10,7 +10,7 @@ - File macros.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + File macros.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html index 9dc114f14f..69016745fb 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html @@ -10,7 +10,7 @@ - File ptq.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + File ptq.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html b/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html index 2f9b75c354..724614aaae 100644 --- a/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html +++ b/docs/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html @@ -10,7 +10,7 @@ - File torch_tensorrt.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + File torch_tensorrt.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.html b/docs/_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.html index f134abf556..e43ac58a02 100644 --- a/docs/_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.html +++ b/docs/_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::get_logging_prefix — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::get_logging_prefix — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.html b/docs/_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.html index 77a8ff6e3e..1bd9ae2d54 100644 --- a/docs/_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.html +++ b/docs/_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::get_reportable_log_level — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::get_reportable_log_level — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.html b/docs/_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.html index fd9aee11d2..5d8ab84922 100644 --- a/docs/_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.html +++ b/docs/_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::get_is_colored_output_on — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::get_is_colored_output_on — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.html b/docs/_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.html index 827564dcd3..858fe324c0 100644 --- a/docs/_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.html +++ b/docs/_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::set_reportable_log_level — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::set_reportable_log_level — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html b/docs/_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html index d4b722f9b5..2513504e39 100644 --- a/docs/_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html +++ b/docs/_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::log — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::log — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.html b/docs/_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.html index bf6fb76614..05453f521b 100644 --- a/docs/_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.html +++ b/docs/_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::set_is_colored_output_on — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::set_is_colored_output_on — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.html b/docs/_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.html index 965750fc38..d5bd7c6538 100644 --- a/docs/_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.html +++ b/docs/_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::logging::set_logging_prefix — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::logging::set_logging_prefix — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.html b/docs/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.html index 4de3ae6a67..50f7ef6385 100644 --- a/docs/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.html +++ b/docs/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.html @@ -10,7 +10,7 @@ - Template Function torch_tensorrt::ptq::make_int8_cache_calibrator — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Template Function torch_tensorrt::ptq::make_int8_cache_calibrator — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.html b/docs/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.html index c0f3f3c3dd..10ba1e45c3 100644 --- a/docs/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.html +++ b/docs/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.html @@ -10,7 +10,7 @@ - Template Function torch_tensorrt::ptq::make_int8_calibrator — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Template Function torch_tensorrt::ptq::make_int8_calibrator — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html b/docs/_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html index 1efda8d593..ffd3a57efd 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::check_method_operator_support — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::torchscript::check_method_operator_support — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.html b/docs/_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.html index 28425dd493..906a281e83 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::compile — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::torchscript::compile — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.html b/docs/_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.html index 336a73dd20..313fa57448 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::embed_engine_in_new_module — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::torchscript::embed_engine_in_new_module — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.html b/docs/_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.html index 96d9908d07..833473c4f7 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::get_build_info — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::get_build_info — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html b/docs/_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html index 33c008e875..c456e5dafd 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::set_device — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::set_device — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html b/docs/_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html index a7bd73d6f9..71783e0f0e 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::dump_build_info — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::dump_build_info — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html b/docs/_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html index 81e3e933aa..d68a9a3ac3 100644 --- a/docs/_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html +++ b/docs/_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html @@ -10,7 +10,7 @@ - Function torch_tensorrt::torchscript::convert_method_to_trt_engine — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Function torch_tensorrt::torchscript::convert_method_to_trt_engine — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/namespace_torch_tensorrt.html b/docs/_cpp_api/namespace_torch_tensorrt.html index 7f5871ee42..24214a0a03 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt.html +++ b/docs/_cpp_api/namespace_torch_tensorrt.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Namespace torch_tensorrt — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/namespace_torch_tensorrt__logging.html b/docs/_cpp_api/namespace_torch_tensorrt__logging.html index bc31cc44f5..567586f962 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt__logging.html +++ b/docs/_cpp_api/namespace_torch_tensorrt__logging.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt::logging — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Namespace torch_tensorrt::logging — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/namespace_torch_tensorrt__ptq.html b/docs/_cpp_api/namespace_torch_tensorrt__ptq.html index 316eff71a9..2de2afb391 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt__ptq.html +++ b/docs/_cpp_api/namespace_torch_tensorrt__ptq.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt::ptq — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Namespace torch_tensorrt::ptq — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html b/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html index 0143c721fe..721489a7a6 100644 --- a/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html +++ b/docs/_cpp_api/namespace_torch_tensorrt__torchscript.html @@ -10,7 +10,7 @@ - Namespace torch_tensorrt::torchscript — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Namespace torch_tensorrt::torchscript — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html index 37150367c7..b77393c8ae 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html @@ -10,7 +10,7 @@ - Program Listing for File logging.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Program Listing for File logging.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html index 6310c51d12..de0d2eb694 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html @@ -10,7 +10,7 @@ - Program Listing for File macros.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Program Listing for File macros.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html index 1745e2bff8..1e917492a2 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html @@ -10,7 +10,7 @@ - Program Listing for File ptq.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Program Listing for File ptq.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html index 9daf0fd135..5592c3779a 100644 --- a/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html +++ b/docs/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html @@ -10,7 +10,7 @@ - Program Listing for File torch_tensorrt.h — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Program Listing for File torch_tensorrt.h — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1Device.html b/docs/_cpp_api/structtorch__tensorrt_1_1Device.html index 3ad2ff26c8..5bcd14b169 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1Device.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1Device.html @@ -10,7 +10,7 @@ - Struct Device — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Struct Device — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html b/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html index 8353f78ada..17d3d851c5 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html @@ -10,7 +10,7 @@ - Struct GraphInputs — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Struct GraphInputs — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1Input.html b/docs/_cpp_api/structtorch__tensorrt_1_1Input.html index 776e941e27..df02b40dd1 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1Input.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1Input.html @@ -10,7 +10,7 @@ - Struct Input — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Struct Input — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html b/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html index b6519c8581..3b0af47562 100644 --- a/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html +++ b/docs/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html @@ -10,7 +10,7 @@ - Struct CompileSpec — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Struct CompileSpec — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
diff --git a/docs/_cpp_api/torch_tensort_cpp.html b/docs/_cpp_api/torch_tensort_cpp.html index 37e997359c..ae812da01c 100644 --- a/docs/_cpp_api/torch_tensort_cpp.html +++ b/docs/_cpp_api/torch_tensort_cpp.html @@ -10,7 +10,7 @@ - Torch-TensorRT C++ API — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch-TensorRT C++ API — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
@@ -474,7 +474,7 @@

Class Hierarchy
  • diff --git a/docs/_cpp_api/unabridged_orphan.html b/docs/_cpp_api/unabridged_orphan.html index 14ca9fa802..58a093550a 100644 --- a/docs/_cpp_api/unabridged_orphan.html +++ b/docs/_cpp_api/unabridged_orphan.html @@ -10,7 +10,7 @@ - Full API — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Full API — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
- v2.6.0.dev0+a945aeb
+ v2.6.0.dev0+38b1804
    diff --git a/docs/_downloads/06a1dddfb8c2b5515b697700d863a453/engine_caching_bert_example.ipynb b/docs/_downloads/06a1dddfb8c2b5515b697700d863a453/engine_caching_bert_example.ipynb index 677495750d..c1c3e52e0b 100644 --- a/docs/_downloads/06a1dddfb8c2b5515b697700d863a453/engine_caching_bert_example.ipynb +++ b/docs/_downloads/06a1dddfb8c2b5515b697700d863a453/engine_caching_bert_example.ipynb @@ -15,7 +15,7 @@ }, "outputs": [], "source": [ - "import numpy as np\nimport torch\nimport torch_tensorrt\nfrom engine_caching_example import remove_timing_cache\nfrom transformers import BertModel\n\nnp.random.seed(0)\ntorch.manual_seed(0)\n\nmodel = BertModel.from_pretrained(\"bert-base-uncased\", return_dict=False).cuda().eval()\ninputs = [\n torch.randint(0, 2, (1, 14), dtype=torch.int32).to(\"cuda\"),\n torch.randint(0, 2, (1, 14), dtype=torch.int32).to(\"cuda\"),\n]\n\n\ndef compile_bert(iterations=3):\n times = []\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n # remove timing cache and reset dynamo for engine caching messurement\n remove_timing_cache()\n torch._dynamo.reset()\n\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n compilation_kwargs = {\n \"use_python_runtime\": False,\n \"enabled_precisions\": {torch.float},\n \"truncate_double\": True,\n \"debug\": False,\n \"min_block_size\": 1,\n \"make_refittable\": True,\n \"cache_built_engines\": cache_built_engines,\n \"reuse_cached_engines\": reuse_cached_engines,\n \"engine_cache_dir\": \"/tmp/torch_trt_bert_engine_cache\",\n \"engine_cache_size\": 1 << 30, # 1GB\n }\n optimized_model = torch.compile(\n model,\n backend=\"torch_tensorrt\",\n options=compilation_kwargs,\n )\n optimized_model(*inputs)\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"-----compile bert-----> compilation time:\\n\", times, \"milliseconds\")\n\n\nif __name__ == \"__main__\":\n compile_bert()" + "import numpy as np\nimport torch\nimport torch_tensorrt\nfrom engine_caching_example import remove_timing_cache\nfrom transformers import BertModel\n\nnp.random.seed(0)\ntorch.manual_seed(0)\n\nmodel = BertModel.from_pretrained(\"bert-base-uncased\", return_dict=False).cuda().eval()\ninputs = [\n torch.randint(0, 2, (1, 14), dtype=torch.int32).to(\"cuda\"),\n torch.randint(0, 2, (1, 14), dtype=torch.int32).to(\"cuda\"),\n]\n\n\ndef compile_bert(iterations=3):\n times = []\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n # remove timing cache and reset dynamo for engine caching messurement\n remove_timing_cache()\n 
torch._dynamo.reset()\n\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n compilation_kwargs = {\n \"use_python_runtime\": False,\n \"enabled_precisions\": {torch.float},\n \"truncate_double\": True,\n \"debug\": False,\n \"min_block_size\": 1,\n \"immutable_weights\": False,\n \"cache_built_engines\": cache_built_engines,\n \"reuse_cached_engines\": reuse_cached_engines,\n \"engine_cache_dir\": \"/tmp/torch_trt_bert_engine_cache\",\n \"engine_cache_size\": 1 << 30, # 1GB\n }\n optimized_model = torch.compile(\n model,\n backend=\"torch_tensorrt\",\n options=compilation_kwargs,\n )\n optimized_model(*inputs)\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"-----compile bert-----> compilation time:\\n\", times, \"milliseconds\")\n\n\nif __name__ == \"__main__\":\n compile_bert()" ] } ], diff --git a/docs/_downloads/1c759c0181fe2845e5579cc82e5b7a7a/engine_caching_example.py b/docs/_downloads/1c759c0181fe2845e5579cc82e5b7a7a/engine_caching_example.py index 28ff73aa72..fb4c341077 100644 --- a/docs/_downloads/1c759c0181fe2845e5579cc82e5b7a7a/engine_caching_example.py +++ b/docs/_downloads/1c759c0181fe2845e5579cc82e5b7a7a/engine_caching_example.py @@ -63,7 +63,7 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): # in a subsequent compilation, either as part of this session or a new session, the cache will # pull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude. # As such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``), -# the engine must be refittable (``make_refittable=True``). See :ref:`refit_engine_example` for more details. +# the engine must be refittable (``immutable_weights=False``). See :ref:`refit_engine_example` for more details. 
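# Illustrative sketch: the minimal option set the example above relies on after the
# rename from ``make_refittable=True`` to ``immutable_weights=False``. Option names are
# taken from the patched example code; the torchvision model, input shape, and CUDA
# device below are placeholder assumptions.
import torch
import torch_tensorrt  # importing registers the "tensorrt" backend for torch.compile
import torchvision.models as models

model = models.resnet18(pretrained=True).eval().to("cuda")
inputs = [torch.rand((1, 3, 224, 224)).to("cuda")]

compiled_model = torch.compile(
    model,
    backend="tensorrt",
    options={
        "use_python_runtime": True,
        "min_block_size": 1,
        "immutable_weights": False,    # keep the engine refittable; required for caching
        "cache_built_engines": True,   # save newly built engines to the engine cache
        "reuse_cached_engines": True,  # refit weights into a cached engine on a cache hit
    },
)
compiled_model(*inputs)  # the first call triggers compilation and populates the cache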
def torch_compile(iterations=3): @@ -97,7 +97,7 @@ def torch_compile(iterations=3): "enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, }, @@ -157,7 +157,7 @@ def dynamo_compile(iterations=3): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, + immutable_weights=False, cache_built_engines=cache_built_engines, reuse_cached_engines=reuse_cached_engines, engine_cache_size=1 << 30, # 1GB @@ -268,7 +268,7 @@ def torch_compile_my_cache(iterations=3): "enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "custom_engine_cache": engine_cache, diff --git a/docs/_downloads/2fbbf7380f818b1cbce2b90bbcaf2904/mutable_torchtrt_module_example.py b/docs/_downloads/2fbbf7380f818b1cbce2b90bbcaf2904/mutable_torchtrt_module_example.py index b68c9a11ee..8b62855c32 100644 --- a/docs/_downloads/2fbbf7380f818b1cbce2b90bbcaf2904/mutable_torchtrt_module_example.py +++ b/docs/_downloads/2fbbf7380f818b1cbce2b90bbcaf2904/mutable_torchtrt_module_example.py @@ -31,7 +31,7 @@ settings = { "use_python": False, "enabled_precisions": {torch.float32}, - "make_refittable": True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -80,7 +80,7 @@ "use_python_runtime": True, "enabled_precisions": {torch.float16}, "debug": True, - "make_refittable": True, + "immutable_weights": False, } model_id = "runwayml/stable-diffusion-v1-5" diff --git a/docs/_downloads/3454ee6d4b68e83cdf0c757f0059986b/engine_caching_example.ipynb b/docs/_downloads/3454ee6d4b68e83cdf0c757f0059986b/engine_caching_example.ipynb index 7a8fb2b6ac..5837a9094d 100644 --- a/docs/_downloads/3454ee6d4b68e83cdf0c757f0059986b/engine_caching_example.ipynb +++ b/docs/_downloads/3454ee6d4b68e83cdf0c757f0059986b/engine_caching_example.ipynb @@ -22,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Engine Caching for JIT Compilation\n\nThe primary goal of engine caching is to help speed up JIT workflows. ``torch.compile``\nprovides a great deal of flexibility in model construction which makes it a good\nfirst tool to try when looking to speed up your workflow. However, historically\nthe cost of compilation and in particular recompilation has been a barrier to entry\nfor many users. If for some reason a subgraph gets invalidated, that graph is reconstructed\nscratch prior to the addition of engine caching. Now as engines are constructed, with ``cache_built_engines=True``,\nengines are saved to disk tied to a hash of their corresponding PyTorch subgraph. If\nin a subsequent compilation, either as part of this session or a new session, the cache will\npull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude.\nAs such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``),\nthe engine must be refittable (``make_refittable=True``). See `refit_engine_example` for more details.\n\n" + "## Engine Caching for JIT Compilation\n\nThe primary goal of engine caching is to help speed up JIT workflows. 
``torch.compile``\nprovides a great deal of flexibility in model construction which makes it a good\nfirst tool to try when looking to speed up your workflow. However, historically\nthe cost of compilation and in particular recompilation has been a barrier to entry\nfor many users. If for some reason a subgraph gets invalidated, that graph is reconstructed\nscratch prior to the addition of engine caching. Now as engines are constructed, with ``cache_built_engines=True``,\nengines are saved to disk tied to a hash of their corresponding PyTorch subgraph. If\nin a subsequent compilation, either as part of this session or a new session, the cache will\npull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude.\nAs such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``),\nthe engine must be refittable (``immutable_weights=False``). See `refit_engine_example` for more details.\n\n" ] }, { @@ -33,7 +33,7 @@ }, "outputs": [], "source": [ - "def torch_compile(iterations=3):\n times = []\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n inputs = [torch.rand((100, 3, 224, 224)).to(\"cuda\")]\n # remove timing cache and reset dynamo just for engine caching messurement\n remove_timing_cache()\n torch._dynamo.reset()\n\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n compiled_model = torch.compile(\n model,\n backend=\"tensorrt\",\n options={\n \"use_python_runtime\": True,\n \"enabled_precisions\": enabled_precisions,\n \"debug\": debug,\n \"min_block_size\": min_block_size,\n \"make_refittable\": True,\n \"cache_built_engines\": cache_built_engines,\n \"reuse_cached_engines\": reuse_cached_engines,\n },\n )\n compiled_model(*inputs) # trigger the compilation\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"----------------torch_compile----------------\")\n print(\"disable engine caching, used:\", times[0], \"ms\")\n print(\"enable engine caching to cache engines, used:\", times[1], \"ms\")\n print(\"enable engine caching to reuse engines, used:\", times[2], \"ms\")\n\n\ntorch_compile()" + "def torch_compile(iterations=3):\n times = []\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n inputs = [torch.rand((100, 3, 224, 224)).to(\"cuda\")]\n # remove timing cache and reset dynamo just for engine caching messurement\n remove_timing_cache()\n torch._dynamo.reset()\n\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n 
reuse_cached_engines = True\n\n start.record()\n compiled_model = torch.compile(\n model,\n backend=\"tensorrt\",\n options={\n \"use_python_runtime\": True,\n \"enabled_precisions\": enabled_precisions,\n \"debug\": debug,\n \"min_block_size\": min_block_size,\n \"immutable_weights\": False,\n \"cache_built_engines\": cache_built_engines,\n \"reuse_cached_engines\": reuse_cached_engines,\n },\n )\n compiled_model(*inputs) # trigger the compilation\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"----------------torch_compile----------------\")\n print(\"disable engine caching, used:\", times[0], \"ms\")\n print(\"enable engine caching to cache engines, used:\", times[1], \"ms\")\n print(\"enable engine caching to reuse engines, used:\", times[2], \"ms\")\n\n\ntorch_compile()" ] }, { @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "def dynamo_compile(iterations=3):\n times = []\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n example_inputs = (torch.randn((100, 3, 224, 224)).to(\"cuda\"),)\n # Mark the dim0 of inputs as dynamic\n batch = torch.export.Dim(\"batch\", min=1, max=200)\n exp_program = torch.export.export(\n model, args=example_inputs, dynamic_shapes={\"x\": {0: batch}}\n )\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n inputs = [torch.rand((100 + i, 3, 224, 224)).to(\"cuda\")]\n remove_timing_cache() # remove timing cache just for engine caching messurement\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n trt_gm = torch_trt.dynamo.compile(\n exp_program,\n tuple(inputs),\n use_python_runtime=use_python_runtime,\n enabled_precisions=enabled_precisions,\n debug=debug,\n min_block_size=min_block_size,\n make_refittable=True,\n cache_built_engines=cache_built_engines,\n reuse_cached_engines=reuse_cached_engines,\n engine_cache_size=1 << 30, # 1GB\n )\n # output = trt_gm(*inputs)\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"----------------dynamo_compile----------------\")\n print(\"disable engine caching, used:\", times[0], \"ms\")\n print(\"enable engine caching to cache engines, used:\", times[1], \"ms\")\n print(\"enable engine caching to reuse engines, used:\", times[2], \"ms\")\n\n\ndynamo_compile()" + "def dynamo_compile(iterations=3):\n times = []\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n example_inputs = (torch.randn((100, 3, 224, 224)).to(\"cuda\"),)\n # Mark the dim0 of inputs as dynamic\n batch = torch.export.Dim(\"batch\", min=1, max=200)\n exp_program = torch.export.export(\n model, args=example_inputs, dynamic_shapes={\"x\": {0: batch}}\n )\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached 
engine.\n for i in range(iterations):\n inputs = [torch.rand((100 + i, 3, 224, 224)).to(\"cuda\")]\n remove_timing_cache() # remove timing cache just for engine caching messurement\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n trt_gm = torch_trt.dynamo.compile(\n exp_program,\n tuple(inputs),\n use_python_runtime=use_python_runtime,\n enabled_precisions=enabled_precisions,\n debug=debug,\n min_block_size=min_block_size,\n immutable_weights=False,\n cache_built_engines=cache_built_engines,\n reuse_cached_engines=reuse_cached_engines,\n engine_cache_size=1 << 30, # 1GB\n )\n # output = trt_gm(*inputs)\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"----------------dynamo_compile----------------\")\n print(\"disable engine caching, used:\", times[0], \"ms\")\n print(\"enable engine caching to cache engines, used:\", times[1], \"ms\")\n print(\"enable engine caching to reuse engines, used:\", times[2], \"ms\")\n\n\ndynamo_compile()" ] }, { @@ -69,7 +69,7 @@ }, "outputs": [], "source": [ - "class RAMEngineCache(BaseEngineCache):\n def __init__(\n self,\n ) -> None:\n \"\"\"\n Constructs a user held engine cache in memory.\n \"\"\"\n self.engine_cache: Dict[str, bytes] = {}\n\n def save(\n self,\n hash: str,\n blob: bytes,\n ):\n \"\"\"\n Insert the engine blob to the cache.\n\n Args:\n hash (str): The hash key to associate with the engine blob.\n blob (bytes): The engine blob to be saved.\n\n Returns:\n None\n \"\"\"\n self.engine_cache[hash] = blob\n\n def load(self, hash: str) -> Optional[bytes]:\n \"\"\"\n Load the engine blob from the cache.\n\n Args:\n hash (str): The hash key of the engine to load.\n\n Returns:\n Optional[bytes]: The engine blob if found, None otherwise.\n \"\"\"\n if hash in self.engine_cache:\n return self.engine_cache[hash]\n else:\n return None\n\n\ndef torch_compile_my_cache(iterations=3):\n times = []\n engine_cache = RAMEngineCache()\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n inputs = [torch.rand((100, 3, 224, 224)).to(\"cuda\")]\n # remove timing cache and reset dynamo just for engine caching messurement\n remove_timing_cache()\n torch._dynamo.reset()\n\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n compiled_model = torch.compile(\n model,\n backend=\"tensorrt\",\n options={\n \"use_python_runtime\": True,\n \"enabled_precisions\": enabled_precisions,\n \"debug\": debug,\n \"min_block_size\": min_block_size,\n \"make_refittable\": True,\n \"cache_built_engines\": cache_built_engines,\n \"reuse_cached_engines\": reuse_cached_engines,\n \"custom_engine_cache\": engine_cache,\n },\n )\n compiled_model(*inputs) # trigger the compilation\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"----------------torch_compile----------------\")\n print(\"disable engine caching, used:\", times[0], \"ms\")\n 
print(\"enable engine caching to cache engines, used:\", times[1], \"ms\")\n print(\"enable engine caching to reuse engines, used:\", times[2], \"ms\")\n\n\ntorch_compile_my_cache()" + "class RAMEngineCache(BaseEngineCache):\n def __init__(\n self,\n ) -> None:\n \"\"\"\n Constructs a user held engine cache in memory.\n \"\"\"\n self.engine_cache: Dict[str, bytes] = {}\n\n def save(\n self,\n hash: str,\n blob: bytes,\n ):\n \"\"\"\n Insert the engine blob to the cache.\n\n Args:\n hash (str): The hash key to associate with the engine blob.\n blob (bytes): The engine blob to be saved.\n\n Returns:\n None\n \"\"\"\n self.engine_cache[hash] = blob\n\n def load(self, hash: str) -> Optional[bytes]:\n \"\"\"\n Load the engine blob from the cache.\n\n Args:\n hash (str): The hash key of the engine to load.\n\n Returns:\n Optional[bytes]: The engine blob if found, None otherwise.\n \"\"\"\n if hash in self.engine_cache:\n return self.engine_cache[hash]\n else:\n return None\n\n\ndef torch_compile_my_cache(iterations=3):\n times = []\n engine_cache = RAMEngineCache()\n start = torch.cuda.Event(enable_timing=True)\n end = torch.cuda.Event(enable_timing=True)\n\n # The 1st iteration is to measure the compilation time without engine caching\n # The 2nd and 3rd iterations are to measure the compilation time with engine caching.\n # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.\n # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.\n for i in range(iterations):\n inputs = [torch.rand((100, 3, 224, 224)).to(\"cuda\")]\n # remove timing cache and reset dynamo just for engine caching messurement\n remove_timing_cache()\n torch._dynamo.reset()\n\n if i == 0:\n cache_built_engines = False\n reuse_cached_engines = False\n else:\n cache_built_engines = True\n reuse_cached_engines = True\n\n start.record()\n compiled_model = torch.compile(\n model,\n backend=\"tensorrt\",\n options={\n \"use_python_runtime\": True,\n \"enabled_precisions\": enabled_precisions,\n \"debug\": debug,\n \"min_block_size\": min_block_size,\n \"immutable_weights\": False,\n \"cache_built_engines\": cache_built_engines,\n \"reuse_cached_engines\": reuse_cached_engines,\n \"custom_engine_cache\": engine_cache,\n },\n )\n compiled_model(*inputs) # trigger the compilation\n end.record()\n torch.cuda.synchronize()\n times.append(start.elapsed_time(end))\n\n print(\"----------------torch_compile----------------\")\n print(\"disable engine caching, used:\", times[0], \"ms\")\n print(\"enable engine caching to cache engines, used:\", times[1], \"ms\")\n print(\"enable engine caching to reuse engines, used:\", times[2], \"ms\")\n\n\ntorch_compile_my_cache()" ] } ], diff --git a/docs/_downloads/68b8589f80a47518afd92bbad3fda19d/mutable_torchtrt_module_example.ipynb b/docs/_downloads/68b8589f80a47518afd92bbad3fda19d/mutable_torchtrt_module_example.ipynb index f2d5458fc6..065a3d7d44 100644 --- a/docs/_downloads/68b8589f80a47518afd92bbad3fda19d/mutable_torchtrt_module_example.ipynb +++ b/docs/_downloads/68b8589f80a47518afd92bbad3fda19d/mutable_torchtrt_module_example.ipynb @@ -33,7 +33,7 @@ }, "outputs": [], "source": [ - "settings = {\n \"use_python\": False,\n \"enabled_precisions\": {torch.float32},\n \"make_refittable\": True,\n}\n\nmodel = models.resnet18(pretrained=True).eval().to(\"cuda\")\nmutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)\n# You can use the mutable module just like the original pytorch module. 
The compilation happens while you first call the mutable module.\nmutable_module(*inputs)" + "settings = {\n \"use_python\": False,\n \"enabled_precisions\": {torch.float32},\n \"immutable_weights\": False,\n}\n\nmodel = models.resnet18(pretrained=True).eval().to(\"cuda\")\nmutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)\n# You can use the mutable module just like the original pytorch module. The compilation happens while you first call the mutable module.\nmutable_module(*inputs)" ] }, { @@ -94,7 +94,7 @@ }, "outputs": [], "source": [ - "# The LoRA checkpoint is from https://civitai.com/models/12597/moxin\n\nfrom diffusers import DiffusionPipeline\n\nwith torch.no_grad():\n settings = {\n \"use_python_runtime\": True,\n \"enabled_precisions\": {torch.float16},\n \"debug\": True,\n \"make_refittable\": True,\n }\n\n model_id = \"runwayml/stable-diffusion-v1-5\"\n device = \"cuda:0\"\n\n prompt = \"house in forest, shuimobysim, wuchangshuo, best quality\"\n negative = \"(worst quality:2), (low quality:2), (normal quality:2), lowres, normal quality, out of focus, cloudy, (watermark:2),\"\n\n pipe = DiffusionPipeline.from_pretrained(\n model_id, revision=\"fp16\", torch_dtype=torch.float16\n )\n pipe.to(device)\n\n # The only extra line you need\n pipe.unet = torch_trt.MutableTorchTensorRTModule(pipe.unet, **settings)\n\n image = pipe(prompt, negative_prompt=negative, num_inference_steps=30).images[0]\n image.save(\"./without_LoRA_mutable.jpg\")\n\n # Standard Huggingface LoRA loading procedure\n pipe.load_lora_weights(\n \"stablediffusionapi/load_lora_embeddings\",\n weight_name=\"moxin.safetensors\",\n adapter_name=\"lora1\",\n )\n pipe.set_adapters([\"lora1\"], adapter_weights=[1])\n pipe.fuse_lora()\n pipe.unload_lora_weights()\n\n # Refit triggered\n image = pipe(prompt, negative_prompt=negative, num_inference_steps=30).images[0]\n image.save(\"./with_LoRA_mutable.jpg\")" + "# The LoRA checkpoint is from https://civitai.com/models/12597/moxin\n\nfrom diffusers import DiffusionPipeline\n\nwith torch.no_grad():\n settings = {\n \"use_python_runtime\": True,\n \"enabled_precisions\": {torch.float16},\n \"debug\": True,\n \"immutable_weights\": False,\n }\n\n model_id = \"runwayml/stable-diffusion-v1-5\"\n device = \"cuda:0\"\n\n prompt = \"house in forest, shuimobysim, wuchangshuo, best quality\"\n negative = \"(worst quality:2), (low quality:2), (normal quality:2), lowres, normal quality, out of focus, cloudy, (watermark:2),\"\n\n pipe = DiffusionPipeline.from_pretrained(\n model_id, revision=\"fp16\", torch_dtype=torch.float16\n )\n pipe.to(device)\n\n # The only extra line you need\n pipe.unet = torch_trt.MutableTorchTensorRTModule(pipe.unet, **settings)\n\n image = pipe(prompt, negative_prompt=negative, num_inference_steps=30).images[0]\n image.save(\"./without_LoRA_mutable.jpg\")\n\n # Standard Huggingface LoRA loading procedure\n pipe.load_lora_weights(\n \"stablediffusionapi/load_lora_embeddings\",\n weight_name=\"moxin.safetensors\",\n adapter_name=\"lora1\",\n )\n pipe.set_adapters([\"lora1\"], adapter_weights=[1])\n pipe.fuse_lora()\n pipe.unload_lora_weights()\n\n # Refit triggered\n image = pipe(prompt, negative_prompt=negative, num_inference_steps=30).images[0]\n image.save(\"./with_LoRA_mutable.jpg\")" ] } ], diff --git a/docs/_downloads/6a6052d9668b2cb8332d349d328e21c1/_rendered_examples_jupyter.zip b/docs/_downloads/6a6052d9668b2cb8332d349d328e21c1/_rendered_examples_jupyter.zip index 
4b93cd71127df2ff50a686cc08c4f2902f1ebe25..a6987fd5a0d173248aeb90274c1f9ad71cf4b85e 100644 GIT binary patch (binary delta data omitted) diff --git a/docs/_modules/index.html b/docs/_modules/index.html @@ -9,7 +9,7 @@ - Overview: module code — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Overview: module code — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/_Device.html b/docs/_modules/torch_tensorrt/_Device.html index b815e03a65..b7bc40cb10 100644 --- a/docs/_modules/torch_tensorrt/_Device.html +++ b/docs/_modules/torch_tensorrt/_Device.html @@ -9,7 +9,7 @@ - torch_tensorrt._Device — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt._Device — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/_Input.html b/docs/_modules/torch_tensorrt/_Input.html index 3ca82ad497..7ab110d160 100644 --- a/docs/_modules/torch_tensorrt/_Input.html +++ b/docs/_modules/torch_tensorrt/_Input.html @@ -9,7 +9,7 @@ - torch_tensorrt._Input — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt._Input — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/_compile.html b/docs/_modules/torch_tensorrt/_compile.html index 4dc84139ca..27a5388cc0 100644 --- a/docs/_modules/torch_tensorrt/_compile.html +++ b/docs/_modules/torch_tensorrt/_compile.html @@ -9,7 +9,7 @@ - torch_tensorrt._compile — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt._compile — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/_enums.html b/docs/_modules/torch_tensorrt/_enums.html index a98528998b..72a5e8e136 100644 --- a/docs/_modules/torch_tensorrt/_enums.html +++ b/docs/_modules/torch_tensorrt/_enums.html @@ -9,7 +9,7 @@ - torch_tensorrt._enums — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt._enums — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -685,7 +685,7 @@

    Source code for torch_tensorrt._enums

                     return dtype.f32
                 elif t == np.float64:
                     return dtype.f64
    -            elif t == np.bool:
    +            elif t == np.bool_:
                     return dtype.b
                 # TODO: Consider using ml_dtypes when issues like this are resolved:
                 # https://github.com/pytorch/pytorch/issues/109873
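The hunk above keys the NumPy branch of the dtype conversion on `np.bool_`, since the `np.bool` alias was removed in NumPy 1.24. A minimal sketch of the behaviour this branch provides, assuming `dtype._from` accepts NumPy scalar types as the excerpt suggests:

# Hedged sketch, not an authoritative API reference: it assumes dtype._from
# accepts NumPy scalar types, as the branch shown above implies.
import numpy as np
from torch_tensorrt import dtype

# NumPy 1.24 removed the deprecated alias np.bool; np.bool_ is the canonical
# boolean scalar type, so the lookup now matches against it.
assert dtype._from(np.float32) == dtype.f32
assert dtype._from(np.bool_) == dtype.b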
    @@ -1849,7 +1849,7 @@ 

    Source code for torch_tensorrt._enums

         def __str__(self) -> str:
             return str(self.name)
     
    -    @needs_torch_tensorrt_runtime
    +    @needs_torch_tensorrt_runtime  # type: ignore
         def _to_serialized_rt_platform(self) -> str:
             val: str = torch.ops.tensorrt._platform_unknown()
     
    diff --git a/docs/_modules/torch_tensorrt/dynamo/_compiler.html b/docs/_modules/torch_tensorrt/dynamo/_compiler.html
    index 7c3d2f05f8..a86eaaa91e 100644
    --- a/docs/_modules/torch_tensorrt/dynamo/_compiler.html
    +++ b/docs/_modules/torch_tensorrt/dynamo/_compiler.html
    @@ -9,7 +9,7 @@
       
       
       
    -  torch_tensorrt.dynamo._compiler — Torch-TensorRT v2.6.0.dev0+a945aeb documentation
    +  torch_tensorrt.dynamo._compiler — Torch-TensorRT v2.6.0.dev0+38b1804 documentation
       
     
       
    @@ -272,7 +272,7 @@
                   
                   
                     
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -528,7 +528,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]] ] = _defaults.ENABLED_PRECISIONS, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, - make_refittable: bool = _defaults.MAKE_REFITTABLE, debug: bool = _defaults.DEBUG, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, workspace_size: int = _defaults.WORKSPACE_SIZE, @@ -558,6 +557,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < custom_engine_cache: Optional[BaseEngineCache] = _defaults.CUSTOM_ENGINE_CACHE, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, use_fp32_acc: bool = _defaults.USE_FP32_ACC, + refit_identical_engine_weights: bool = _defaults.REFIT_IDENTICAL_ENGINE_WEIGHTS, + strip_engine_weights: bool = _defaults.STRIP_ENGINE_WEIGHTS, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING, **kwargs: Any, ) -> torch.fx.GraphModule: @@ -597,7 +599,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False sparse_weights (bool): Enable sparsity for convolution and fully connected layers. enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels - refit (bool): Enable refitting debug (bool): Enable debuggable engine capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -629,6 +630,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < custom_engine_cache (Optional[BaseEngineCache]): Engine cache instance to use for saving and loading engines. Users can provide their own engine cache by inheriting from BaseEngineCache. If used, engine_cache_dir and engine_cache_size will be ignored. use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs. + strip_engine_weights (bool): Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required. + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored. enable_weight_streaming (bool): Enable weight streaming. **kwargs: Any, Returns: @@ -658,14 +662,44 @@

    Source code for torch_tensorrt.dynamo._compiler

    < if "refit" in kwargs.keys(): warnings.warn( - "Refit is deprecated. Please use make_refittable=True if you want to enable refitting of the engine.", + "`refit` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted.", DeprecationWarning, stacklevel=2, ) - if make_refittable: - raise ValueError("Use flag make_refittable only. Flag refit is deprecated.") + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `refit` is deprecated." + ) else: - make_refittable = kwargs["refit"] + immutable_weights = not kwargs["refit"] + + if "make_refittable" in kwargs.keys(): + warnings.warn( + "`make_refittable` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", + DeprecationWarning, + stacklevel=2, + ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `make_refittable` is deprecated." + ) + else: + immutable_weights = not kwargs["make_refittable"] + + if refit_identical_engine_weights: + if immutable_weights: + raise ValueError( + "`immutable_weights` must be False when `refit_identical_engine_weights` is True." + ) + + if ( + not immutable_weights + and not refit_identical_engine_weights + and enable_weight_streaming + ): + raise ValueError( + "TensorRT's `REFIT` flag is not compatible with `enable_weight_streaming=True` for now. This issue was reported on https://github.com/pytorch/TensorRT/issues/3305" + ) engine_capability = EngineCapability._from(engine_capability) @@ -740,7 +774,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, "engine_capability": engine_capability, "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, @@ -751,6 +784,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < "lazy_engine_init": lazy_engine_init, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, + "refit_identical_engine_weights": refit_identical_engine_weights, + "strip_engine_weights": strip_engine_weights, + "immutable_weights": immutable_weights, "enable_cross_compile_for_windows": True, "enable_weight_streaming": enable_weight_streaming, } @@ -807,7 +843,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]] ] = _defaults.ENABLED_PRECISIONS, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, - make_refittable: bool = _defaults.MAKE_REFITTABLE, debug: bool = _defaults.DEBUG, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, workspace_size: int = _defaults.WORKSPACE_SIZE, @@ -837,6 +872,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < custom_engine_cache: Optional[BaseEngineCache] = _defaults.CUSTOM_ENGINE_CACHE, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, use_fp32_acc: bool = _defaults.USE_FP32_ACC, + refit_identical_engine_weights: bool = _defaults.REFIT_IDENTICAL_ENGINE_WEIGHTS, + strip_engine_weights: bool = _defaults.STRIP_ENGINE_WEIGHTS, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING, **kwargs: Any, ) -> torch.fx.GraphModule: @@ -878,7 +916,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False sparse_weights (bool): Enable sparsity for convolution and fully connected layers. enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels - refit (bool): Enable refitting debug (bool): Enable debuggable engine capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -910,6 +947,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < custom_engine_cache (Optional[BaseEngineCache]): Engine cache instance to use for saving and loading engines. Users can provide their own engine cache by inheriting from BaseEngineCache. If used, engine_cache_dir and engine_cache_size will be ignored. use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs. + strip_engine_weights (bool): Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required. + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored. enable_weight_streaming (bool): Enable weight streaming. **kwargs: Any, Returns: @@ -933,14 +973,44 @@

    Source code for torch_tensorrt.dynamo._compiler

    < if "refit" in kwargs.keys(): warnings.warn( - "Refit is deprecated. Please use make_refittable=True if you want to enable refitting of the engine.", + "`refit` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", + DeprecationWarning, + stacklevel=2, + ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `refit` is deprecated." + ) + else: + immutable_weights = not kwargs["refit"] + + if "make_refittable" in kwargs.keys(): + warnings.warn( + "`make_refittable` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", DeprecationWarning, stacklevel=2, ) - if make_refittable: - raise ValueError("Use flag make_refittable only. Flag refit is deprecated.") + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `make_refittable` is deprecated." + ) else: - make_refittable = kwargs["refit"] + immutable_weights = not kwargs["make_refittable"] + + if refit_identical_engine_weights: + if immutable_weights: + raise ValueError( + "`immutable_weights` must be False when `refit_identical_engine_weights` is True." + ) + + if ( + not immutable_weights + and not refit_identical_engine_weights + and enable_weight_streaming + ): + raise ValueError( + "TensorRT's `REFIT` flag is not compatible with `enable_weight_streaming=True` for now. This issue was reported on https://github.com/pytorch/TensorRT/issues/3305" + ) if ( "enable_cross_compile_for_windows" in kwargs.keys() @@ -1006,9 +1076,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < engine_cache = None if cache_built_engines or reuse_cached_engines: - assert ( - make_refittable - ), "Engine caching requires make_refittable to be set to True" engine_cache = ( custom_engine_cache if custom_engine_cache is not None @@ -1039,7 +1106,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, "engine_capability": engine_capability, "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, @@ -1052,6 +1118,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < "reuse_cached_engines": reuse_cached_engines, "use_explicit_typing": use_explicit_typing, "use_fp32_acc": use_fp32_acc, + "refit_identical_engine_weights": refit_identical_engine_weights, + "strip_engine_weights": strip_engine_weights, + "immutable_weights": immutable_weights, "enable_cross_compile_for_windows": False, "enable_weight_streaming": enable_weight_streaming, } @@ -1326,7 +1395,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < require_full_compilation: bool = _defaults.REQUIRE_FULL_COMPILATION, disable_tf32: bool = _defaults.DISABLE_TF32, sparse_weights: bool = _defaults.SPARSE_WEIGHTS, - make_refittable: bool = _defaults.MAKE_REFITTABLE, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, dla_sram_size: int = _defaults.DLA_SRAM_SIZE, @@ -1337,6 +1405,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < timing_cache_path: str = _defaults.TIMING_CACHE_PATH, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, use_fp32_acc: bool = _defaults.USE_FP32_ACC, + refit_identical_engine_weights: bool = _defaults.REFIT_IDENTICAL_ENGINE_WEIGHTS, + strip_engine_weights: bool = _defaults.STRIP_ENGINE_WEIGHTS, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING, **kwargs: Any, ) -> bytes: @@ -1387,7 +1458,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < Only applicable for `ir="dynamo"`; has no effect for `torch.compile` path disable_tf32 (bool): Whether to disable TF32 computation for TRT layers sparse_weights (bool): Whether to allow the builder to use sparse weights - refit (bool): Whether to build a refittable engine engine_capability (trt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer. @@ -1398,6 +1468,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < timing_cache_path (str): Path to the timing cache if it exists (or) where it will be saved after compilation use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs. + strip_engine_weights (bool): Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required. + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored. enable_weight_streaming (bool): Enable weight streaming. Returns: bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs @@ -1417,12 +1490,48 @@

    Source code for torch_tensorrt.dynamo._compiler

    < DeprecationWarning, stacklevel=2, ) + if "refit" in kwargs.keys(): warnings.warn( - "Refit is deprecated. Please use make_refittable=True if you want to enable refitting of the engine.", + "`refit` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", + DeprecationWarning, + stacklevel=2, + ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `refit` is deprecated." + ) + else: + immutable_weights = not kwargs["refit"] + + if "make_refittable" in kwargs.keys(): + warnings.warn( + "`make_refittable` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", DeprecationWarning, stacklevel=2, ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `make_refittable` is deprecated." + ) + else: + immutable_weights = not kwargs["make_refittable"] + + if refit_identical_engine_weights: + if immutable_weights: + raise ValueError( + "`immutable_weights` must be False when `refit_identical_engine_weights` is True." + ) + + if ( + not immutable_weights + and not refit_identical_engine_weights + and enable_weight_streaming + ): + raise ValueError( + "TensorRT's `REFIT` flag is not compatible with `enable_weight_streaming=True` for now. This issue was reported on https://github.com/pytorch/TensorRT/issues/3305" + ) + if arg_inputs is None and inputs is None: raise AssertionError("'arg_inputs' and 'inputs' should not both be None.") @@ -1465,7 +1574,6 @@

    Source code for torch_tensorrt.dynamo._compiler

    < "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, "engine_capability": engine_capability, "num_avg_timing_iters": num_avg_timing_iters, "dla_sram_size": dla_sram_size, @@ -1474,6 +1582,9 @@

    Source code for torch_tensorrt.dynamo._compiler

    < "timing_cache_path": timing_cache_path, "use_explicit_typing": use_explicit_typing, "use_fp32_acc": use_fp32_acc, + "refit_identical_engine_weights": refit_identical_engine_weights, + "strip_engine_weights": strip_engine_weights, + "immutable_weights": immutable_weights, "enable_weight_streaming": enable_weight_streaming, } diff --git a/docs/_modules/torch_tensorrt/dynamo/_exporter.html b/docs/_modules/torch_tensorrt/dynamo/_exporter.html index dcd44d352e..9253d19db3 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_exporter.html +++ b/docs/_modules/torch_tensorrt/dynamo/_exporter.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._exporter — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo._exporter — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/dynamo/_refit.html b/docs/_modules/torch_tensorrt/dynamo/_refit.html index c0bf8ffcd1..046224ee0e 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_refit.html +++ b/docs/_modules/torch_tensorrt/dynamo/_refit.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._refit — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo._refit — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
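Taken together, the _compiler changes earlier in this patch retire `refit`/`make_refittable` in favour of `immutable_weights`, and add `strip_engine_weights` and `refit_identical_engine_weights`. A hedged sketch of a refittable build using the new flags, with torchvision's ResNet18 standing in for a real model as the examples elsewhere in this patch do:

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models

model = models.resnet18(pretrained=True).eval().cuda()
inputs = (torch.randn(1, 3, 224, 224).cuda(),)
exp_program = torch.export.export(model, inputs)

trt_gm = torch_trt.dynamo.compile(
    exp_program,
    inputs,
    immutable_weights=False,               # replaces the deprecated refit=True / make_refittable=True
    strip_engine_weights=False,            # keep weights inside the serialized engine
    refit_identical_engine_weights=False,  # allow refitting with arbitrary new weights
)

Legacy calls that still pass `refit=True` or `make_refittable=True` are mapped by the shims above to `immutable_weights=False` and emit a `DeprecationWarning`; the new validation also rejects `enable_weight_streaming=True` for a fully refittable build (i.e. when `refit_identical_engine_weights` is also False) with a `ValueError`.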
    @@ -621,13 +621,26 @@

    Source code for torch_tensorrt.dynamo._refit

    if torch_device.type == "cuda"
                 else trt.TensorLocation.HOST
             )
    +
    +        constant_mapping: dict[str, Any] = weight_name_map.pop(
    +            "constant_mapping", {}
    +        )  # type: ignore
             mapping = construct_refit_mapping_from_weight_name_map(
                 weight_name_map, new_gm.state_dict()
             )
    +        constant_mapping_with_type = {}
    +
    +        for constant_name, val in constant_mapping.items():
    +            np_weight_type = val.dtype
    +            val_tensor = torch.from_numpy(val).cuda()
    +            trt_dtype = dtype.try_from(np_weight_type).to(trt.DataType)
    +            torch_dtype = dtype.try_from(np_weight_type).to(torch.dtype)
    +            constant_mapping_with_type[constant_name] = (
    +                val_tensor.clone().reshape(-1).contiguous().to(torch_dtype),
    +                trt_dtype,
    +            )
     
    -        # Debug Use
    -        # correct = construct_refit_mapping(new_gm, input_list, settings)
    -        # comparison = {k: (np.allclose(correct[k][0], mapping[k][0].cpu().numpy(), 1e-2, 1e-2), correct[k][0], mapping[k][0]) for k in mapping if k in correct}
    +        mapping.update(constant_mapping_with_type)
     
             for layer_name in weight_list:
                 if layer_name not in mapping:
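The block above folds the constants stored under the popped `constant_mapping` key into the same `(flattened torch tensor, TensorRT dtype)` form that `construct_refit_mapping_from_weight_name_map` produces for regular weights. A standalone sketch of that normalization, with a hypothetical constant name:

# Illustrative only: "my_constant" is a made-up entry; the dtype helpers are
# used exactly as in the excerpt above.
import numpy as np
import tensorrt as trt
import torch
from torch_tensorrt import dtype

constant_mapping = {"my_constant": np.ones((3, 3), dtype=np.float32)}

constant_mapping_with_type = {}
for name, val in constant_mapping.items():
    trt_dtype = dtype.try_from(val.dtype).to(trt.DataType)
    torch_dtype = dtype.try_from(val.dtype).to(torch.dtype)
    # The refitter consumes flat weight buffers, so each constant is flattened,
    # made contiguous, and paired with the TensorRT dtype it will be set as.
    constant_mapping_with_type[name] = (
        torch.from_numpy(val).cuda().reshape(-1).contiguous().to(torch_dtype),
        trt_dtype,
    )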
    @@ -716,7 +729,7 @@ 

    Source code for torch_tensorrt.dynamo._refit

    ]
             assert (
                 encoded_metadata != ""
    -        ), "The engine provided is either not refittable or was built with a version of Torch-TensorRT that is too old, please recompile using the latest version with make_refittable=True"
    +        ), "The engine provided is either not refittable or was built with a version of Torch-TensorRT that is too old, please recompile using the latest version"
             settings = TorchTensorRTModule.decode_metadata(encoded_metadata)["settings"]
             # Handle torch modules
             compiled_submodules_map = dict(compiled_submodules)
    @@ -734,8 +747,8 @@ 

    Source code for torch_tensorrt.dynamo._refit

    assert settings is not None
     
         assert (
    -        settings.make_refittable
    -    ), "Refitting is not enabled. Please recompile the engine with refit=True."
    +        not settings.immutable_weights
    +    ), "Refitting is not enabled. Please recompile the engine with immutable_weights=False."
     
         if settings.debug:
             set_log_level(logger.parent, logging.DEBUG)
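Since the assertion above now checks `settings.immutable_weights`, only programs compiled with `immutable_weights=False` can be handed to `refit_module_weights`. A hedged end-to-end sketch, again using torchvision's ResNet18 as a stand-in (the second export simulates new weights for the same architecture):

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models
from torch_tensorrt.dynamo import refit_module_weights

inputs = (torch.randn(1, 3, 224, 224).cuda(),)

# Build a refittable program once.
model = models.resnet18(pretrained=False).eval().cuda()
exp_program = torch.export.export(model, inputs)
trt_gm = torch_trt.dynamo.compile(exp_program, inputs, immutable_weights=False)

# Later, swap in different weights without rebuilding the engine.
model2 = models.resnet18(pretrained=True).eval().cuda()
exp_program2 = torch.export.export(model2, inputs)
refitted_gm = refit_module_weights(
    compiled_module=trt_gm,
    new_weight_module=exp_program2,
    arg_inputs=inputs,
)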
    @@ -914,17 +927,21 @@ 

    Source code for torch_tensorrt.dynamo._refit

    weight_name_map=None,
                     )
     
    -        if isinstance(compiled_submodule, TorchTensorRTModule):
    -            serialized_engine = bytes(engine.serialize())
    -            new_engine_info = list(engine_info)
    -            new_engine_info[ENGINE_IDX] = serialized_engine
    -            refitted_engine = torch.classes.tensorrt.Engine(tuple(new_engine_info))
    -            compiled_submodule.engine = refitted_engine
    +        # clear EXCLUDE_WEIGHTS flag
    +        serialization_config = engine.create_serialization_config()
    +        serialization_config.clear_flag(trt.SerializationFlag.EXCLUDE_WEIGHTS)
    +        serialized_engine = engine.serialize_with_config(serialization_config)
    +
    +        if isinstance(
    +            compiled_submodule, (PythonTorchTensorRTModule, TorchTensorRTModule)
    +        ):
    +            compiled_submodule.engine = None  # Clear the engine for TorchTensorRTModule, otherwise it won't be updated
    +            compiled_submodule.serialized_engine = bytes(serialized_engine)
    +            compiled_submodule.setup_engine()
     
             elif inline_module:
    -            serialized_engine = bytes(engine.serialize())
                 new_engine_info = list(engine_info)
    -            new_engine_info[ENGINE_IDX] = serialized_engine
    +            new_engine_info[ENGINE_IDX] = bytes(serialized_engine)
                 refitted_engine = torch.classes.tensorrt.Engine(tuple(new_engine_info))
                 setattr(compiled_module, f"{name}_engine", refitted_engine)
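Re-serialization now goes through a serialization config so that the EXCLUDE_WEIGHTS flag used for weight-stripped builds is cleared before the refitted engine is written back into the module. The same step in isolation, wrapped in a hypothetical helper and assuming `engine` is a live `trt.ICudaEngine`:

import tensorrt as trt

def serialize_with_weights(engine: trt.ICudaEngine) -> bytes:
    # Weight-stripped engines serialize without weights by default; clearing
    # EXCLUDE_WEIGHTS keeps the freshly refitted weights in the returned blob.
    config = engine.create_serialization_config()
    config.clear_flag(trt.SerializationFlag.EXCLUDE_WEIGHTS)
    return bytes(engine.serialize_with_config(config))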
     
    diff --git a/docs/_modules/torch_tensorrt/dynamo/_settings.html b/docs/_modules/torch_tensorrt/dynamo/_settings.html
    index ef0866e39c..6ad62442fb 100644
    --- a/docs/_modules/torch_tensorrt/dynamo/_settings.html
    +++ b/docs/_modules/torch_tensorrt/dynamo/_settings.html
    @@ -9,7 +9,7 @@
       
       
       
    -  torch_tensorrt.dynamo._settings — Torch-TensorRT v2.6.0.dev0+a945aeb documentation
    +  torch_tensorrt.dynamo._settings — Torch-TensorRT v2.6.0.dev0+38b1804 documentation
       
     
       
    @@ -272,7 +272,7 @@
                   
                   
                     
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -484,16 +484,18 @@

    Source code for torch_tensorrt.dynamo._settings

    < ENABLED_PRECISIONS, ENGINE_CAPABILITY, HARDWARE_COMPATIBLE, + IMMUTABLE_WEIGHTS, LAZY_ENGINE_INIT, - MAKE_REFITTABLE, MAX_AUX_STREAMS, MIN_BLOCK_SIZE, NUM_AVG_TIMING_ITERS, OPTIMIZATION_LEVEL, PASS_THROUGH_BUILD_FAILURES, + REFIT_IDENTICAL_ENGINE_WEIGHTS, REQUIRE_FULL_COMPILATION, REUSE_CACHED_ENGINES, SPARSE_WEIGHTS, + STRIP_ENGINE_WEIGHTS, TIMING_CACHE_PATH, TRUNCATE_DOUBLE, USE_EXPLICIT_TYPING, @@ -534,7 +536,6 @@

    Source code for torch_tensorrt.dynamo._settings

    < assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False disable_tf32 (bool): Whether to disable TF32 computation for TRT layers sparse_weights (bool): Whether to allow the builder to use sparse weights - refit (bool): Whether to build a refittable engine engine_capability (trt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer. @@ -549,6 +550,9 @@

    Source code for torch_tensorrt.dynamo._settings

    < reuse_cached_engines (bool): Whether to load the compiled TRT engines from storage use_strong_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Whether to refit the engine with identical weights + strip_engine_weights (bool): Whether to strip the engine weights + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored enable_weight_streaming (bool): Enable weight streaming. enable_cross_compile_for_windows (bool): By default this is False means TensorRT engines can only be executed on the same platform where they were built. True will enable cross-platform compatibility which allows the engine to be built on Linux and run on Windows @@ -572,7 +576,6 @@

    Source code for torch_tensorrt.dynamo._settings

    < disable_tf32: bool = DISABLE_TF32 assume_dynamic_shape_support: bool = ASSUME_DYNAMIC_SHAPE_SUPPORT sparse_weights: bool = SPARSE_WEIGHTS - make_refittable: bool = MAKE_REFITTABLE engine_capability: EngineCapability = field( default_factory=lambda: ENGINE_CAPABILITY ) @@ -588,6 +591,9 @@

    Source code for torch_tensorrt.dynamo._settings

    < reuse_cached_engines: bool = REUSE_CACHED_ENGINES use_explicit_typing: bool = USE_EXPLICIT_TYPING use_fp32_acc: bool = USE_FP32_ACC + refit_identical_engine_weights: bool = REFIT_IDENTICAL_ENGINE_WEIGHTS + strip_engine_weights: bool = STRIP_ENGINE_WEIGHTS + immutable_weights: bool = IMMUTABLE_WEIGHTS enable_weight_streaming: bool = ENABLE_WEIGHT_STREAMING enable_cross_compile_for_windows: bool = ENABLE_CROSS_COMPILE_FOR_WINDOWS
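With `make_refittable` removed, the settings dataclass now carries the three new fields directly. A hedged sketch of constructing it by hand (most callers pass these as keyword arguments to `torch_tensorrt.dynamo.compile` instead):

from torch_tensorrt.dynamo import CompilationSettings

settings = CompilationSettings(
    immutable_weights=False,             # build a refittable engine
    strip_engine_weights=False,          # keep weights in the serialized engine
    refit_identical_engine_weights=False,
)
assert not settings.immutable_weights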
    @@ -599,9 +605,11 @@

    Source code for torch_tensorrt.dynamo._settings

    < "optimization_level", "disable_tf32", "sparse_weights", - "make_refittable", "engine_capability", "hardware_compatible", + "refit_identical_engine_weights", + "strip_engine_weights", # TODO: @Evan to remove this after implementing caching weight-stripped engines as default? + "immutable_weights", "enable_weight_streaming", ) diff --git a/docs/_modules/torch_tensorrt/dynamo/_tracer.html b/docs/_modules/torch_tensorrt/dynamo/_tracer.html index 1ed7f46aa2..9d1c7e21bf 100644 --- a/docs/_modules/torch_tensorrt/dynamo/_tracer.html +++ b/docs/_modules/torch_tensorrt/dynamo/_tracer.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo._tracer — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo._tracer — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html b/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html index f007d8e5b4..971e14ac9a 100644 --- a/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html +++ b/docs/_modules/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModule — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModule — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -530,7 +530,7 @@

    Source code for torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModuleUnion[torch.dtype, dtype] ] = _defaults.ENABLED_PRECISIONS, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, - make_refittable: bool = _defaults.MAKE_REFITTABLE, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, debug: bool = _defaults.DEBUG, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, workspace_size: int = _defaults.WORKSPACE_SIZE, @@ -568,7 +568,7 @@

    Source code for torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModule assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False sparse_weights (bool): Enable sparsity for convolution and fully connected layers. enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels - refit (bool): Enable refitting + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. debug (bool): Enable debuggable engine capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -617,8 +617,8 @@

    Source code for torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModuledevice = to_torch_tensorrt_device(device) enabled_precisions = {dtype._from(p) for p in enabled_precisions} assert ( - make_refittable - ), "'make_refittable' has to be True for a MutableTorchTensorRTModule." + not immutable_weights + ), "`immutable_weights` has to be False for a MutableTorchTensorRTModule." compilation_options = { "enabled_precisions": ( enabled_precisions @@ -645,7 +645,7 @@

    Source code for torch_tensorrt.dynamo.runtime._MutableTorchTensorRTModule"require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, + "immutable_weights": immutable_weights, "engine_capability": engine_capability, "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, diff --git a/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html b/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html index d3345710f9..e8286ea9fe 100644 --- a/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html +++ b/docs/_modules/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -503,7 +503,7 @@

    Source code for torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule

    *, name: str = "", settings: CompilationSettings = CompilationSettings(), - weight_name_map: Any = None, + weight_name_map: Optional[dict[Any, Any]] = None, ): """Takes a name, target device, serialized TensorRT engine, and binding names / order and constructs a PyTorch ``torch.nn.Module`` around it. Uses TensorRT Python APIs to run the engine @@ -516,6 +516,7 @@

    Source code for torch_tensorrt.dynamo.runtime._PythonTorchTensorRTModule

    Keyword Arguments: name (str): Name for module settings (torch_tensorrt.dynamo.CompilationSettings): Settings used to compile engine, assumes engine was built with default compilation settings if object not passed + weight_name_map (dict): Mapping of engine weight name to state_dict weight name Example: diff --git a/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html b/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html index b74aebca01..08e0b2194c 100644 --- a/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html +++ b/docs/_modules/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.html @@ -9,7 +9,7 @@ - torch_tensorrt.dynamo.runtime._TorchTensorRTModule — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo.runtime._TorchTensorRTModule — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -561,6 +561,7 @@

    Source code for torch_tensorrt.dynamo.runtime._TorchTensorRTModule

    Keyword Arguments: name (str): Name for module settings (torch_tensorrt.dynamo.CompilationSettings): Settings used to compile engine, assumes engine was built with default compilation settings if object not passed + weight_name_map (dict): Mapping of engine weight name to state_dict weight name Example: diff --git a/docs/_modules/torch_tensorrt/fx/fx2trt.html b/docs/_modules/torch_tensorrt/fx/fx2trt.html index 0989c6c924..1cf07ba118 100644 --- a/docs/_modules/torch_tensorrt/fx/fx2trt.html +++ b/docs/_modules/torch_tensorrt/fx/fx2trt.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.fx2trt — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.fx.fx2trt — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html b/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html index 258382426e..79aa871ce0 100644 --- a/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html +++ b/docs/_modules/torch_tensorrt/fx/input_tensor_spec.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.input_tensor_spec — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.fx.input_tensor_spec — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/fx/lower.html b/docs/_modules/torch_tensorrt/fx/lower.html index 001b23e740..544f582126 100644 --- a/docs/_modules/torch_tensorrt/fx/lower.html +++ b/docs/_modules/torch_tensorrt/fx/lower.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.lower — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.fx.lower — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/fx/trt_module.html b/docs/_modules/torch_tensorrt/fx/trt_module.html index d7db657580..00060f2daa 100644 --- a/docs/_modules/torch_tensorrt/fx/trt_module.html +++ b/docs/_modules/torch_tensorrt/fx/trt_module.html @@ -9,7 +9,7 @@ - torch_tensorrt.fx.trt_module — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.fx.trt_module — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/logging.html b/docs/_modules/torch_tensorrt/logging.html index 25a3445fc4..b3d03b8e1b 100644 --- a/docs/_modules/torch_tensorrt/logging.html +++ b/docs/_modules/torch_tensorrt/logging.html @@ -9,7 +9,7 @@ - torch_tensorrt.logging — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.logging — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html b/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html index 30ff590cd3..91c04f5504 100644 --- a/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html +++ b/docs/_modules/torch_tensorrt/runtime/_multi_device_safe_mode.html @@ -9,7 +9,7 @@ - torch_tensorrt.runtime._multi_device_safe_mode — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.runtime._multi_device_safe_mode — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/ts/_compile_spec.html b/docs/_modules/torch_tensorrt/ts/_compile_spec.html index 1116b7efac..60bbc2cc4f 100644 --- a/docs/_modules/torch_tensorrt/ts/_compile_spec.html +++ b/docs/_modules/torch_tensorrt/ts/_compile_spec.html @@ -9,7 +9,7 @@ - torch_tensorrt.ts._compile_spec — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.ts._compile_spec — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/ts/_compiler.html b/docs/_modules/torch_tensorrt/ts/_compiler.html index 3531a2d8d6..97b9e0601f 100644 --- a/docs/_modules/torch_tensorrt/ts/_compiler.html +++ b/docs/_modules/torch_tensorrt/ts/_compiler.html @@ -9,7 +9,7 @@ - torch_tensorrt.ts._compiler — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.ts._compiler — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_modules/torch_tensorrt/ts/ptq.html b/docs/_modules/torch_tensorrt/ts/ptq.html index d47cc49aa7..67f695f042 100644 --- a/docs/_modules/torch_tensorrt/ts/ptq.html +++ b/docs/_modules/torch_tensorrt/ts/ptq.html @@ -9,7 +9,7 @@ - torch_tensorrt.ts.ptq — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.ts.ptq — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.rst.txt b/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.rst.txt index 5b7760d73f..cc84e2b968 100644 --- a/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.rst.txt +++ b/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.rst.txt @@ -74,7 +74,7 @@ Small caching example on BERT. "truncate_double": True, "debug": False, "min_block_size": 1, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "engine_cache_dir": "/tmp/torch_trt_bert_engine_cache", diff --git a/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_example.rst.txt b/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_example.rst.txt index b81b2c7f1a..a21b53f623 100644 --- a/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_example.rst.txt +++ b/docs/_sources/tutorials/_rendered_examples/dynamo/engine_caching_example.rst.txt @@ -87,7 +87,7 @@ engines are saved to disk tied to a hash of their corresponding PyTorch subgraph. In a subsequent compilation, either as part of this session or a new session, the cache will pull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude. As such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``), -the engine must be refittable (``make_refittable=True``). See :ref:`refit_engine_example` for more details. +the engine must be refittable (``immutable_weights=False``). See :ref:`refit_engine_example` for more details. .. GENERATED FROM PYTHON SOURCE LINES 67-118 @@ -126,7 +126,7 @@ the engine must be refittable (``make_refittable=True``). See :ref:`refit_engine "enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, }, @@ -193,7 +193,7 @@ previously built engines and refit the weights. enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, + immutable_weights=False, cache_built_engines=cache_built_engines, reuse_cached_engines=reuse_cached_engines, engine_cache_size=1 << 30, # 1GB @@ -311,7 +311,7 @@ Below is an example of a custom engine cache implementation that implements a ``RA "enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "custom_engine_cache": engine_cache, diff --git a/docs/_sources/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.rst.txt b/docs/_sources/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.rst.txt index cd15592a25..70ec515462 100644 --- a/docs/_sources/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.rst.txt +++ b/docs/_sources/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.rst.txt @@ -61,7 +61,7 @@ Initialize the Mutable Torch TensorRT Module with settings.
settings = { "use_python": False, "enabled_precisions": {torch.float32}, - "make_refittable": True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -132,7 +132,7 @@ Stable Diffusion with Huggingface "use_python_runtime": True, "enabled_precisions": {torch.float16}, "debug": True, - "make_refittable": True, + "immutable_weights": False, } model_id = "runwayml/stable-diffusion-v1-5" diff --git a/docs/_sources/tutorials/_rendered_examples/dynamo/refit_engine_example.rst.txt b/docs/_sources/tutorials/_rendered_examples/dynamo/refit_engine_example.rst.txt index d371ecc887..c0acbf4cb8 100644 --- a/docs/_sources/tutorials/_rendered_examples/dynamo/refit_engine_example.rst.txt +++ b/docs/_sources/tutorials/_rendered_examples/dynamo/refit_engine_example.rst.txt @@ -74,7 +74,7 @@ Make a refittable Compilation Program --------------------------------------- The initial step is to compile a module and save it as normal. Note that there is an -additional parameter `make_refittable` that is set to `True`. This parameter is used to +additional parameter `immutable_weights` that is set to `False`. This parameter is used to indicate that the engine being built should support weight refitting later. Engines built without these settings will not be able to be refit. @@ -101,7 +101,7 @@ In this case we are going to compile a ResNet18 model with randomly initialized debug=debug, min_block_size=min_block_size, torch_executed_ops=torch_executed_ops, - make_refittable=True, + immutable_weights=False, reuse_cached_engines=False, ) # Output is a torch.fx.GraphModule diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index 3a895a549f..85569f0bf8 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: 'v2.6.0.dev0+a945aeb', + VERSION: 'v2.6.0.dev0+38b1804', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/cli/torchtrtc.html b/docs/cli/torchtrtc.html index 89265e17aa..4011c0af61 100644 --- a/docs/cli/torchtrtc.html +++ b/docs/cli/torchtrtc.html @@ -10,7 +10,7 @@ - torchtrtc — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torchtrtc — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
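The tutorial diffs above replace the old `make_refittable=True` option with `immutable_weights=False` in the engine-caching and refit examples. As a minimal sketch of how the renamed option is combined with the caching flags, assuming a CUDA-capable machine, torchvision's ResNet18 as a stand-in model, and an illustrative cache directory:

    import torch
    import torch_tensorrt
    import torchvision.models as models

    # Stand-in model and input; any exportable module follows the same pattern.
    model = models.resnet18(pretrained=True).eval().to("cuda")
    inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]

    # cache_built_engines/reuse_cached_engines require a refittable engine,
    # which after the rename is requested with immutable_weights=False
    # (previously make_refittable=True).
    trt_gm = torch_tensorrt.dynamo.compile(
        torch.export.export(model, tuple(inputs)),
        inputs=inputs,
        immutable_weights=False,
        cache_built_engines=True,
        reuse_cached_engines=True,
        engine_cache_dir="/tmp/torch_trt_engine_cache",  # illustrative path
    )
    trt_gm(*inputs)

On a later compilation of the same subgraph, the cached engine would be pulled from disk and refitted rather than rebuilt, which is the behavior the tutorial text describes.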
    diff --git a/docs/contributors/conversion.html b/docs/contributors/conversion.html index 2430092138..469d2311f6 100644 --- a/docs/contributors/conversion.html +++ b/docs/contributors/conversion.html @@ -10,7 +10,7 @@ - Conversion Phase — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Conversion Phase — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/dynamo_converters.html b/docs/contributors/dynamo_converters.html index f14b1cd5ad..b00ad6e3db 100644 --- a/docs/contributors/dynamo_converters.html +++ b/docs/contributors/dynamo_converters.html @@ -10,7 +10,7 @@ - Writing Dynamo Converters — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Writing Dynamo Converters — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/lowering.html b/docs/contributors/lowering.html index e94e89c522..2a13451003 100644 --- a/docs/contributors/lowering.html +++ b/docs/contributors/lowering.html @@ -10,7 +10,7 @@ - Lowering Phase — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Lowering Phase — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/partitioning.html b/docs/contributors/partitioning.html index 0630ddd942..198fd2f403 100644 --- a/docs/contributors/partitioning.html +++ b/docs/contributors/partitioning.html @@ -10,7 +10,7 @@ - Partitioning Phase — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Partitioning Phase — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/phases.html b/docs/contributors/phases.html index 776fd09aab..82cc71fc0a 100644 --- a/docs/contributors/phases.html +++ b/docs/contributors/phases.html @@ -10,7 +10,7 @@ - Compiler Phases — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiler Phases — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/runtime.html b/docs/contributors/runtime.html index 04c05e5c5d..f575af78c7 100644 --- a/docs/contributors/runtime.html +++ b/docs/contributors/runtime.html @@ -10,7 +10,7 @@ - Runtime Phase — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Runtime Phase — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/system_overview.html b/docs/contributors/system_overview.html index 4c93b63465..2e83b473e4 100644 --- a/docs/contributors/system_overview.html +++ b/docs/contributors/system_overview.html @@ -10,7 +10,7 @@ - System Overview — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + System Overview — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/ts_converters.html b/docs/contributors/ts_converters.html index 7381ad2c5d..dcf2a2c94f 100644 --- a/docs/contributors/ts_converters.html +++ b/docs/contributors/ts_converters.html @@ -10,7 +10,7 @@ - Writing TorchScript Converters — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Writing TorchScript Converters — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/useful_links.html b/docs/contributors/useful_links.html index b4a533f47a..4b8a72389b 100644 --- a/docs/contributors/useful_links.html +++ b/docs/contributors/useful_links.html @@ -10,7 +10,7 @@ - Useful Links for Torch-TensorRT Development — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Useful Links for Torch-TensorRT Development — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/contributors/writing_dynamo_aten_lowering_passes.html b/docs/contributors/writing_dynamo_aten_lowering_passes.html index 8d5b5b1cb1..3993142ddd 100644 --- a/docs/contributors/writing_dynamo_aten_lowering_passes.html +++ b/docs/contributors/writing_dynamo_aten_lowering_passes.html @@ -10,7 +10,7 @@ - Writing Dynamo ATen Lowering Passes — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Writing Dynamo ATen Lowering Passes — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/dynamo/dynamo_export.html b/docs/dynamo/dynamo_export.html index 1dcb8245e9..2b482d42f6 100644 --- a/docs/dynamo/dynamo_export.html +++ b/docs/dynamo/dynamo_export.html @@ -10,7 +10,7 @@ - Compiling Exported Programs with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiling Exported Programs with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/dynamo/torch_compile.html b/docs/dynamo/torch_compile.html index c7ac92b318..c0fcec1ffd 100644 --- a/docs/dynamo/torch_compile.html +++ b/docs/dynamo/torch_compile.html @@ -10,7 +10,7 @@ - TensorRT Backend for torch.compile — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + TensorRT Backend for torch.compile — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -488,7 +488,7 @@

    Key Features¶

    -class torch_tensorrt.dynamo.CompilationSettings(enabled_precisions: ~typing.Set[~torch_tensorrt._enums.dtype] = <factory>, debug: bool = False, workspace_size: int = 0, min_block_size: int = 5, torch_executed_ops: ~typing.Collection[~typing.Union[~typing.Callable[[...], ~typing.Any], str]] = <factory>, pass_through_build_failures: bool = False, max_aux_streams: ~typing.Optional[int] = None, version_compatible: bool = False, optimization_level: ~typing.Optional[int] = None, use_python_runtime: ~typing.Optional[bool] = False, truncate_double: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, device: ~torch_tensorrt._Device.Device = <factory>, require_full_compilation: bool = False, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, make_refittable: bool = False, engine_capability: ~torch_tensorrt._enums.EngineCapability = <factory>, num_avg_timing_iters: int = 1, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, dryrun: ~typing.Union[bool, str] = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init: bool = False, cache_built_engines: bool = False, reuse_cached_engines: bool = False, use_explicit_typing: bool = False, use_fp32_acc: bool = False, enable_weight_streaming: bool = False, enable_cross_compile_for_windows: bool = False)[source]¶
    +class torch_tensorrt.dynamo.CompilationSettings(enabled_precisions: ~typing.Set[~torch_tensorrt._enums.dtype] = <factory>, debug: bool = False, workspace_size: int = 0, min_block_size: int = 5, torch_executed_ops: ~typing.Collection[~typing.Union[~typing.Callable[[...], ~typing.Any], str]] = <factory>, pass_through_build_failures: bool = False, max_aux_streams: ~typing.Optional[int] = None, version_compatible: bool = False, optimization_level: ~typing.Optional[int] = None, use_python_runtime: ~typing.Optional[bool] = False, truncate_double: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, device: ~torch_tensorrt._Device.Device = <factory>, require_full_compilation: bool = False, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, engine_capability: ~torch_tensorrt._enums.EngineCapability = <factory>, num_avg_timing_iters: int = 1, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, dryrun: ~typing.Union[bool, str] = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init: bool = False, cache_built_engines: bool = False, reuse_cached_engines: bool = False, use_explicit_typing: bool = False, use_fp32_acc: bool = False, refit_identical_engine_weights: bool = False, strip_engine_weights: bool = False, immutable_weights: bool = True, enable_weight_streaming: bool = False, enable_cross_compile_for_windows: bool = False)[source]¶

    Compilation settings for Torch-TensorRT Dynamo Paths

    Parameters
    @@ -516,7 +516,6 @@

    Customizable Settings

    assume_dynamic_shape_support (bool) – Setting this to true enables the converters to work for both dynamic and static shapes. Default: False

  • disable_tf32 (bool) – Whether to disable TF32 computation for TRT layers

  • sparse_weights (bool) – Whether to allow the builder to use sparse weights

  • -
  • refit (bool) – Whether to build a refittable engine

  • engine_capability (trt.EngineCapability) – Restrict kernel selection to safe gpu kernels or safe dla kernels

  • num_avg_timing_iters (python:int) – Number of averaging timing iterations used to select kernels

  • dla_sram_size (python:int) – Fast software managed RAM used by DLA to communicate within a layer.

  • @@ -531,6 +530,9 @@

    Customizable Settings

    reuse_cached_engines (bool) – Whether to load the compiled TRT engines from storage

  • use_strong_typing (bool) – This flag enables strong typing in TensorRT compilation which respects the precisions set in the PyTorch model. This is useful when users have mixed precision graphs.

  • use_fp32_acc (bool) – This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions.

  • +
  • refit_identical_engine_weights (bool) – Whether to refit the engine with identical weights

  • +
  • strip_engine_weights (bool) – Whether to strip the engine weights

  • +
  • immutable_weights (bool) – Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, strip_engine_weights and refit_identical_engine_weights will be ignored

  • enable_weight_streaming (bool) – Enable weight streaming.

  • enable_cross_compile_for_windows (bool) – By default this is False, which means TensorRT engines can only be executed on the same platform where they were built. Setting it to True enables cross-platform compatibility, allowing the engine to be built on Linux and run on Windows
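As a small sketch of how the three weight-handling settings listed above interact (the values are chosen only for illustration and are not taken from the documentation itself):

    from torch_tensorrt.dynamo import CompilationSettings

    # immutable_weights defaults to True, which builds a non-refittable engine
    # and causes the two flags below to be ignored; set it to False to opt in
    # to refitting.
    settings = CompilationSettings(
        immutable_weights=False,               # allow the engine to be refitted
        refit_identical_engine_weights=False,  # refit may supply new weights
        strip_engine_weights=False,            # keep weights in the serialized engine
    )
    print(settings.immutable_weights, settings.strip_engine_weights)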

  • diff --git a/docs/fx/getting_started_with_fx_path.html b/docs/fx/getting_started_with_fx_path.html index 98d255ea54..8fd9beba7c 100644 --- a/docs/fx/getting_started_with_fx_path.html +++ b/docs/fx/getting_started_with_fx_path.html @@ -10,7 +10,7 @@ - Torch-TensorRT (FX Frontend) User Guide — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch-TensorRT (FX Frontend) User Guide — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/genindex.html b/docs/genindex.html index 1abce9d54f..5aaff21a94 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -9,7 +9,7 @@ - Index — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Index — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/getting_started/installation.html b/docs/getting_started/installation.html index 60da70d4b3..968abeb077 100644 --- a/docs/getting_started/installation.html +++ b/docs/getting_started/installation.html @@ -10,7 +10,7 @@ - Installation — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Installation — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/getting_started/jetpack.html b/docs/getting_started/jetpack.html index 214b086ad7..be6e492e0e 100644 --- a/docs/getting_started/jetpack.html +++ b/docs/getting_started/jetpack.html @@ -10,7 +10,7 @@ - Overview — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Overview — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/getting_started/quick_start.html b/docs/getting_started/quick_start.html index c8ccf89629..84327a6b99 100644 --- a/docs/getting_started/quick_start.html +++ b/docs/getting_started/quick_start.html @@ -10,7 +10,7 @@ - Quick Start — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Quick Start — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/index.html b/docs/index.html index 51dfc4cd13..0154ae265a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ - Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -274,7 +274,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/indices/supported_ops.html b/docs/indices/supported_ops.html index 232a4cb965..b4b72f28a6 100644 --- a/docs/indices/supported_ops.html +++ b/docs/indices/supported_ops.html @@ -10,7 +10,7 @@ - Operators Supported — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Operators Supported — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -274,7 +274,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/objects.inv b/docs/objects.inv index 456ee96582ea987f53d1c30ef54a0085ed4c8c8c..38a2b98be7f31ec9d8e5d69153af664f0f06ed97 100644 GIT binary patch delta 20 bcmX@n$aJESX@VcSu|<-hg@MV&(BcLFO*;n< delta 20 bcmX@n$aJESX@VbnqNRyxVrtUH(BcLFQ3MB| diff --git a/docs/py-modindex.html b/docs/py-modindex.html index c2e1386dfa..d4aed3d9eb 100644 --- a/docs/py-modindex.html +++ b/docs/py-modindex.html @@ -9,7 +9,7 @@ - Python Module Index — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Python Module Index — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/py_api/dynamo.html b/docs/py_api/dynamo.html index 7f6ecbfdf0..01397fffab 100644 --- a/docs/py_api/dynamo.html +++ b/docs/py_api/dynamo.html @@ -10,7 +10,7 @@ - torch_tensorrt.dynamo — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.dynamo — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -475,7 +475,7 @@

    Functions¶

    -torch_tensorrt.dynamo.compile(exported_program: ExportedProgram, inputs: Optional[Sequence[Sequence[Any]]] = None, *, arg_inputs: Optional[Sequence[Sequence[Any]]] = None, kwarg_inputs: Optional[dict[Any, Any]] = None, device: Optional[Union[Device, device, str]] = None, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, enabled_precisions: Union[Set[Union[dtype, dtype]], Tuple[Union[dtype, dtype]]] = {dtype.f32}, engine_capability: EngineCapability = EngineCapability.STANDARD, make_refittable: bool = False, debug: bool = False, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 5, torch_executed_ops: Optional[Collection[Union[Callable[[...], Any], str]]] = None, torch_executed_modules: Optional[List[str]] = None, pass_through_build_failures: bool = False, max_aux_streams: Optional[int] = None, version_compatible: bool = False, optimization_level: Optional[int] = None, use_python_runtime: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, dryrun: bool = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init: bool = False, cache_built_engines: bool = False, reuse_cached_engines: bool = False, engine_cache_dir: str = '/tmp/torch_tensorrt_engine_cache', engine_cache_size: int = 1073741824, custom_engine_cache: Optional[BaseEngineCache] = None, use_explicit_typing: bool = False, use_fp32_acc: bool = False, enable_weight_streaming: bool = False, **kwargs: Any) GraphModule[source]¶
    +torch_tensorrt.dynamo.compile(exported_program: ExportedProgram, inputs: Optional[Sequence[Sequence[Any]]] = None, *, arg_inputs: Optional[Sequence[Sequence[Any]]] = None, kwarg_inputs: Optional[dict[Any, Any]] = None, device: Optional[Union[Device, device, str]] = None, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, enabled_precisions: Union[Set[Union[dtype, dtype]], Tuple[Union[dtype, dtype]]] = {dtype.f32}, engine_capability: EngineCapability = EngineCapability.STANDARD, debug: bool = False, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 5, torch_executed_ops: Optional[Collection[Union[Callable[[...], Any], str]]] = None, torch_executed_modules: Optional[List[str]] = None, pass_through_build_failures: bool = False, max_aux_streams: Optional[int] = None, version_compatible: bool = False, optimization_level: Optional[int] = None, use_python_runtime: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, dryrun: bool = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init: bool = False, cache_built_engines: bool = False, reuse_cached_engines: bool = False, engine_cache_dir: str = '/tmp/torch_tensorrt_engine_cache', engine_cache_size: int = 5368709120, custom_engine_cache: Optional[BaseEngineCache] = None, use_explicit_typing: bool = False, use_fp32_acc: bool = False, refit_identical_engine_weights: bool = False, strip_engine_weights: bool = False, immutable_weights: bool = True, enable_weight_streaming: bool = False, **kwargs: Any) GraphModule[source]¶

    Compile an ExportedProgram module for NVIDIA GPUs using TensorRT

    Takes an existing TorchScript module and a set of settings to configure the compiler and will convert methods to JIT Graphs which call equivalent TensorRT engines
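A minimal, hedged usage sketch of this signature follows; the toy linear model, input shape, and min_block_size=1 are assumptions made only to keep the example small:

    import torch
    import torch_tensorrt

    model = torch.nn.Linear(16, 4).eval().cuda()
    example_inputs = (torch.randn(2, 16, device="cuda"),)

    # Capture an ExportedProgram and hand it to the dynamo frontend.
    exported = torch.export.export(model, example_inputs)
    trt_module = torch_tensorrt.dynamo.compile(
        exported,
        inputs=list(example_inputs),
        min_block_size=1,  # let even this tiny graph be converted to TensorRT
    )
    print(trt_module(*example_inputs).shape)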

    @@ -518,7 +518,6 @@

    FunctionsEngineCapability) – Restrict kernel selection to safe gpu kernels or safe dla kernels

  • num_avg_timing_iters (python:int) – Number of averaging timing iterations used to select kernels

  • @@ -550,6 +549,9 @@

    Functions¶

    -class torch_tensorrt.dynamo.CompilationSettings(enabled_precisions: ~typing.Set[~torch_tensorrt._enums.dtype] = <factory>, debug: bool = False, workspace_size: int = 0, min_block_size: int = 5, torch_executed_ops: ~typing.Collection[~typing.Union[~typing.Callable[[...], ~typing.Any], str]] = <factory>, pass_through_build_failures: bool = False, max_aux_streams: ~typing.Optional[int] = None, version_compatible: bool = False, optimization_level: ~typing.Optional[int] = None, use_python_runtime: ~typing.Optional[bool] = False, truncate_double: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, device: ~torch_tensorrt._Device.Device = <factory>, require_full_compilation: bool = False, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, make_refittable: bool = False, engine_capability: ~torch_tensorrt._enums.EngineCapability = <factory>, num_avg_timing_iters: int = 1, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, dryrun: ~typing.Union[bool, str] = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init: bool = False, cache_built_engines: bool = False, reuse_cached_engines: bool = False, use_explicit_typing: bool = False, use_fp32_acc: bool = False, enable_weight_streaming: bool = False, enable_cross_compile_for_windows: bool = False)[source]¶
    +class torch_tensorrt.dynamo.CompilationSettings(enabled_precisions: ~typing.Set[~torch_tensorrt._enums.dtype] = <factory>, debug: bool = False, workspace_size: int = 0, min_block_size: int = 5, torch_executed_ops: ~typing.Collection[~typing.Union[~typing.Callable[[...], ~typing.Any], str]] = <factory>, pass_through_build_failures: bool = False, max_aux_streams: ~typing.Optional[int] = None, version_compatible: bool = False, optimization_level: ~typing.Optional[int] = None, use_python_runtime: ~typing.Optional[bool] = False, truncate_double: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, device: ~torch_tensorrt._Device.Device = <factory>, require_full_compilation: bool = False, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, engine_capability: ~torch_tensorrt._enums.EngineCapability = <factory>, num_avg_timing_iters: int = 1, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, dryrun: ~typing.Union[bool, str] = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init: bool = False, cache_built_engines: bool = False, reuse_cached_engines: bool = False, use_explicit_typing: bool = False, use_fp32_acc: bool = False, refit_identical_engine_weights: bool = False, strip_engine_weights: bool = False, immutable_weights: bool = True, enable_weight_streaming: bool = False, enable_cross_compile_for_windows: bool = False)[source]¶

    Compilation settings for Torch-TensorRT Dynamo Paths

    Parameters
    @@ -685,7 +687,6 @@

    Classes - torch_tensorrt.fx — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.fx — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/py_api/logging.html b/docs/py_api/logging.html index 2381d7eb43..5a6a4ecaf0 100644 --- a/docs/py_api/logging.html +++ b/docs/py_api/logging.html @@ -10,7 +10,7 @@ - torch_tensorrt.logging — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.logging — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/py_api/ptq.html b/docs/py_api/ptq.html index 11993a323d..d06316ce0e 100644 --- a/docs/py_api/ptq.html +++ b/docs/py_api/ptq.html @@ -10,7 +10,7 @@ - torch_tensorrt.ts.ptq — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.ts.ptq — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/py_api/runtime.html b/docs/py_api/runtime.html index 8c3307726e..214d82823a 100644 --- a/docs/py_api/runtime.html +++ b/docs/py_api/runtime.html @@ -10,7 +10,7 @@ - torch_tensorrt.runtime — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.runtime — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -577,13 +577,13 @@

    Classes
    -class torch_tensorrt.runtime.PythonTorchTensorRTModule(serialized_engine: ~typing.Optional[bytes] = None, input_binding_names: ~typing.Optional[~typing.List[str]] = None, output_binding_names: ~typing.Optional[~typing.List[str]] = None, *, name: str = '', settings: ~torch_tensorrt.dynamo._settings.CompilationSettings = CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, make_refittable=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init=False, cache_built_engines=False, reuse_cached_engines=False, use_explicit_typing=False, use_fp32_acc=False, enable_weight_streaming=False, enable_cross_compile_for_windows=False), weight_name_map: ~typing.Any = None)[source]¶
    +class torch_tensorrt.runtime.PythonTorchTensorRTModule(serialized_engine: ~typing.Optional[bytes] = None, input_binding_names: ~typing.Optional[~typing.List[str]] = None, output_binding_names: ~typing.Optional[~typing.List[str]] = None, *, name: str = '', settings: ~torch_tensorrt.dynamo._settings.CompilationSettings = CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init=False, cache_built_engines=False, reuse_cached_engines=False, use_explicit_typing=False, use_fp32_acc=False, refit_identical_engine_weights=False, strip_engine_weights=False, immutable_weights=True, enable_weight_streaming=False, enable_cross_compile_for_windows=False), weight_name_map: ~typing.Optional[dict[typing.Any, typing.Any]] = None)[source]¶

    PythonTorchTensorRTModule is a PyTorch module which encompasses an arbitrary TensorRT Engine.

    This module is backed by the Torch-TensorRT runtime and is only compatible with FX / Dynamo / Python deployments. This module cannot be serialized to torchscript via torch.jit.trace for C++ deployment.

    -__init__(serialized_engine: ~typing.Optional[bytes] = None, input_binding_names: ~typing.Optional[~typing.List[str]] = None, output_binding_names: ~typing.Optional[~typing.List[str]] = None, *, name: str = '', settings: ~torch_tensorrt.dynamo._settings.CompilationSettings = CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, make_refittable=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init=False, cache_built_engines=False, reuse_cached_engines=False, use_explicit_typing=False, use_fp32_acc=False, enable_weight_streaming=False, enable_cross_compile_for_windows=False), weight_name_map: ~typing.Any = None)[source]¶
    +__init__(serialized_engine: ~typing.Optional[bytes] = None, input_binding_names: ~typing.Optional[~typing.List[str]] = None, output_binding_names: ~typing.Optional[~typing.List[str]] = None, *, name: str = '', settings: ~torch_tensorrt.dynamo._settings.CompilationSettings = CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/torch_tensorrt_engine_cache/timing_cache.bin', lazy_engine_init=False, cache_built_engines=False, reuse_cached_engines=False, use_explicit_typing=False, use_fp32_acc=False, refit_identical_engine_weights=False, strip_engine_weights=False, immutable_weights=True, enable_weight_streaming=False, enable_cross_compile_for_windows=False), weight_name_map: ~typing.Optional[dict[typing.Any, typing.Any]] = None)[source]¶

    Takes a name, target device, serialized TensorRT engine, and binding names / order and constructs a PyTorch torch.nn.Module around it. Uses TensorRT Python APIs to run the engine
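A common way to end up with this module, rather than constructing it from a serialized engine by hand, is to compile with the Python runtime enabled; the toy model below is an assumption used only to keep the sketch short:

    import torch
    import torch_tensorrt

    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval().cuda()
    example_inputs = (torch.randn(1, 8, device="cuda"),)

    # use_python_runtime=True wraps each TensorRT engine in a
    # PythonTorchTensorRTModule instead of the C++ runtime wrapper.
    trt_gm = torch_tensorrt.dynamo.compile(
        torch.export.export(model, example_inputs),
        inputs=list(example_inputs),
        min_block_size=1,
        use_python_runtime=True,
    )
    for name, submodule in trt_gm.named_children():
        print(name, type(submodule).__name__)  # expect PythonTorchTensorRTModule entries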

    @@ -598,6 +598,7 @@

    Classes

    diff --git a/docs/py_api/torch_tensorrt.html b/docs/py_api/torch_tensorrt.html index d3511080b1..c741d6627d 100644 --- a/docs/py_api/torch_tensorrt.html +++ b/docs/py_api/torch_tensorrt.html @@ -10,7 +10,7 @@ - torch_tensorrt — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -601,7 +601,7 @@

    Functions¶

    -class torch_tensorrt.MutableTorchTensorRTModule(pytorch_model: Module, *, device: Optional[Union[Device, device, str]] = None, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, enabled_precisions: Set[Union[dtype, dtype]] = {dtype.f32}, engine_capability: EngineCapability = EngineCapability.STANDARD, make_refittable: bool = False, debug: bool = False, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 5, torch_executed_ops: Optional[Collection[Union[Callable[[...], Any], str]]] = None, torch_executed_modules: Optional[List[str]] = None, pass_through_build_failures: bool = False, max_aux_streams: Optional[int] = None, version_compatible: bool = False, optimization_level: Optional[int] = None, use_python_runtime: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, dryrun: bool = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', **kwargs: Any)[source]¶
    +class torch_tensorrt.MutableTorchTensorRTModule(pytorch_model: Module, *, device: Optional[Union[Device, device, str]] = None, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, enabled_precisions: Set[Union[dtype, dtype]] = {dtype.f32}, engine_capability: EngineCapability = EngineCapability.STANDARD, immutable_weights: bool = True, debug: bool = False, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 5, torch_executed_ops: Optional[Collection[Union[Callable[[...], Any], str]]] = None, torch_executed_modules: Optional[List[str]] = None, pass_through_build_failures: bool = False, max_aux_streams: Optional[int] = None, version_compatible: bool = False, optimization_level: Optional[int] = None, use_python_runtime: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, dryrun: bool = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', **kwargs: Any)[source]¶

    Initialize a MutableTorchTensorRTModule to seamlessly manipulate it like a regular PyTorch module. All TensorRT compilation and refitting processes are handled automatically as you work with the module. Any changes to its attributes or loading a different state_dict will trigger refitting or recompilation, @@ -611,7 +611,7 @@

    Classes
    -__init__(pytorch_model: Module, *, device: Optional[Union[Device, device, str]] = None, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, enabled_precisions: Set[Union[dtype, dtype]] = {dtype.f32}, engine_capability: EngineCapability = EngineCapability.STANDARD, make_refittable: bool = False, debug: bool = False, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 5, torch_executed_ops: Optional[Collection[Union[Callable[[...], Any], str]]] = None, torch_executed_modules: Optional[List[str]] = None, pass_through_build_failures: bool = False, max_aux_streams: Optional[int] = None, version_compatible: bool = False, optimization_level: Optional[int] = None, use_python_runtime: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, dryrun: bool = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', **kwargs: Any) None[source]¶
    +__init__(pytorch_model: Module, *, device: Optional[Union[Device, device, str]] = None, disable_tf32: bool = False, assume_dynamic_shape_support: bool = False, sparse_weights: bool = False, enabled_precisions: Set[Union[dtype, dtype]] = {dtype.f32}, engine_capability: EngineCapability = EngineCapability.STANDARD, immutable_weights: bool = True, debug: bool = False, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 5, torch_executed_ops: Optional[Collection[Union[Callable[[...], Any], str]]] = None, torch_executed_modules: Optional[List[str]] = None, pass_through_build_failures: bool = False, max_aux_streams: Optional[int] = None, version_compatible: bool = False, optimization_level: Optional[int] = None, use_python_runtime: bool = False, use_fast_partitioner: bool = True, enable_experimental_decompositions: bool = False, dryrun: bool = False, hardware_compatible: bool = False, timing_cache_path: str = '/tmp/torch_tensorrt_engine_cache/timing_cache.bin', **kwargs: Any) None[source]¶
    Parameters

    pytorch_model (torch.nn.module) – Source module that needs to be accelerated

    @@ -627,7 +627,7 @@

    ClassesEngineCapability) – Restrict kernel selection to safe gpu kernels or safe dla kernels

  • num_avg_timing_iters (python:int) – Number of averaging timing iterations used to select kernels
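A minimal sketch of the workflow described above, assuming torchvision's ResNet18 as the source module; loading a different state_dict is what marks the wrapped engine for refit on the next call:

    import torch
    import torchvision.models as models
    import torch_tensorrt

    mutable_module = torch_tensorrt.MutableTorchTensorRTModule(
        models.resnet18(pretrained=True).eval().to("cuda"),
        enabled_precisions={torch.float32},
        immutable_weights=False,  # keep the engine refittable (the default is now True)
    )
    inputs = [torch.randn((1, 3, 224, 224)).to("cuda")]
    mutable_module(*inputs)  # the first call triggers compilation

    # Swapping in different weights triggers a refit on the next forward pass.
    mutable_module.load_state_dict(models.resnet18().state_dict())
    mutable_module(*inputs)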

  • diff --git a/docs/py_api/ts.html b/docs/py_api/ts.html index 0dfed3f6fd..4eaf9c81da 100644 --- a/docs/py_api/ts.html +++ b/docs/py_api/ts.html @@ -10,7 +10,7 @@ - torch_tensorrt.ts — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + torch_tensorrt.ts — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -691,7 +691,7 @@

    Functions
    -torch_tensorrt.ts.TensorRTCompileSpec(inputs: Optional[List[torch.Tensor | Input]] = None, input_signature: Optional[Any] = None, device: Optional[Union[device, Device]] = None, disable_tf32: bool = False, sparse_weights: bool = False, enabled_precisions: Optional[Set[Union[dtype, dtype]]] = None, refit: bool = False, debug: bool = False, capability: EngineCapability = EngineCapability.STANDARD, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_long_and_double: bool = False, calibrator: object = None, allow_shape_tensors: bool = False) <torch.ScriptClass object at 0x7f690a9985b0>[source]¶
    +torch_tensorrt.ts.TensorRTCompileSpec(inputs: Optional[List[torch.Tensor | Input]] = None, input_signature: Optional[Any] = None, device: Optional[Union[device, Device]] = None, disable_tf32: bool = False, sparse_weights: bool = False, enabled_precisions: Optional[Set[Union[dtype, dtype]]] = None, refit: bool = False, debug: bool = False, capability: EngineCapability = EngineCapability.STANDARD, num_avg_timing_iters: int = 1, workspace_size: int = 0, dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_long_and_double: bool = False, calibrator: object = None, allow_shape_tensors: bool = False) <torch.ScriptClass object at 0x7fe4a04e4030>[source]¶

    Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend

    Keyword Arguments
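As a hedged sketch of how the returned spec is typically consumed (the toy scripted model, input shape, and use of the private torch._C._jit_to_backend entry point mirror the TorchScript-frontend tutorial and are assumptions here):

    import torch
    import torch_tensorrt

    scripted = torch.jit.script(torch.nn.Linear(8, 4).eval().cuda())

    # Build the formatted spec dictionary for the "forward" method.
    spec = {
        "forward": torch_tensorrt.ts.TensorRTCompileSpec(
            inputs=[torch_tensorrt.Input((1, 8))],
            enabled_precisions={torch.float32},
        )
    }
    # Lower the scripted module to the TorchScript "tensorrt" backend.
    trt_backend_module = torch._C._jit_to_backend("tensorrt", scripted, spec)
    print(trt_backend_module.forward(torch.randn(1, 8, device="cuda")))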
    diff --git a/docs/search.html b/docs/search.html index 567ad1a4dd..71798dae99 100644 --- a/docs/search.html +++ b/docs/search.html @@ -9,7 +9,7 @@ - Search — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Search — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -272,7 +272,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/searchindex.js b/docs/searchindex.js index b15267481d..2dae9f230c 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["_cpp_api/classtorch__tensorrt_1_1DataType", "_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType", "_cpp_api/classtorch__tensorrt_1_1TensorFormat", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator", "_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502", "_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268", "_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e", "_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827", "_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b", "_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da", "_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59", "_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883", "_cpp_api/dir_cpp", "_cpp_api/dir_cpp_include", "_cpp_api/dir_cpp_include_torch_tensorrt", "_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558", "_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb", "_cpp_api/file_cpp_include_torch_tensorrt_logging.h", "_cpp_api/file_cpp_include_torch_tensorrt_macros.h", "_cpp_api/file_cpp_include_torch_tensorrt_ptq.h", "_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h", "_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3", "_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650", "_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a", "_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2", "_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8", "_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5", "_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc", "_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c", "_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178", "_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797", "_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9", "_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9", "_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528", "_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384", "_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1", "_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2", "_cpp_api/namespace_torch_tensorrt", "_cpp_api/namespace_torch_tensorrt__logging", "_cpp_api/namespace_torch_tensorrt__ptq", "_cpp_api/namespace_torch_tensorrt__torchscript", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h", "_cpp_api/structtorch__tensorrt_1_1Device", "_cpp_api/structtorch__tensorrt_1_1GraphInputs", "_cpp_api/structtorch__tensorrt_1_1Input", "_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec", "_cpp_api/torch_tensort_cpp", "_cpp_api/unabridged_orphan", "cli/torchtrtc", "contributors/conversion", "contributors/dynamo_converters", "contributors/lowering", "contributors/partitioning", "contributors/phases", "contributors/runtime", "contributors/system_overview", "contributors/ts_converters", "contributors/useful_links", 
"contributors/writing_dynamo_aten_lowering_passes", "dynamo/dynamo_export", "dynamo/torch_compile", "fx/getting_started_with_fx_path", "getting_started/installation", "getting_started/jetpack", "getting_started/quick_start", "index", "indices/supported_ops", "py_api/dynamo", "py_api/fx", "py_api/logging", "py_api/ptq", "py_api/runtime", "py_api/torch_tensorrt", "py_api/ts", "sg_execution_times", "src/pytorch-sphinx-theme/docs/changelog", "src/pytorch-sphinx-theme/docs/configuring", "src/pytorch-sphinx-theme/docs/demo/api", "src/pytorch-sphinx-theme/docs/demo/demo", "src/pytorch-sphinx-theme/docs/demo/lists_tables", "src/pytorch-sphinx-theme/docs/demo/long", "src/pytorch-sphinx-theme/docs/demo/structure", "src/pytorch-sphinx-theme/docs/index", "src/pytorch-sphinx-theme/docs/installing", "ts/creating_torchscript_module_in_python", "ts/getting_started_with_cpp_api", "ts/getting_started_with_python_api", "ts/ptq", "ts/torchscript_frontend_from_pytorch", "tutorials/_rendered_examples/dynamo/converter_overloading", "tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows", "tutorials/_rendered_examples/dynamo/custom_kernel_plugins", "tutorials/_rendered_examples/dynamo/engine_caching_bert_example", "tutorials/_rendered_examples/dynamo/engine_caching_example", "tutorials/_rendered_examples/dynamo/index", "tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example", "tutorials/_rendered_examples/dynamo/refit_engine_example", "tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage", "tutorials/_rendered_examples/dynamo/torch_compile_resnet_example", "tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion", "tutorials/_rendered_examples/dynamo/torch_compile_transformers_example", "tutorials/_rendered_examples/dynamo/torch_export_cudagraphs", "tutorials/_rendered_examples/dynamo/torch_export_gpt2", "tutorials/_rendered_examples/dynamo/torch_export_llama2", "tutorials/_rendered_examples/dynamo/vgg16_ptq", "tutorials/_rendered_examples/dynamo/weight_streaming_example", "tutorials/_rendered_examples/index", "tutorials/_rendered_examples/triton/index", "tutorials/notebooks", "tutorials/serving_torch_tensorrt_with_triton", "user_guide/dynamic_shapes", "user_guide/mixed_precision", "user_guide/runtime", "user_guide/saving_models", "user_guide/torch_tensorrt_explained", "user_guide/using_dla"], "filenames": ["_cpp_api/classtorch__tensorrt_1_1DataType.rst", "_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.rst", "_cpp_api/classtorch__tensorrt_1_1TensorFormat.rst", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.rst", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.rst", "_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.rst", "_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.rst", "_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.rst", "_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.rst", "_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.rst", "_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.rst", "_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.rst", "_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.rst", "_cpp_api/dir_cpp.rst", "_cpp_api/dir_cpp_include.rst", "_cpp_api/dir_cpp_include_torch_tensorrt.rst", "_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.rst", "_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.rst", "_cpp_api/file_cpp_include_torch_tensorrt_logging.h.rst", 
"_cpp_api/file_cpp_include_torch_tensorrt_macros.h.rst", "_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.rst", "_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.rst", "_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.rst", "_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.rst", "_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.rst", "_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.rst", "_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.rst", "_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.rst", "_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.rst", "_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.rst", "_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.rst", "_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.rst", "_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.rst", "_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.rst", "_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.rst", "_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.rst", "_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.rst", "_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.rst", "_cpp_api/namespace_torch_tensorrt.rst", "_cpp_api/namespace_torch_tensorrt__logging.rst", "_cpp_api/namespace_torch_tensorrt__ptq.rst", "_cpp_api/namespace_torch_tensorrt__torchscript.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.rst", "_cpp_api/structtorch__tensorrt_1_1Device.rst", "_cpp_api/structtorch__tensorrt_1_1GraphInputs.rst", "_cpp_api/structtorch__tensorrt_1_1Input.rst", "_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.rst", "_cpp_api/torch_tensort_cpp.rst", "_cpp_api/unabridged_orphan.rst", "cli/torchtrtc.rst", "contributors/conversion.rst", "contributors/dynamo_converters.rst", "contributors/lowering.rst", "contributors/partitioning.rst", "contributors/phases.rst", "contributors/runtime.rst", "contributors/system_overview.rst", "contributors/ts_converters.rst", "contributors/useful_links.rst", "contributors/writing_dynamo_aten_lowering_passes.rst", "dynamo/dynamo_export.rst", "dynamo/torch_compile.rst", "fx/getting_started_with_fx_path.rst", "getting_started/installation.rst", "getting_started/jetpack.rst", "getting_started/quick_start.rst", "index.rst", "indices/supported_ops.rst", "py_api/dynamo.rst", "py_api/fx.rst", "py_api/logging.rst", "py_api/ptq.rst", "py_api/runtime.rst", "py_api/torch_tensorrt.rst", "py_api/ts.rst", "sg_execution_times.rst", "src/pytorch-sphinx-theme/docs/changelog.rst", "src/pytorch-sphinx-theme/docs/configuring.rst", "src/pytorch-sphinx-theme/docs/demo/api.rst", "src/pytorch-sphinx-theme/docs/demo/demo.rst", "src/pytorch-sphinx-theme/docs/demo/lists_tables.rst", "src/pytorch-sphinx-theme/docs/demo/long.rst", "src/pytorch-sphinx-theme/docs/demo/structure.rst", "src/pytorch-sphinx-theme/docs/index.rst", "src/pytorch-sphinx-theme/docs/installing.rst", "ts/creating_torchscript_module_in_python.rst", "ts/getting_started_with_cpp_api.rst", "ts/getting_started_with_python_api.rst", "ts/ptq.rst", "ts/torchscript_frontend_from_pytorch.rst", 
"tutorials/_rendered_examples/dynamo/converter_overloading.rst", "tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows.rst", "tutorials/_rendered_examples/dynamo/custom_kernel_plugins.rst", "tutorials/_rendered_examples/dynamo/engine_caching_bert_example.rst", "tutorials/_rendered_examples/dynamo/engine_caching_example.rst", "tutorials/_rendered_examples/dynamo/index.rst", "tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.rst", "tutorials/_rendered_examples/dynamo/refit_engine_example.rst", "tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage.rst", "tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.rst", "tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.rst", "tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.rst", "tutorials/_rendered_examples/dynamo/torch_export_cudagraphs.rst", "tutorials/_rendered_examples/dynamo/torch_export_gpt2.rst", "tutorials/_rendered_examples/dynamo/torch_export_llama2.rst", "tutorials/_rendered_examples/dynamo/vgg16_ptq.rst", "tutorials/_rendered_examples/dynamo/weight_streaming_example.rst", "tutorials/_rendered_examples/index.rst", "tutorials/_rendered_examples/triton/index.rst", "tutorials/notebooks.rst", "tutorials/serving_torch_tensorrt_with_triton.rst", "user_guide/dynamic_shapes.rst", "user_guide/mixed_precision.rst", "user_guide/runtime.rst", "user_guide/saving_models.rst", "user_guide/torch_tensorrt_explained.rst", "user_guide/using_dla.rst"], "titles": ["Class DataType", "Class Device::DeviceType", "Class TensorFormat", "Template Class Int8CacheCalibrator", "Template Class Int8Calibrator", "Define STR", "Define TORCH_TENSORRT_PATCH_VERSION", "Define TORCH_TENSORRT_MAJOR_VERSION", "Define TORCH_TENSORRT_MINOR_VERSION", "Define TORCHTRT_API", "Define XSTR", "Define TORCHTRT_HIDDEN", "Define TORCH_TENSORRT_VERSION", "Directory cpp", "Directory include", "Directory torch_tensorrt", "Enum Level", "Enum EngineCapability", "File logging.h", "File macros.h", "File ptq.h", "File torch_tensorrt.h", "Function torch_tensorrt::logging::get_logging_prefix", "Function torch_tensorrt::logging::get_reportable_log_level", "Function torch_tensorrt::logging::get_is_colored_output_on", "Function torch_tensorrt::logging::set_reportable_log_level", "Function torch_tensorrt::logging::log", "Function torch_tensorrt::logging::set_is_colored_output_on", "Function torch_tensorrt::logging::set_logging_prefix", "Template Function torch_tensorrt::ptq::make_int8_cache_calibrator", "Template Function torch_tensorrt::ptq::make_int8_calibrator", "Function torch_tensorrt::torchscript::check_method_operator_support", "Function torch_tensorrt::torchscript::compile", "Function torch_tensorrt::torchscript::embed_engine_in_new_module", "Function torch_tensorrt::get_build_info", "Function torch_tensorrt::set_device", "Function torch_tensorrt::dump_build_info", "Function torch_tensorrt::torchscript::convert_method_to_trt_engine", "Namespace torch_tensorrt", "Namespace torch_tensorrt::logging", "Namespace torch_tensorrt::ptq", "Namespace torch_tensorrt::torchscript", "Program Listing for File logging.h", "Program Listing for File macros.h", "Program Listing for File ptq.h", "Program Listing for File torch_tensorrt.h", "Struct Device", "Struct GraphInputs", "Struct Input", "Struct CompileSpec", "Torch-TensorRT C++ API", "Full API", "torchtrtc", "Conversion Phase", "Writing Dynamo Converters", "Lowering Phase", "Partitioning Phase", "Compiler Phases", "Runtime Phase", "System Overview", 
"Writing TorchScript Converters", "Useful Links for Torch-TensorRT Development", "Writing Dynamo ATen Lowering Passes", "Compiling Exported Programs with Torch-TensorRT", "TensorRT Backend for torch.compile", "Torch-TensorRT (FX Frontend) User Guide", "Installation", "Overview", "Quick Start", "Torch-TensorRT", "Operators Supported", "torch_tensorrt.dynamo", "torch_tensorrt.fx", "torch_tensorrt.logging", "torch_tensorrt.ts.ptq", "torch_tensorrt.runtime", "torch_tensorrt", "torch_tensorrt.ts", "Computation times", "Changelog", "Configuration", "5. :mod:`test_py_module`", "3. Paragraph Level Markup", "4. Lists & Tables", "1. Long Sticky Nav", "1. Structural Elements", "<no title>", "Installation", "Creating a TorchScript Module", "Using Torch-TensorRT in C++", "Using Torch-TensorRT in Python", "Post Training Quantization (PTQ)", "Using Torch-TensorRT TorchScript Frontend Directly From PyTorch", "Overloading Torch-TensorRT Converters with Custom Converters", "Cross runtime compilation for windows example", "Using Custom Kernels within TensorRT Engines with Torch-TensorRT", "Engine Caching (BERT)", "Engine Caching", "Dependencies", "Mutable Torch TensorRT Module", "Refitting Torch-TensorRT Programs with New Weights", "Torch Compile Advanced Usage", "Compiling ResNet with dynamic shapes using the torch.compile backend", "Compiling Stable Diffusion model using the torch.compile backend", "Compiling BERT using the torch.compile backend", "Torch Export with Cudagraphs", "Compiling GPT2 using the dynamo backend", "Compiling Llama2 using the dynamo backend", "Deploy Quantized Models using Torch-TensorRT", "Weight Streaming", "Torch-TensorRT Tutorials", "Serving a Torch-TensorRT model with Triton", "Legacy notebooks", "Serving a Torch-TensorRT model with Triton", "Dynamic shapes with Torch-TensorRT", "Compile Mixed Precision models with Torch-TensorRT", "Deploying Torch-TensorRT Programs", "Saving models compiled with Torch-TensorRT", "Torch-TensorRT Explained", "DLA"], "terms": {"defin": [0, 1, 2, 3, 4, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 46, 47, 48, 49, 51, 52, 54, 65, 68, 75, 76, 80, 88, 89, 90, 91, 93, 95, 97, 101, 104, 105, 106, 107, 112], "file": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 46, 47, 48, 49, 52, 54, 56, 58, 59, 64, 65, 66, 67, 68, 71, 72, 74, 76, 77, 78, 80, 81, 83, 87, 89, 91, 94, 110, 111, 113, 114, 117], "torch_tensorrt": [0, 1, 2, 14, 16, 17, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 56, 62, 63, 64, 65, 68, 69, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 103, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119], "h": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 46, 47, 48, 49, 50, 51, 52, 55, 68, 76, 89, 91], "support": [0, 1, 2, 27, 31, 46, 48, 49, 52, 54, 56, 61, 63, 65, 67, 68, 69, 72, 75, 76, 77, 80, 81, 88, 89, 90, 93, 95, 100, 102, 104, 106, 107, 108, 109, 110, 111, 113, 115, 118, 119], "data": [0, 2, 3, 4, 29, 30, 44, 46, 48, 49, 52, 53, 56, 57, 59, 60, 64, 65, 70, 71, 72, 74, 76, 77, 82, 86, 90, 91, 95, 97, 108, 109, 112], "type": [0, 1, 2, 30, 49, 50, 52, 53, 56, 58, 60, 62, 63, 64, 65, 71, 72, 74, 75, 76, 77, 82, 89, 90, 91, 93, 94, 95, 97, 108, 109, 112, 115, 117], "can": [0, 1, 4, 29, 30, 37, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 71, 74, 75, 76, 77, 80, 82, 88, 89, 90, 91, 92, 93, 94, 95, 97, 99, 100, 101, 104, 105, 
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "us": [0, 1, 2, 3, 4, 29, 30, 32, 35, 37, 43, 44, 45, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 80, 81, 82, 83, 88, 91, 94, 97, 98, 99, 100, 109, 110, 111, 113, 115, 116, 117, 118, 119], "tensorrt": [0, 1, 3, 4, 29, 30, 31, 32, 33, 36, 37, 44, 45, 46, 48, 49, 52, 53, 54, 55, 56, 57, 59, 60, 62, 67, 68, 71, 72, 74, 75, 76, 77, 88, 91, 94, 97, 98, 101, 102, 103, 104, 105, 109], "engin": [0, 1, 17, 32, 33, 37, 45, 46, 48, 49, 52, 53, 56, 57, 59, 62, 63, 64, 69, 71, 72, 75, 76, 77, 80, 89, 90, 91, 92, 93, 98, 100, 102, 104, 109, 110, 114, 116, 118, 119], "thi": [0, 1, 2, 29, 30, 42, 43, 44, 45, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 71, 72, 75, 76, 77, 80, 81, 82, 84, 85, 88, 89, 91, 92, 93, 95, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "compat": [0, 1, 46, 55, 58, 64, 65, 71, 75, 76, 77, 118], "c10": [0, 1, 45, 46, 48, 49, 89, 91], "check": [0, 1, 31, 46, 52, 55, 60, 65, 67, 71, 75, 77, 89, 95, 99, 100, 110, 111, 113, 116], "trt": [0, 1, 3, 4, 46, 48, 53, 55, 58, 60, 62, 64, 65, 67, 68, 70, 71, 75, 76, 89, 93, 95, 104, 106, 107, 109, 114, 116, 117], "so": [0, 44, 52, 53, 54, 55, 58, 59, 60, 62, 64, 65, 66, 67, 72, 75, 76, 81, 82, 83, 89, 91, 93, 95, 97, 101, 102, 104, 106, 107, 114], "should": [0, 3, 4, 29, 45, 49, 52, 53, 54, 55, 56, 57, 59, 60, 63, 64, 65, 67, 71, 75, 76, 77, 80, 82, 85, 91, 93, 95, 96, 97, 100, 105, 110, 111, 113], "reason": [0, 65, 88, 93, 95, 97, 118], "you": [0, 1, 2, 29, 30, 46, 48, 49, 52, 53, 54, 55, 56, 58, 59, 60, 63, 65, 66, 67, 68, 71, 75, 76, 77, 80, 82, 83, 84, 88, 89, 90, 91, 92, 93, 95, 97, 98, 99, 100, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "need": [0, 1, 2, 25, 29, 43, 46, 53, 54, 55, 60, 65, 66, 67, 71, 72, 75, 76, 82, 89, 90, 91, 93, 95, 96, 97, 99, 100, 110, 111, 112, 113, 114, 116], "explictli": 0, "public": [0, 1, 2, 3, 4, 44, 45, 46, 47, 48, 49, 83, 91], "enum": [0, 1, 2, 42, 45, 46, 51, 71, 77, 91, 93], "valu": [0, 1, 2, 16, 17, 45, 46, 48, 53, 56, 58, 60, 63, 70, 71, 74, 76, 80, 89, 99, 101, 102, 104, 109, 112], "underli": [0, 1, 2, 46, 60], "In": [0, 1, 2, 46, 53, 54, 56, 57, 58, 59, 60, 64, 65, 66, 75, 76, 82, 83, 85, 90, 91, 93, 95, 99, 110, 111, 112, 113, 114, 115, 116, 117], "case": [0, 1, 2, 46, 49, 53, 54, 56, 58, 60, 62, 64, 65, 66, 67, 75, 76, 91, 93, 95, 99, 100, 114, 115, 116], "itself": [0, 1, 2, 46, 52, 55, 92, 93, 110, 111, 113], "interfac": [0, 1, 2, 46, 58, 59, 60, 64, 69, 91], "vs": [0, 1, 2, 46, 55, 66, 71, 76, 77, 92], "normal": [0, 1, 2, 46, 65, 82, 88, 89, 91, 93, 99, 100, 105, 108, 110, 111, 113, 119], "instatin": [0, 1, 2, 46], "ex": [0, 1, 2, 33, 46, 67, 77, 83, 85], "kfloat": [0, 45, 49], "enumer": [0, 1, 2, 16, 17, 46], "klong": [0, 45], "int64": [0, 76, 77, 109], "kdoubl": [0, 45], "fp64": [0, 76], "fp32": [0, 48, 49, 52, 64, 65, 71, 76, 77, 91, 106, 107, 110, 111, 112, 113, 115], "khalf": [0, 45, 89], "fp16": [0, 48, 49, 52, 64, 65, 71, 72, 76, 89, 90, 99, 103, 106, 107, 109, 115, 119], "kchar": [0, 45], "int8": [0, 44, 48, 49, 52, 64, 71, 76, 77, 91, 108, 119], "kint": [0, 45], "int": [0, 3, 4, 35, 44, 45, 49, 52, 54, 56, 63, 64, 70, 71, 72, 76, 77, 80, 89, 95, 108, 109], "kbool": [0, 45], "bool": [0, 1, 2, 3, 4, 24, 27, 30, 31, 42, 44, 45, 46, 49, 55, 60, 64, 70, 71, 72, 74, 75, 76, 77, 80, 89, 91, 94, 95], "kunknown": [0, 2, 45], "sentinel": [0, 2, 76], "function": [0, 1, 2, 3, 4, 46, 48, 
49, 51, 54, 55, 56, 58, 60, 62, 64, 65, 66, 88, 89, 91, 92, 93, 95, 100, 101, 104, 105, 106, 107, 110, 111, 112, 113, 114, 116, 118, 119], "default": [0, 1, 2, 3, 4, 16, 29, 30, 33, 43, 45, 46, 48, 49, 52, 54, 56, 62, 64, 65, 66, 71, 72, 75, 76, 77, 80, 81, 82, 89, 90, 91, 92, 93, 94, 95, 97, 108, 114, 116, 117, 118], "construct": [0, 1, 2, 3, 4, 46, 48, 49, 53, 54, 55, 57, 59, 60, 65, 74, 75, 76, 82, 83, 89, 91, 93, 95, 97, 114], "new": [0, 1, 2, 3, 4, 32, 33, 46, 48, 49, 56, 58, 59, 60, 62, 64, 65, 68, 69, 71, 77, 82, 89, 97, 98, 99, 102, 104, 105, 110, 111, 113, 116], "object": [0, 1, 2, 3, 4, 46, 48, 49, 52, 58, 60, 62, 63, 64, 71, 75, 76, 77, 91, 92, 93, 114, 117], "inlin": [0, 1, 2, 3, 4, 29, 30, 44, 46, 48, 55, 83, 86, 89], "constexpr": [0, 1, 2, 45, 46, 95], "t": [0, 1, 2, 45, 46, 55, 60, 65, 66, 70, 76, 80, 82, 83, 88, 89, 91, 93, 95, 108, 110, 111, 113, 114], "constructor": [0, 2, 46, 48, 49, 58, 88], "from": [0, 1, 2, 3, 4, 29, 30, 44, 46, 48, 49, 52, 53, 55, 56, 57, 58, 59, 60, 63, 64, 65, 67, 69, 71, 72, 75, 76, 77, 78, 80, 81, 82, 83, 88, 89, 91, 93, 94, 95, 96, 97, 99, 100, 103, 104, 106, 107, 108, 109, 110, 111, 112, 113, 116, 117, 118], "torchtrt_api": [0, 2, 19, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 48, 49, 50], "scalartyp": [0, 45, 70], "torch": [0, 1, 2, 4, 20, 21, 29, 30, 31, 32, 33, 36, 37, 44, 45, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 67, 71, 72, 74, 75, 76, 77, 78, 88, 91, 94, 96, 97, 98, 109, 119], "paramet": [0, 1, 2, 3, 4, 25, 26, 27, 29, 30, 31, 32, 33, 35, 37, 46, 48, 49, 53, 54, 55, 60, 64, 65, 71, 72, 74, 75, 76, 77, 86, 88, 89, 100, 106, 107], "oper": [0, 1, 2, 3, 4, 31, 44, 45, 46, 49, 52, 53, 55, 56, 57, 58, 59, 60, 62, 63, 65, 69, 71, 76, 77, 90, 91, 93, 100, 102, 104, 118, 119], "const": [0, 1, 2, 3, 4, 29, 30, 31, 32, 33, 35, 37, 44, 45, 46, 55, 60, 70, 89, 91], "get": [0, 1, 2, 3, 4, 23, 34, 44, 46, 55, 56, 60, 62, 63, 65, 67, 75, 76, 89, 91, 93, 97, 106, 107, 109, 110, 111, 112, 113], "return": [0, 1, 2, 3, 4, 23, 24, 29, 30, 31, 32, 33, 34, 37, 42, 43, 44, 45, 46, 54, 55, 56, 57, 58, 59, 60, 62, 64, 65, 71, 72, 75, 76, 77, 88, 89, 90, 91, 93, 95, 97, 100, 101, 108, 109, 110, 111, 113, 114, 115], "explicit": [0, 1, 2, 3, 4, 45, 46, 55, 65, 72, 75, 82, 91, 118], "delet": [0, 1, 2, 45, 46, 55], "other": [0, 1, 2, 45, 46, 52, 53, 55, 58, 62, 64, 65, 66, 70, 71, 75, 76, 81, 82, 89, 90, 93, 116], "comparis": [0, 2], "true": [0, 1, 2, 4, 46, 49, 55, 56, 60, 62, 64, 65, 70, 71, 72, 75, 76, 77, 80, 83, 89, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 115, 116, 119], "fals": [0, 1, 2, 3, 4, 44, 45, 46, 49, 54, 62, 64, 65, 70, 71, 72, 75, 76, 77, 80, 81, 82, 83, 89, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 103, 104, 105, 106, 107, 108, 109, 116], "struct": [1, 21, 38, 41, 45, 54, 91], "onli": [1, 3, 4, 16, 29, 44, 46, 48, 52, 54, 55, 56, 59, 60, 64, 65, 67, 68, 71, 72, 75, 76, 82, 91, 93, 94, 95, 99, 100, 107, 109, 115, 116, 119], "applic": [1, 29, 46, 52, 55, 59, 64, 71, 75, 76, 89, 90, 92, 116, 119], "kcuda": [1, 46, 56, 89], "which": [1, 2, 29, 32, 37, 46, 49, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 71, 72, 74, 75, 76, 77, 80, 82, 83, 88, 89, 90, 91, 92, 93, 94, 95, 97, 101, 102, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118], "map": [1, 46, 53, 54, 55, 57, 59, 60, 65, 76, 89, 91, 92, 97, 101, 110, 111, 112, 113], "kgpu": [1, 45, 46], "To": [1, 46, 52, 54, 56, 64, 66, 71, 80, 88, 89, 90, 92, 95, 100, 106, 107, 110, 111, 113], 
"datatyp": [1, 21, 38, 45, 46, 48, 49, 50, 71, 76, 77, 90, 95, 110, 111, 113, 115], "target": [1, 33, 45, 46, 48, 49, 52, 54, 56, 58, 59, 64, 65, 66, 69, 71, 75, 76, 77, 90, 91, 92, 93, 95, 100, 118, 119], "gpu": [1, 32, 35, 37, 45, 46, 52, 64, 65, 71, 75, 76, 77, 89, 91, 92, 95, 106, 107, 109, 110, 111, 113, 116, 118, 119], "run": [1, 37, 46, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 64, 65, 66, 67, 68, 71, 72, 75, 76, 77, 82, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119], "kdla": [1, 45, 46, 119], "dla": [1, 45, 46, 49, 52, 64, 69, 71, 76, 77], "intern": [1, 16, 46, 60, 63, 73, 75, 82, 89], "note": [1, 46, 48, 54, 60, 62, 65, 66, 67, 75, 76, 80, 82, 89, 95, 100, 110, 111, 113, 114, 119], "The": [1, 46, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 71, 75, 76, 77, 80, 83, 88, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 105, 106, 109, 110, 111, 112, 113, 114, 117, 118], "valid": [1, 46, 56, 60, 62, 71, 75, 76, 93], "kcpu": [1, 46], "comparison": [1, 46], "an": [2, 3, 4, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 64, 65, 66, 68, 71, 72, 74, 75, 76, 77, 80, 82, 83, 88, 89, 90, 91, 93, 95, 97, 100, 101, 105, 106, 107, 109, 110, 111, 112, 113, 114, 116, 117, 118], "memeori": 2, "layout": [2, 48, 70, 71, 76, 77], "store": [2, 4, 49, 52, 53, 58, 60, 64, 65, 71, 75, 76, 77, 88, 89, 95, 97, 100], "tensor": [2, 33, 44, 45, 48, 49, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 70, 71, 72, 75, 76, 77, 88, 89, 90, 91, 93, 95, 101, 109, 112], "kcontigu": [2, 45, 48], "contigu": [2, 48, 49, 52, 71, 76, 77], "nchw": [2, 71, 76, 77], "linear": [2, 56, 70, 76, 88, 95, 108, 115], "kchannelslast": [2, 45], "channel": [2, 76, 81], "last": [2, 55, 65, 76, 108], "nhwc": [2, 52], "memoryformat": [2, 45], "ptq": [3, 4, 15, 18, 19, 38, 50, 51, 52, 69, 71, 76, 77], "privat": [3, 4, 44, 45, 91], "algorithm": [3, 4, 29, 30, 44, 65, 74, 91, 107], "typenam": [3, 4, 29, 30, 44], "gener": [3, 4, 29, 52, 55, 58, 59, 60, 62, 64, 65, 66, 71, 72, 80, 82, 83, 86, 88, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 108, 109, 110, 116], "int8calibr": [3, 20, 30, 40, 44, 50], "implement": [3, 4, 55, 56, 58, 63, 65, 75, 81, 89, 91, 95, 97, 116], "specifi": [3, 4, 33, 52, 54, 60, 64, 65, 66, 71, 76, 77, 80, 82, 90, 92, 109, 110, 111, 113, 114, 115, 117, 118], "calibr": [3, 4, 29, 30, 44, 49, 52, 71, 74, 76, 77, 89, 91], "read": [3, 4, 29, 30, 44, 80, 82, 91], "nvinfer1": [3, 4, 29, 30, 44, 45, 49, 60, 91], "iint8calibr": [3, 4, 29, 30, 44, 45, 49, 71, 76, 77, 91], "iint8entropycalibrator2": [3, 4, 29, 30, 44, 91], "std": [3, 4, 22, 26, 28, 29, 30, 31, 33, 34, 37, 42, 44, 45, 47, 48, 49, 56, 89, 91, 110, 111, 113, 119], "string": [3, 4, 18, 20, 21, 22, 26, 28, 29, 30, 31, 33, 34, 37, 42, 44, 45, 49, 54, 56, 58, 60, 64, 71, 76, 80, 89, 91], "cache_file_path": [3, 4, 29, 30, 44], "8": [3, 52, 55, 63, 64, 66, 75, 76, 82, 83, 86, 89, 94, 95, 102, 105, 110, 111, 113, 114], "cach": [3, 4, 29, 30, 44, 52, 64, 65, 69, 71, 72, 74, 76, 89, 91, 98, 110, 116], "getbatchs": [3, 4, 44], "noexcept": [3, 4, 44, 91], "overrid": [3, 4, 29, 30, 44, 54, 65, 91], "batch": [3, 4, 44, 64, 65, 72, 75, 91, 97, 102, 104, 108, 109, 110, 111, 113, 114, 119], "size": [3, 4, 44, 48, 49, 52, 55, 56, 64, 65, 70, 71, 72, 76, 77, 80, 89, 91, 95, 97, 102, 104, 108, 112, 114], "next": [3, 4, 53, 54, 58, 63, 72, 76, 80, 82, 83, 91, 93, 101, 105, 108, 110, 111, 113], "alwai": [3, 4, 27, 52, 76, 82, 100, 109], "1": [3, 4, 33, 44, 45, 48, 49, 
52, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 70, 71, 72, 74, 75, 76, 77, 79, 80, 82, 83, 86, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 104, 105, 106, 107, 108, 109, 112, 114, 115, 117, 119], "due": [3, 4, 66, 81, 82, 108], "issu": [3, 4, 64, 71, 76, 89, 101, 104], "getbatch": [3, 4, 44], "void": [3, 4, 25, 26, 27, 28, 35, 36, 42, 44, 45], "bind": [3, 4, 33, 44, 75, 77, 82], "char": [3, 4, 44, 52, 89], "name": [3, 4, 31, 33, 37, 44, 54, 56, 58, 60, 65, 66, 67, 72, 74, 75, 76, 77, 82, 83, 88, 89, 92, 93, 95, 100, 105, 108, 110, 111, 113, 115], "nbbind": [3, 4, 44], "Not": 3, "arrai": [3, 4, 33, 53, 54, 76, 77, 93, 95, 109], "pointer": [3, 4, 91], "fed": [3, 4, 48], "buffer": [3, 4, 65, 95], "each": [3, 4, 49, 53, 55, 56, 58, 60, 64, 65, 66, 71, 72, 75, 80, 82, 89, 93, 100, 107, 116], "input": [3, 4, 21, 29, 33, 38, 44, 45, 47, 49, 50, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 68, 70, 71, 72, 73, 75, 76, 77, 83, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119], "number": [3, 4, 49, 52, 54, 55, 56, 60, 63, 64, 65, 71, 72, 76, 77, 80, 89, 90, 95, 100, 102, 104, 109, 112, 118], "readcalibrationcach": [3, 4, 44], "size_t": [3, 4, 44, 91], "length": [3, 4, 44, 65, 70, 83, 109], "how": [3, 4, 66, 67, 82, 84, 86, 88, 92, 93, 95, 97, 99, 101, 108, 109, 110, 111, 112, 113, 114, 116], "enabl": [3, 4, 24, 49, 52, 54, 56, 57, 59, 64, 65, 66, 71, 72, 74, 75, 76, 77, 80, 97, 99, 100, 102, 104, 105, 106, 107, 109, 115, 116], "use_cach": [3, 4, 30, 44, 74, 91, 106, 107, 109], "set": [3, 4, 16, 21, 25, 27, 29, 32, 35, 37, 45, 46, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 65, 66, 71, 72, 75, 76, 77, 80, 84, 87, 88, 89, 90, 91, 93, 95, 100, 106, 108, 109, 112, 114, 115, 116, 118, 119], "writecalibrationcach": [3, 4, 44], "write": [3, 4, 29, 30, 44, 65, 69, 82, 89, 91, 110, 111, 113], "provid": [3, 4, 49, 52, 54, 56, 58, 60, 62, 64, 65, 66, 68, 71, 72, 75, 76, 77, 82, 89, 90, 91, 92, 93, 97, 98, 100, 101, 105, 109, 110, 111, 113, 114, 116, 117, 118], "cast": [3, 4, 55, 64, 71, 106, 107, 115], "convienc": [3, 4, 49], "convert": [3, 4, 31, 32, 37, 52, 55, 56, 57, 59, 63, 64, 69, 71, 76, 77, 90, 92, 95, 98, 102, 104, 109, 110, 112, 116], "easili": [3, 4, 99], "assign": [3, 4, 81], "ptq_calibr": [3, 4, 45, 49, 91], "field": [3, 4, 63, 72, 76, 91], "compilespec": [3, 4, 21, 32, 37, 41, 45, 50, 56, 77, 89, 91, 119], "dataloaderuniqueptr": [4, 44], "libtorch": [4, 36, 60, 66, 68, 89, 91, 118], "dataload": [4, 29, 30, 44, 49, 74, 91, 108], "unique_ptr": [4, 30], "unqiue_ptr": 4, "A": [4, 29, 30, 32, 33, 47, 48, 54, 55, 56, 60, 65, 66, 71, 72, 76, 77, 83, 91, 103, 110, 111, 113], "uniqu": [4, 90], "what": [4, 54, 55, 65, 68, 76, 82, 88, 89, 90, 106, 107, 118], "make_data_load": [4, 91], "factori": [4, 29, 30, 64, 71, 91], "path": [4, 13, 14, 15, 29, 30, 52, 64, 65, 66, 67, 71, 74, 76, 88, 89, 91, 94, 97, 105, 108, 118], "find": [4, 65, 66, 67, 89, 95, 109], "whether": [4, 52, 54, 64, 65, 71, 72, 76, 81, 91, 102, 104, 116], "exist": [4, 31, 32, 37, 54, 63, 64, 65, 67, 71, 74, 76, 77, 91, 97, 112], "There": [4, 53, 54, 59, 60, 62, 63, 65, 66, 83, 88, 91, 100, 110, 111, 112, 113, 114, 116], "consum": [4, 53, 88], "macro": [5, 6, 7, 8, 9, 10, 11, 12, 15, 18, 20, 21, 42, 44, 45, 50, 51], "x": [5, 10, 33, 43, 55, 56, 66, 67, 68, 75, 77, 83, 88, 89, 93, 95, 97, 101, 105, 108, 109, 110, 111, 113, 114, 115, 117], "includ": [13, 15, 16, 34, 36, 42, 43, 44, 45, 51, 52, 54, 56, 57, 58, 59, 62, 64, 65, 66, 67, 68, 71, 72, 75, 76, 80, 82, 88, 
89, 91, 95, 116], "parent": [14, 15, 18, 19, 20, 21], "cpp": [14, 15, 42, 43, 44, 45, 51, 55, 59, 66, 89, 91], "log": [15, 16, 19, 20, 38, 44, 50, 51, 55, 60, 64, 65, 69, 70, 71, 72, 76, 93, 102, 104, 115], "emum": [16, 17], "messag": [16, 25, 26, 52, 73], "sever": [16, 26, 73], "kinternal_error": [16, 42], "print": [16, 31, 44, 62, 64, 67, 71, 77, 82, 89, 92, 93, 94, 95, 96, 97, 99, 100, 102, 104, 106, 107, 108, 109, 110, 111, 113], "error": [16, 49, 52, 53, 55, 59, 64, 65, 71, 73, 76, 77, 82, 89, 114], "kerror": [16, 42], "all": [16, 42, 43, 44, 45, 49, 52, 54, 55, 56, 58, 62, 64, 65, 66, 67, 71, 73, 75, 76, 78, 82, 83, 88, 89, 90, 91, 93, 95, 106, 107, 110, 111, 112, 113, 115, 116, 118], "kwarn": [16, 42], "warn": [16, 44, 52, 60, 73, 75], "kinfo": [16, 42, 44], "info": [16, 32, 37, 45, 52, 60, 73, 75, 76, 115], "kdebug": [16, 42, 44], "debug": [16, 27, 45, 49, 52, 60, 62, 64, 71, 73, 75, 76, 77, 92, 94, 95, 96, 97, 99, 100, 101, 102, 104, 108, 115], "kgraph": [16, 42, 55], "everyth": [16, 64, 71, 76], "intermedi": [16, 49, 52, 54, 64, 71, 73, 76, 77, 88, 115, 118], "graph": [16, 31, 32, 37, 45, 49, 52, 53, 54, 56, 57, 59, 60, 62, 63, 64, 65, 71, 72, 73, 76, 77, 88, 89, 93, 95, 97, 99, 100, 102, 104, 105, 112, 114, 116], "lower": [16, 54, 63, 65, 69, 71, 72, 73, 76, 83, 95, 97, 102, 104, 109, 112, 118], "phase": [16, 60, 63, 89, 93, 100, 114, 118], "class": [17, 29, 30, 44, 45, 46, 51, 58, 60, 64, 65, 73, 77, 82, 83, 88, 89, 90, 91, 93, 95, 97, 101, 108, 112, 114, 115], "int8_t": [17, 45], "select": [17, 29, 30, 37, 49, 52, 58, 64, 65, 66, 70, 71, 76, 77, 81, 84, 90, 91, 95, 118], "capabl": [17, 45, 49, 52, 58, 71, 76, 77, 92, 93, 94], "kstandard": [17, 45, 49], "ksafeti": [17, 45], "kdla_standalon": [17, 45], "directori": [18, 19, 20, 21, 42, 43, 44, 45, 50, 66, 67, 71, 91, 97, 110, 111, 113], "program": [18, 19, 20, 21, 29, 51, 52, 57, 58, 59, 69, 71, 88, 97, 98, 106, 107, 110, 114], "list": [18, 19, 20, 21, 31, 49, 51, 53, 56, 58, 60, 62, 63, 65, 68, 70, 71, 72, 75, 76, 77, 86, 89, 90, 93, 95, 110, 111, 113], "level": [18, 23, 25, 26, 39, 42, 44, 50, 54, 55, 56, 59, 64, 65, 71, 76, 77, 86, 88, 93, 95, 110, 111, 113, 118], "get_is_colored_output_on": [18, 39, 42, 50], "get_logging_prefix": [18, 39, 42, 50], "get_reportable_log_level": [18, 39, 42, 50], "set_is_colored_output_on": [18, 39, 42, 50], "set_logging_prefix": [18, 39, 42, 50], "set_reportable_log_level": [18, 39, 42, 50], "torchscript": [19, 21, 38, 43, 45, 49, 50, 52, 56, 57, 58, 59, 63, 68, 71, 72, 74, 75, 76, 77, 90, 110, 111, 112, 113, 114, 119], "str": [19, 43, 44, 50, 54, 64, 65, 70, 71, 74, 75, 76, 77, 93, 94, 95, 97, 108], "torch_tensorrt_major_vers": [19, 43, 50], "torch_tensorrt_minor_vers": [19, 43, 50], "torch_tensorrt_patch_vers": [19, 43, 50], "torch_tensorrt_vers": [19, 43, 50], "torchtrt_hidden": [19, 43, 50], "xstr": [19, 43, 50], "nvinfer": [20, 44], "fstream": [20, 44], "iostream": [20, 21, 44, 45, 89], "iter": [20, 44, 49, 52, 53, 64, 71, 74, 76, 77, 96, 97, 108, 109], "memori": [20, 21, 44, 45, 55, 60, 71, 76, 77, 89, 90, 95, 97, 106, 107, 109], "sstream": [20, 44], "vector": [20, 21, 33, 44, 45, 47, 48, 49, 56, 58, 76, 89, 91, 119], "templat": [20, 40, 44, 45, 50, 80, 89], "int8cachecalibr": [20, 29, 40, 44, 50], "make_int8_cache_calibr": [20, 40, 44, 50, 91], "make_int8_calibr": [20, 29, 40, 44, 50, 91], "cuda_runtim": [21, 45], "custom_class": [21, 45], "devic": [21, 33, 35, 38, 45, 49, 50, 52, 58, 64, 70, 71, 72, 74, 75, 76, 77, 90, 91, 92, 95, 99, 103, 106, 107, 109, 112, 119], "graphinput": [21, 
38, 45, 49, 50], "devicetyp": [21, 38, 45, 46, 50, 75, 76, 77, 91, 92, 95, 119], "tensorformat": [21, 38, 45, 48, 50, 76, 95], "enginecap": [21, 38, 45, 49, 50, 64, 71, 75, 76, 77, 92, 95], "dump_build_info": [21, 38, 45, 50], "get_build_info": [21, 38, 45, 50], "set_devic": [21, 38, 45, 50, 116], "check_method_operator_support": [21, 41, 45, 50], "compil": [21, 31, 37, 41, 45, 49, 50, 52, 54, 55, 56, 58, 60, 62, 65, 71, 72, 73, 75, 76, 77, 78, 80, 88, 90, 91, 92, 93, 95, 96, 98, 99, 108, 110, 111, 113, 116, 119], "convert_method_to_trt_engin": [21, 41, 45, 50, 76, 77, 89, 92], "embed_engine_in_new_modul": [21, 41, 45, 50, 77], "current": [23, 54, 56, 58, 60, 62, 63, 64, 65, 66, 67, 71, 72, 75, 76, 77, 80, 93, 95, 99, 106, 107, 108, 109, 116], "report": [23, 44, 75], "Is": [24, 76], "color": [24, 27, 82], "output": [24, 27, 33, 49, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 71, 73, 75, 76, 77, 80, 82, 83, 89, 93, 95, 97, 99, 100, 103, 109, 110, 111, 112, 113, 114, 115, 117], "lvl": [25, 26, 42], "inform": [25, 33, 34, 36, 48, 52, 53, 56, 58, 62, 64, 65, 66, 71, 72, 73, 76, 82, 88, 89, 91, 92, 95, 97, 109, 114], "ad": [25, 52, 53, 54, 56, 62, 65, 66, 95, 99], "abov": [25, 54, 56, 62, 65, 66, 73, 81, 82, 89, 95, 102, 104, 115, 117], "msg": [26, 42], "add": [26, 53, 54, 55, 56, 60, 63, 66, 70, 80, 82, 87, 89, 90, 93, 95], "global": [26, 52, 64, 71, 76, 89], "colored_output_on": [27, 42], "prefix": [27, 28, 42, 82], "help": [27, 52, 53, 60, 64, 65, 89, 94, 97, 108, 109, 112, 116], "when": [27, 44, 45, 46, 52, 53, 55, 56, 57, 58, 59, 60, 64, 65, 66, 71, 75, 76, 77, 80, 82, 84, 88, 89, 91, 93, 95, 97, 99, 100, 109, 112, 114, 116], "termin": [27, 52, 89], "If": [27, 33, 53, 54, 55, 56, 62, 63, 64, 65, 66, 68, 71, 72, 76, 80, 82, 89, 90, 91, 93, 95, 97, 100, 101, 105, 109, 110, 111, 113, 114, 115, 116, 118, 119], "build": [29, 30, 34, 49, 52, 53, 57, 59, 60, 63, 64, 65, 71, 75, 76, 81, 86, 89, 91, 93, 95, 102, 104, 109, 114], "post": [29, 30, 49, 52, 63, 69, 89, 97], "train": [29, 30, 49, 52, 69, 70, 89, 90, 97, 109], "quantiz": [29, 30, 52, 64, 69, 74, 76, 89, 98, 110], "creat": [29, 30, 33, 52, 53, 54, 56, 58, 60, 65, 69, 76, 77, 82, 89, 93, 95, 100, 109, 110, 111, 113], "previous": [29, 33, 89, 97, 100], "therefor": [29, 58, 65, 66, 75, 82, 89, 112, 116], "have": [29, 33, 44, 52, 53, 54, 55, 56, 60, 62, 63, 64, 65, 66, 67, 71, 72, 74, 75, 76, 77, 82, 88, 89, 90, 91, 95, 98, 102, 104, 108, 110, 111, 112, 113, 114], "requir": [29, 49, 52, 53, 54, 55, 63, 64, 65, 66, 67, 71, 76, 77, 80, 89, 91, 93, 94, 95, 98, 108, 109, 110, 111, 113, 114, 116], "dataset": [29, 74, 91, 112], "save": [29, 44, 52, 58, 64, 65, 68, 69, 71, 75, 76, 77, 89, 90, 94, 96, 97, 100, 103, 109, 110, 111, 112, 113, 116, 118], "later": [29, 71, 89, 100, 117, 118], "differ": [29, 55, 56, 59, 64, 65, 66, 76, 80, 88, 93, 95, 97, 99, 106, 109, 112, 116, 118], "scratch": [29, 97, 100], "depend": [29, 34, 53, 59, 64, 65, 67, 68, 71, 89, 90, 109, 111, 113, 116], "howev": [29, 66, 80, 81, 89, 93, 95, 97, 110, 111, 113, 114, 118], "network": [29, 30, 54, 60, 65, 76, 89, 91, 93, 95, 109, 110, 111, 112, 113, 119], "also": [29, 53, 54, 60, 62, 64, 66, 68, 80, 82, 83, 89, 90, 91, 97, 105, 108, 112], "recalibr": 29, "its": [29, 53, 56, 58, 60, 66, 75, 76, 82, 95, 108, 110, 111, 113, 116, 118], "structur": [29, 46, 49, 56, 59, 60, 64, 71, 76, 80, 82, 86, 88, 95, 110, 111, 113], "chang": [29, 55, 56, 59, 62, 64, 65, 75, 76, 77, 80, 91, 93, 97, 99, 100, 110, 111, 113, 116, 118], "respons": [29, 54, 58, 82, 116], "ensur": [29, 54, 55, 56, 
62, 64, 66, 67, 71, 75, 106, 107], "By": [29, 30, 51, 56, 64, 66, 71, 80, 88, 97, 114], "entropi": [29, 30, 91], "v2": [29, 30, 82], "perform": [29, 30, 54, 62, 63, 71, 75, 76, 91, 95, 105, 109, 110, 111, 112, 113, 115, 116, 117, 118], "recommend": [29, 30, 65, 66, 76, 82, 89, 95, 110, 111, 113, 114], "feed": [29, 30, 89], "forward": [29, 30, 32, 33, 56, 58, 60, 64, 68, 71, 75, 76, 77, 88, 89, 90, 91, 92, 93, 95, 101, 108, 114, 115], "minmax": [29, 30, 91], "recomend": [29, 30], "nlp": [29, 30, 91], "task": [29, 30, 65, 91, 112], "call": [29, 30, 32, 49, 54, 55, 58, 60, 65, 71, 72, 75, 76, 77, 82, 88, 89, 92, 93, 95, 97, 99, 101, 104, 112, 114, 116, 118], "e": [29, 30, 52, 55, 60, 65, 66, 67, 68, 72, 76, 88, 89, 91, 95, 97, 100, 110, 111, 113], "g": [29, 30, 52, 55, 65, 66, 67, 72, 76, 82, 91, 95, 100, 110, 111, 113], "iint8minmaxcalibr": [29, 30, 91], "calibration_cache_fil": [29, 30, 91], "move": [30, 44, 55, 58, 77, 89, 91, 93, 106, 107], "calibration_dataload": [30, 91], "contain": [30, 31, 52, 53, 54, 55, 56, 60, 65, 66, 72, 75, 76, 82, 83, 88, 89, 91, 95, 97, 110, 111, 113, 116], "jit": [31, 32, 33, 37, 45, 47, 49, 52, 53, 55, 56, 57, 58, 59, 60, 61, 64, 68, 69, 71, 75, 76, 77, 88, 89, 90, 92, 95, 100, 110, 111, 113, 117, 118], "modul": [31, 32, 33, 37, 45, 49, 52, 56, 57, 58, 59, 60, 64, 65, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 81, 82, 83, 90, 91, 92, 93, 94, 95, 98, 100, 101, 108, 110, 112, 114, 115, 117, 119], "method_nam": [31, 37, 45, 52, 76, 77, 89], "see": [31, 55, 56, 58, 62, 64, 65, 66, 76, 77, 82, 88, 89, 90, 93, 95, 97, 100, 101], "fulli": [31, 52, 55, 64, 71, 75, 76, 77, 89, 91, 95, 119], "take": [31, 32, 33, 37, 53, 54, 57, 58, 59, 60, 62, 65, 71, 72, 75, 76, 77, 80, 82, 89, 91, 92, 93, 95, 101, 112, 114], "method": [31, 32, 33, 37, 48, 52, 55, 60, 66, 71, 76, 77, 82, 88, 89, 92, 97, 112], "pure": [31, 71, 76], "Will": 31, "out": [31, 44, 53, 55, 56, 57, 59, 60, 64, 66, 71, 76, 77, 82, 89, 95, 99, 108, 109, 110, 111, 113, 114], "unsupport": [31, 49, 54, 64, 76, 95, 118], "script": [31, 55, 56, 68, 76, 77, 88, 89, 90, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 116, 118], "nvidia": [32, 37, 42, 43, 44, 45, 52, 61, 64, 65, 66, 67, 71, 76, 77, 89, 101, 104, 110, 111, 113, 118, 119], "configur": [32, 37, 48, 62, 64, 66, 71, 75, 76, 77, 86, 89, 91, 95, 109, 110, 111, 113, 114], "equival": [32, 57, 59, 60, 71, 76, 77, 88, 89, 91, 93, 95, 102, 104], "specif": [32, 49, 54, 55, 57, 59, 62, 64, 71, 76, 77, 82, 93, 109, 118], "traget": 32, "input_binding_nam": [33, 45, 75, 77], "output_binding_nam": [33, 45, 75, 77], "emb": [33, 52, 63, 77, 83], "pre": [33, 55, 74, 77, 91, 97, 109, 116], "built": [33, 52, 58, 59, 64, 66, 71, 75, 76, 77, 97, 100], "serial": [33, 37, 52, 57, 59, 66, 71, 75, 76, 77, 89, 95, 97, 110, 111, 113, 118], "regist": [33, 54, 58, 60, 65, 75, 77, 93, 95], "execut": [33, 49, 52, 55, 57, 58, 59, 63, 64, 65, 66, 69, 71, 72, 75, 76, 77, 78, 88, 89, 91, 93, 95, 110, 111, 113], "must": [33, 48, 49, 52, 54, 55, 56, 60, 62, 65, 66, 71, 72, 76, 77, 82, 83, 89, 97, 114, 116, 118], "follow": [33, 52, 54, 56, 58, 62, 63, 64, 65, 66, 77, 80, 82, 83, 87, 88, 89, 91, 93, 95, 97, 98, 102, 106, 107, 110, 111, 112, 113, 114, 115, 116], "format": [33, 45, 48, 49, 52, 70, 71, 76, 77, 82, 83, 90, 95, 97, 108, 110, 111, 112, 113, 115, 117], "symbol": [33, 65, 66, 77, 82, 116], "index": [33, 61, 62, 66, 67, 69, 70, 77, 80, 86, 91, 95], "0": [33, 43, 44, 45, 49, 52, 54, 56, 59, 60, 62, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 
78, 79, 81, 82, 89, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 114, 115, 119], "2": [33, 43, 54, 56, 60, 63, 64, 65, 66, 67, 69, 70, 71, 74, 75, 76, 77, 80, 82, 83, 86, 88, 89, 91, 93, 95, 96, 97, 99, 100, 101, 102, 104, 106, 107, 108, 109, 114, 117], "y": [33, 56, 77, 83, 93, 95, 101], "compilesepc": 33, "order": [33, 49, 54, 56, 60, 62, 65, 66, 71, 72, 75, 76, 77, 89, 90, 93, 97, 115], "pass": [33, 53, 54, 56, 57, 58, 59, 60, 63, 64, 65, 66, 69, 73, 74, 75, 76, 77, 88, 89, 91, 93, 95, 97, 100], "origin": [33, 65, 72, 76, 95, 97, 99, 118], "pytorch": [33, 48, 49, 52, 54, 55, 56, 57, 58, 59, 60, 63, 64, 66, 67, 68, 71, 74, 75, 76, 77, 88, 89, 90, 91, 93, 97, 99, 100, 108, 110, 111, 113, 114, 115, 116, 117, 118], "assum": [33, 75, 92, 95, 98, 110], "convent": 33, "below": [33, 56, 60, 62, 63, 64, 65, 66, 67, 82, 89, 90, 97, 103, 110, 111, 113], "librari": [34, 42, 43, 44, 45, 52, 54, 57, 58, 59, 60, 76, 89, 95, 98, 110], "version": [34, 36, 59, 62, 64, 65, 67, 71, 75, 76, 80, 83, 95, 110, 111, 112, 113, 117], "gpu_id": [35, 45, 46, 52, 75, 76, 77, 91, 92, 95, 119], "id": [35, 45, 52, 76, 80, 81, 85, 119], "cudasetdevic": 35, "dump": [36, 52, 95], "base": [36, 50, 58, 63, 64, 66, 71, 72, 76, 82, 88, 90, 91, 96, 100, 104, 112, 118], "stdout": [36, 75], "equivil": 37, "document": [42, 43, 44, 45, 50, 59, 80, 82, 83, 87, 88, 89, 91, 92, 110, 111, 113, 114, 116], "copyright": [42, 43, 44, 45, 83, 89], "c": [42, 43, 44, 45, 52, 59, 64, 67, 70, 71, 72, 75, 76, 83, 90, 95, 99, 110, 111, 113, 116, 119], "corpor": [42, 43, 44, 45], "right": [42, 43, 44, 45, 55, 59, 60, 82, 110, 111, 113], "reserv": [42, 43, 44, 45, 106, 107], "licens": [42, 43, 44, 45, 89], "under": [42, 43, 44, 45, 59, 65, 82, 93, 102, 118], "bsd": [42, 43, 44, 45], "style": [42, 43, 44, 45, 64, 68, 80, 82, 83], "found": [42, 43, 44, 45, 63, 66, 75, 82, 89, 91, 93, 95, 97, 116], "root": [42, 43, 44, 45, 66, 80, 91, 108], "sourc": [42, 43, 44, 45, 54, 59, 64, 65, 67, 71, 72, 73, 74, 75, 76, 77, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "tree": [42, 43, 44, 45, 80, 91, 108, 116], "pragma": [42, 43, 44, 45, 91], "onc": [42, 43, 44, 45, 53, 55, 56, 58, 64, 65, 66, 67, 76, 91, 95, 107, 109, 110, 111, 113, 116], "namespac": [42, 43, 44, 45, 51, 55, 69, 76, 91, 95], "ar": [42, 46, 49, 52, 53, 54, 55, 56, 58, 59, 60, 62, 63, 64, 65, 66, 71, 74, 75, 76, 77, 80, 82, 83, 84, 88, 89, 91, 92, 93, 95, 96, 97, 99, 100, 102, 106, 107, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "ones": [42, 56, 57, 59, 66, 82, 89, 93, 95, 118], "necessari": [42, 62, 64, 66, 75, 93, 100, 116], "user": [42, 48, 54, 56, 57, 58, 59, 62, 63, 64, 66, 67, 71, 82, 83, 89, 90, 91, 93, 97, 100, 109, 110, 111, 113, 114, 115, 116, 118], "dont": 42, "know": [42, 60, 80, 82, 93, 95], "we": [42, 44, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 72, 75, 80, 82, 88, 89, 91, 93, 95, 97, 98, 99, 100, 101, 102, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118], "want": [42, 56, 65, 66, 67, 68, 72, 88, 89, 91, 92, 93, 95, 100, 101, 110, 111, 113], "use_cmake_generated_export_head": 43, "torch_tensorrt_export": 43, "els": [43, 44, 48, 77, 82, 83, 94, 96, 97, 108], "__gnuc__": 43, "__attribute__": 43, "__visibility__": 43, "hidden": [43, 80], "endif": [43, 44, 45], "doe": [43, 44, 55, 56, 60, 62, 65, 66, 76, 82, 91, 95, 102, 104], "gaurd": 43, "someth": [43, 55, 82, 110, 111, 113], "6": [43, 55, 56, 58, 66, 70, 82, 86, 88, 89, 94, 95], "setup": [43, 67, 91, 110, 111, 
113], "alias": 43, "eas": 43, "ts": [43, 52, 56, 68, 69, 76, 88, 89, 90, 92, 114, 117], "torchtrt": [43, 56, 94, 95, 108, 110, 111, 113], "ifndef": [44, 45], "doxygen_should_skip_thi": [44, 45], "get_batch_impl": 44, "element_typ": 44, "super": [44, 88, 93, 95, 101, 108, 114, 115], "batchtyp": 44, "dataloader_": 44, "cache_file_path_": 44, "use_cache_": 44, "auto": [44, 56, 60, 64, 68, 71, 82, 83, 89, 91, 106, 107, 109, 119], "batched_data_": 44, "push_back": [44, 56], "it_": 44, "begin": [44, 65, 66, 82, 101, 105], "hack": 44, "explict": 44, "work": [44, 55, 59, 60, 64, 65, 68, 71, 74, 75, 76, 82, 83, 91, 95, 100, 101, 105, 109, 110, 111, 113, 114], "here": [44, 53, 54, 56, 58, 63, 64, 65, 66, 68, 80, 82, 83, 88, 89, 91, 93, 95, 98, 105, 106, 107, 108, 110, 111, 113, 114, 116, 117], "explic": 44, "just": [44, 45, 55, 56, 64, 65, 69, 73, 75, 82, 84, 88, 89, 90, 92, 95, 97, 99, 112, 116], "still": [44, 56, 65, 66, 91, 93, 101, 118], "static_cast": 44, "option": [44, 48, 52, 56, 57, 59, 62, 63, 64, 65, 71, 75, 76, 77, 82, 86, 91, 93, 95, 96, 97, 101, 103, 115, 116, 117, 119], "batch_siz": [44, 91, 108], "end": [44, 52, 60, 62, 70, 71, 76, 77, 82, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "statu": [44, 83], "reset": [44, 96, 97, 101, 104, 116], "incas": 44, "go": [44, 55, 56, 65, 68, 88, 89, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 118], "again": [44, 58, 60, 82, 95, 99], "stringstream": 44, "ss": 44, "cache_": 44, "clear": 44, "ifstream": 44, "io": [44, 67, 110, 111, 113], "binari": [44, 91], "noskipw": 44, "good": [44, 60, 65, 82, 97], "copi": [44, 60, 65, 67, 70, 74, 83, 109], "istream_iter": 44, "back_insert": 44, "nullptr": [44, 45, 49], "ofstream": [44, 89], "cache_fil": [44, 74, 91], "reinterpret_cast": 44, "cache_size_": 44, "arrayref": [45, 48, 49], "friend": 45, "ostream": 45, "os": [45, 67, 97], "dtype": [45, 48, 49, 52, 63, 64, 65, 70, 71, 72, 75, 76, 77, 90, 95, 96, 102, 104, 105, 109, 110, 111, 112, 113, 114, 115], "device_typ": [45, 46, 76, 91, 92, 119], "int64_t": [45, 46, 48, 49, 91, 119], "core": [45, 52, 55, 56, 59, 64, 71, 76, 89, 93, 118, 119], "agx": 45, "platform": [45, 52, 59, 64, 66, 67, 71, 94, 119], "xavier": [45, 119], "dla_cor": [45, 46, 52, 76, 91, 92, 119], "allow_gpu_fallback": [45, 46, 71, 76, 77, 91, 92, 119], "customclasshold": [45, 48], "min_shap": [45, 48, 63, 65, 71, 76, 77, 90, 102, 105, 112, 114], "opt_shap": [45, 48, 63, 71, 76, 77, 90, 102, 105, 112, 114], "max_shap": [45, 48, 63, 65, 71, 76, 77, 90, 102, 105, 112, 114], "shape": [45, 47, 48, 49, 52, 56, 60, 63, 65, 69, 70, 71, 72, 75, 76, 77, 78, 90, 93, 95, 98, 105, 108, 109, 110, 111, 113, 116, 119], "doubl": [45, 48, 49, 52, 63, 71, 76, 77, 82, 116], "tensor_domain": [45, 48, 76], "input_is_dynam": 45, "ivalu": [45, 47, 49, 53, 58, 60, 89], "input_signatur": [45, 47, 49, 77, 90], "nest": [45, 49, 50, 82, 83], "full": [45, 49, 52, 60, 64, 71, 73, 76, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 116, 119], "spec": [45, 48, 49, 52, 73, 76, 77, 92, 97], "flatten": [45, 47, 70, 88, 89, 108], "fixed_s": [45, 49], "reflect": [45, 76], "builderconfig": 45, "graph_input": [45, 49], "enabled_precis": [45, 49, 63, 64, 71, 75, 76, 77, 89, 90, 91, 92, 95, 96, 97, 99, 100, 101, 102, 103, 104, 106, 107, 108, 109, 110, 111, 113, 115, 119], "disable_tf32": [45, 49, 64, 71, 75, 76, 77, 91, 95, 106, 107], "sparse_weight": [45, 49, 64, 65, 71, 75, 76, 77, 95], "refit": [45, 
49, 64, 69, 71, 76, 77, 92, 95, 97, 98, 99, 110], "truncate_long_and_doubl": [45, 49, 63, 64, 77, 103], "allow_shape_tensor": [45, 49, 77], "uint64_t": [45, 49], "num_avg_timing_it": [45, 49, 64, 71, 75, 76, 77, 92, 95], "workspace_s": [45, 49, 52, 64, 71, 75, 76, 77, 95, 100, 102, 104], "dla_sram_s": [45, 49, 52, 64, 71, 75, 76, 77, 95], "1048576": [45, 49, 64, 71, 75, 76, 77, 95], "dla_local_dram_s": [45, 49, 52, 64, 71, 75, 76, 77, 95], "1073741824": [45, 49, 64, 71, 75, 76, 77, 95], "dla_global_dram_s": [45, 49, 52, 64, 71, 75, 76, 77, 95], "536870912": [45, 49, 64, 71, 75, 76, 77, 95], "require_full_compil": [45, 49, 64, 71, 75, 76, 77, 95], "min_block_s": [45, 49, 56, 63, 64, 71, 75, 76, 77, 93, 94, 95, 96, 97, 100, 101, 102, 104, 108], "3": [45, 49, 52, 55, 56, 58, 63, 64, 65, 67, 68, 70, 71, 74, 76, 77, 82, 83, 86, 88, 89, 91, 92, 94, 95, 96, 97, 99, 100, 102, 105, 106, 107, 108, 109, 112, 114, 117, 119], "torch_executed_op": [45, 49, 56, 63, 64, 71, 75, 76, 77, 95, 100, 101, 102, 104], "torch_executed_modul": [45, 49, 56, 71, 76, 77], "member": [46, 47, 48, 49], "hold": [46, 47, 48, 53, 60, 76, 91], "relat": [46, 82, 101, 104], "let": [46, 52, 55, 60, 65, 71, 76, 77, 80, 82, 110, 111, 112, 113, 118], "layer": [46, 49, 52, 53, 55, 60, 62, 64, 65, 71, 75, 76, 77, 89, 91, 93, 95, 106, 107, 108, 110, 111, 112, 113, 114, 115, 118, 119], "thei": [46, 52, 53, 54, 55, 58, 60, 64, 65, 71, 74, 75, 76, 80, 82, 90, 93, 97], "complex": [47, 49, 64, 66, 88, 90, 99, 107], "either": [47, 48, 52, 60, 62, 71, 76, 77, 80, 82, 88, 89, 90, 93, 94, 95, 97, 117], "one": [47, 54, 55, 60, 64, 65, 67, 71, 75, 76, 82, 88, 89, 90, 93, 95, 101, 104, 106, 107, 110, 111, 113], "rang": [48, 49, 52, 65, 76, 95, 96, 97, 102, 109, 112, 114], "optim": [48, 52, 63, 64, 65, 69, 71, 72, 74, 76, 88, 89, 90, 100, 102, 103, 104, 109, 112, 114, 118], "profil": [48, 72, 75, 115], "singl": [48, 52, 55, 56, 65, 76, 82, 88, 89, 91, 109, 116], "repres": [48, 49, 54, 60, 65, 68, 82], "signifi": [48, 55], "static": [48, 49, 53, 60, 63, 64, 71, 76, 77, 80, 89, 108, 114], "three": [48, 57, 59, 65, 72, 76, 82, 83, 110, 111, 112, 113], "min": [48, 52, 60, 70, 76, 97, 102, 114], "optimin": 48, "max": [48, 52, 60, 70, 76, 80, 97, 102, 108, 114], "allow": [48, 49, 52, 53, 54, 55, 56, 62, 64, 65, 66, 71, 76, 77, 80, 93, 95, 97, 100, 102, 104, 109, 116], "argument": [48, 52, 54, 55, 58, 60, 62, 64, 65, 71, 75, 76, 77, 82, 83, 89, 90, 93, 94, 95, 114], "expect": [48, 54, 55, 60, 76, 89, 90, 112], "tradit": [48, 71, 76, 77, 91], "convect": 48, "produc": [48, 53, 54, 58, 60, 63, 76, 82, 89, 112], "low": [48, 65, 93, 99], "high": [48, 55, 56, 80, 93, 95, 118], "weight": [48, 49, 52, 53, 64, 65, 69, 70, 71, 76, 77, 82, 89, 97, 98, 99, 103, 110, 112], "first": [48, 53, 54, 55, 65, 68, 82, 83, 89, 90, 91, 93, 95, 97, 99, 101, 110, 111, 113, 114, 117, 118], "calcul": [48, 53, 56, 89, 95, 109], "detect": [48, 58, 76], "float32": [48, 49, 52, 63, 64, 65, 71, 76, 77, 95, 99, 103, 106, 107, 109, 114, 115], "dynam": [48, 49, 63, 65, 69, 71, 72, 76, 77, 78, 93, 97, 98, 101, 103, 104, 107, 109, 110, 116], "opt": [48, 66, 75, 76, 105], "minimum": [48, 49, 52, 56, 63, 64, 71, 76, 77, 95, 109], "maximum": [48, 49, 52, 64, 65, 71, 72, 76, 77, 102, 104, 109, 110, 111, 113], "accept": [48, 52, 54, 58, 60, 66, 76, 89, 90, 101, 117], "exampl": [48, 56, 58, 59, 60, 65, 66, 71, 73, 75, 76, 77, 78, 80, 81, 83, 86, 88, 89, 90, 91, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117], "s": [48, 49, 53, 56, 58, 
60, 63, 65, 66, 67, 69, 71, 72, 75, 76, 80, 82, 83, 88, 89, 91, 93, 95, 97, 109, 110, 111, 112, 113, 114, 116, 117], "cannot": [48, 55, 56, 65, 66, 71, 75, 76, 77, 81, 88, 94, 95], "through": [48, 53, 54, 55, 56, 58, 64, 65, 71, 73, 74, 82, 89, 90, 95, 99, 100, 112, 118], "altern": [48, 56, 62, 63, 76, 90, 93, 105, 112, 117], "refer": [48, 54, 57, 59, 65, 81, 86, 89, 91, 95, 108, 110, 111, 113, 114, 117], "given": [48, 49, 52, 54, 55, 65, 71, 72, 74, 76, 77, 88, 89, 90, 92, 93, 114], "kernel": [48, 49, 52, 60, 64, 65, 69, 71, 76, 77, 93, 98, 110, 115, 116], "ani": [48, 52, 53, 54, 60, 62, 64, 65, 70, 71, 74, 75, 76, 77, 80, 82, 89, 90, 91, 93, 95, 102, 114], "event": [48, 64, 96, 97], "place": [48, 55, 62, 65, 82, 83, 84, 91, 95, 108], "variabl": [48, 65, 75, 76], "dimens": [48, 55, 65, 72, 76, 102, 112, 114, 115], "domain": [48, 76, 83, 91], "convien": 49, "fix": [49, 65, 82, 95, 116, 119], "describ": [49, 56, 60, 76, 88, 92, 110, 111, 113], "entri": [49, 60, 97], "okai": 49, "ha": [49, 53, 54, 55, 56, 57, 59, 60, 62, 64, 65, 66, 67, 71, 72, 76, 82, 83, 88, 89, 91, 93, 94, 97, 100, 108, 112, 114, 118], "flaten": 49, "precis": [49, 52, 63, 64, 65, 69, 71, 76, 89, 90, 91, 102, 104, 106, 107, 109, 119], "dure": [49, 52, 54, 56, 60, 63, 64, 71, 74, 76, 91, 93, 106, 107, 109, 110, 111, 112, 113, 114, 116], "prevent": [49, 52, 54, 56], "tf32": [49, 52, 64, 71], "comput": [49, 64, 65, 66, 67, 71, 75, 82, 91, 94, 98, 110, 112], "inner": [49, 83, 112], "product": [49, 67, 76], "round": [49, 71, 76, 77, 95], "10": [49, 66, 67, 71, 72, 76, 77, 86, 88, 89, 91, 108, 109, 110, 111, 112, 113, 114, 115], "bit": [49, 60, 65, 66, 71, 76, 77, 89], "mantissa": [49, 71, 76, 77], "befor": [49, 54, 55, 56, 59, 60, 65, 71, 76, 77, 89, 110, 111, 113, 114], "multipli": [49, 71, 76, 77], "accumul": [49, 64, 71, 76, 77, 106, 107], "sum": [49, 65, 70, 71, 76, 77, 95, 108], "23": [49, 55, 71, 76, 77, 83], "behavior": [49, 56, 65, 71, 76, 77, 93, 106, 107, 114, 116, 117], "sparsiti": [49, 52, 65, 71, 76, 77], "conv": [49, 52, 89, 95], "fc": [49, 52, 55], "truncat": [49, 52, 63, 64, 71, 76, 77], "long": [49, 52, 53, 63, 76, 82, 83], "float": [49, 52, 63, 64, 70, 76, 88, 89, 90, 91, 92, 95, 96, 97, 100, 101, 104, 105, 115], "ishap": 49, "restrict": [49, 64, 71, 76, 77, 114], "cuda": [49, 58, 63, 65, 67, 68, 71, 72, 75, 76, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 103, 105, 106, 107, 108, 109, 110, 111, 113, 114, 115, 116, 117], "safeti": [49, 52, 76], "averag": [49, 52, 64, 71, 76, 77, 95], "time": [49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 64, 65, 66, 68, 69, 71, 72, 75, 76, 77, 80, 82, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "workspac": [49, 52, 64, 65, 66, 71, 72, 76, 77, 95, 101, 102, 104], "fast": [49, 52, 64, 68, 71, 76, 77], "softwar": [49, 52, 64, 71, 76, 77, 82], "manag": [49, 52, 53, 55, 57, 59, 60, 64, 66, 67, 71, 73, 75, 76, 77, 89, 105, 116], "ram": [49, 52, 64, 71, 76, 77], "commun": [49, 52, 64, 71, 76, 77, 89], "within": [49, 52, 57, 59, 64, 69, 71, 75, 76, 77, 80, 82, 98, 106, 107, 110, 112], "host": [49, 52, 64, 66, 71, 76, 77, 95, 109, 110, 111, 113], "share": [49, 52, 64, 66, 71, 75, 76, 77, 97], "across": [49, 52, 55, 56, 64, 71, 76, 77, 80], "metadata": [49, 52, 54, 58, 60, 64, 71, 76, 77, 80, 100, 114, 115], "quantizatiom": 49, "instead": [49, 52, 53, 54, 55, 66, 71, 75, 76, 89, 93, 100, 108, 116], "potenti": [49, 71, 76, 85], "subgraph": [49, 52, 53, 54, 55, 60, 62, 89, 95, 97, 118], "aten": [49, 54, 55, 56, 60, 61, 64, 69, 70, 71, 76, 77, 89, 93, 
101, 118], "thrown": [49, 71, 76, 77], "empti": [49, 71, 72, 76, 77, 83, 88, 95], "torch_tensorrtnamespac": 50, "loggingenum": 50, "levelnamespac": 50, "ptqtemplat": 50, "int8cachecalibratortempl": 50, "int8calibratornamespac": 50, "torchscriptstruct": 50, "compilespecstruct": 50, "deviceclass": 50, "devicetypestruct": 50, "graphinputsstruct": 50, "inputclass": 50, "datatypeclass": 50, "tensorformatenum": 50, "cppdirectori": 50, "includedirectori": 50, "torch_tensorrtfil": 50, "hfile": 50, "relationship": 50, "inherit": [50, 65, 71, 91], "subdirectori": 51, "definit": [51, 54, 60, 82], "cli": [52, 90], "It": [52, 54, 55, 56, 57, 59, 60, 65, 66, 69, 76, 80, 82, 94, 95, 109, 112, 116, 118], "serv": [52, 58, 65, 69, 71, 76], "easi": [52, 53, 55, 89, 91], "wai": [52, 64, 65, 66, 88, 89, 91, 93, 95, 97, 100, 112, 116, 117], "command": [52, 64, 66, 82, 83, 88, 89, 110, 111, 113], "line": [52, 66, 83, 89, 99], "quickli": [52, 89, 91, 110, 111, 113], "part": [52, 56, 59, 65, 75, 80, 81, 82, 95, 97], "deploy": [52, 75, 89, 90, 91, 110, 111, 112, 113, 116, 119], "pipelin": [52, 89, 99, 103, 119], "basic": [52, 56, 65, 83, 110, 111, 113], "featur": [52, 56, 65, 66, 89, 91, 92, 103, 108, 109, 112, 118], "though": [52, 59, 60, 88, 89, 118], "alreadi": [52, 53, 54, 55, 89, 91, 93, 95, 98, 110, 111, 113, 114], "two": [52, 55, 60, 62, 64, 65, 66, 76, 82, 83, 87, 88, 90, 91, 93, 97, 110, 111, 113, 114], "embed": [52, 54, 58, 70, 77, 82, 119], "plan": [52, 59, 63, 64, 71], "after": [52, 53, 55, 56, 62, 65, 71, 75, 76, 88, 89, 90, 101, 104, 110, 111, 113, 116], "link": [52, 53, 62, 69, 80, 81, 86, 89, 95, 116], "against": [52, 89, 93], "libtorchtrt": [52, 66, 89], "python": [52, 56, 59, 62, 64, 65, 67, 71, 72, 75, 76, 77, 82, 83, 89, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 116, 119], "import": [52, 55, 56, 63, 64, 65, 66, 67, 68, 75, 80, 82, 88, 89, 90, 92, 93, 95, 96, 97, 99, 110, 111, 113, 114, 116, 117], "packag": [52, 55, 64, 67, 89], "aspect": 52, "ident": [52, 62, 71, 76, 100], "standard": [52, 58, 66, 69, 71, 75, 76, 77, 82, 92, 93, 95, 99, 112, 116], "load": [52, 56, 58, 64, 65, 68, 71, 74, 75, 76, 77, 89, 90, 91, 92, 94, 95, 96, 97, 99, 100, 109, 110, 111, 112, 113, 116, 118], "like": [52, 53, 55, 58, 60, 65, 66, 68, 76, 81, 82, 88, 89, 90, 91, 93, 95, 97, 99, 100, 109, 110, 111, 113, 116], "would": [52, 54, 60, 64, 65, 66, 67, 75, 89, 90, 92, 93, 95, 110, 111, 113, 116], "input_file_path": [52, 119], "output_file_path": [52, 119], "input_spec": [52, 65, 72], "displai": [52, 62, 64, 73, 80, 116], "menu": [52, 80, 82], "verbios": 52, "v": [52, 67, 83, 108, 110, 111, 113], "verbos": [52, 64, 65, 71, 72, 83, 102, 104], "about": [52, 53, 58, 60, 66, 75, 80, 89, 110, 111, 113, 114], "process": [52, 56, 64, 76, 81, 82, 88, 91, 92, 100, 101, 105, 110, 111, 112, 113, 116], "onto": [52, 58], "consol": 52, "w": [52, 66, 76], "disabl": [52, 64, 66, 71, 75, 80, 81, 93, 97, 109, 116], "i": [52, 55, 60, 66, 68, 70, 82, 83, 88, 89, 91, 95, 96, 97, 106, 108], "debugg": [52, 71, 76, 77], "fallback": [52, 57, 59, 60, 100, 119], "model": [52, 56, 58, 63, 68, 71, 72, 73, 74, 76, 78, 88, 89, 90, 91, 92, 96, 97, 99, 114, 116, 118], "throw": [52, 55, 76, 89], "spars": [52, 54, 64, 70, 71], "p": [52, 70, 89, 110, 111, 113, 119], "repeat": [52, 70], "f32": [52, 71, 75, 76, 95], "half": [52, 64, 76, 82, 89, 90, 91, 92, 95, 101, 102, 106, 107, 109, 115, 119], "float16": [52, 76, 95, 99, 103, 115], "f16": [52, 76, 89, 110, 111, 113, 119], "i8": [52, 76], "d": [52, 67, 
76, 82, 83, 89, 119], "multi": [52, 75], "dlacor": 52, "avail": [52, 54, 60, 62, 64, 65, 66, 67, 71, 75, 76, 80, 95, 109, 112, 118, 119], "dla_standalon": [52, 76], "file_path": [52, 76, 94, 117], "teo": 52, "op_nam": 52, "op": [52, 53, 54, 55, 56, 57, 59, 60, 62, 63, 64, 75, 76, 89, 93, 101, 116, 118], "partial": [52, 82], "tem": 52, "module_nam": 52, "mod": [52, 56, 65, 71, 86, 89, 91, 115], "mb": [52, 78], "num_op": 52, "block": [52, 53, 55, 56, 64, 71, 86, 118], "treat": 52, "num": 52, "avg": 52, "num_it": 52, "sram": 52, "local": [52, 55, 66, 67, 80, 89], "dram": 52, "atol": 52, "absolut": [52, 66], "toler": 52, "threshold": 52, "numer": [52, 65, 83], "deviat": 52, "1e": [52, 99, 100], "rtol": 52, "rel": [52, 56], "5": [52, 56, 58, 59, 64, 65, 66, 67, 71, 75, 76, 82, 83, 86, 88, 89, 93, 95, 99, 101, 109, 110, 111, 113], "skip": 52, "complianc": 52, "64bit": [52, 94], "32bit": 52, "custom": [52, 62, 63, 65, 66, 69, 98, 106, 107, 110], "dll": 52, "n": [52, 60, 62, 76, 89, 91, 93, 95, 96], "min_n": 52, "min_c": 52, "min_h": 52, "min_w": 52, "opt_n": 52, "opt_c": 52, "opt_h": 52, "opt_w": 52, "max_n": 52, "max_c": 52, "max_h": 52, "max_w": 52, "32": [52, 76, 88, 89, 90, 91, 106, 107, 108, 119], "flag": [52, 56, 57, 59, 64, 66, 71, 74, 76, 90, 105, 106, 107, 116, 117], "forc": [52, 63, 65, 71, 76, 77, 80], "posit": [52, 54, 65, 76, 80], "test": [52, 56, 59, 65, 66, 67, 71, 76, 82, 83, 91, 108, 110, 111, 112, 113], "ssd_trace": 52, "pt": [52, 65, 89, 106, 107, 110, 111, 113], "ssd_trt": 52, "300": [52, 92], "512": [52, 71, 76, 77, 108, 112], "1024": [52, 71, 76, 77, 106, 112], "simplifi": [53, 95], "form": [53, 75, 76, 82, 90, 110, 111, 113], "up": [53, 55, 56, 57, 58, 59, 62, 65, 66, 71, 76, 82, 88, 93, 95, 97, 100, 101, 104, 109, 112], "context": [53, 57, 58, 59, 64, 73, 75, 93, 105, 116], "inetworkdefinit": [53, 54], "record": [53, 88, 96, 97, 105, 116], "togeth": [53, 60, 89], "start": [53, 56, 65, 70, 74, 76, 83, 89, 92, 95, 96, 97, 112], "look": [53, 54, 55, 68, 71, 76, 88, 91, 92, 93, 97, 110, 111, 113, 114], "assembl": [53, 62, 89], "resourc": [53, 91, 95], "coupl": [53, 59, 65, 116], "state": [53, 54, 60, 62, 75, 89, 93, 99], "been": [53, 60, 64, 66, 67, 74, 83, 89, 94, 97, 100, 118], "evaluated_value_map": [53, 60], "stage": [53, 65], "arg": [53, 54, 62, 65, 71, 74, 75, 76, 86, 89, 93, 94, 95, 97, 108, 112], "itensor": [53, 54, 60, 65, 89, 93, 95], "value_tensor_map": [53, 60], "typic": [53, 60, 76, 110, 111, 113], "abl": [53, 55, 60, 62, 65, 91, 92, 95, 100], "system": [53, 60, 62, 64, 69, 71, 75, 76, 77, 93, 94, 95, 97, 100, 118], "registri": [53, 54, 89, 95], "enter": [53, 76], "recurs": 53, "resolv": [53, 55, 57, 59, 101, 104], "until": [53, 56, 59, 60, 66, 71, 76, 118], "final": [53, 56, 57, 59, 66, 93, 95, 101, 104, 112], "some": [53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 65, 66, 76, 81, 82, 89, 91, 93, 95, 97, 114, 118], "These": [53, 54, 56, 58, 62, 64, 66, 71, 74, 75, 76, 80, 82, 91, 93, 110, 111, 113, 118], "those": [53, 54, 62, 64, 82], "do": [53, 54, 55, 56, 60, 63, 65, 81, 83, 88, 89, 90, 91, 93, 95, 106, 107, 119], "theori": [53, 82], "kind": [53, 65], "common": [53, 55, 65, 72, 82, 93, 97], "prim": [53, 55, 56, 58, 70, 88, 89], "constant": [53, 54, 55, 56, 89, 95], "emit": 53, "listconstruct": [53, 56, 58, 89], "make": [53, 54, 65, 66, 67, 71, 76, 82, 84, 89, 90, 91, 95, 97, 110, 111, 112, 113, 119], "associ": [53, 60, 89, 97, 116], "where": [53, 54, 55, 60, 62, 64, 65, 71, 75, 76, 77, 83, 89, 91, 93, 100], "result": [53, 55, 56, 66, 68, 71, 73, 75, 76, 77, 80, 88, 
90, 94, 95, 99, 100, 109, 110, 111, 113, 115, 118], "done": [53, 56, 59, 95, 100, 110, 111, 113, 117], "mai": [53, 54, 56, 58, 59, 65, 66, 71, 75, 76, 77, 82, 83, 88, 89, 90, 91, 93, 95, 100, 101, 104, 109, 110, 111, 113, 116], "For": [53, 56, 62, 63, 64, 65, 66, 68, 72, 76, 80, 82, 83, 88, 89, 91, 92, 93, 95, 99, 101, 108, 110, 111, 112, 113, 116, 117], "more": [53, 64, 65, 66, 67, 69, 71, 76, 80, 83, 88, 89, 90, 91, 92, 95, 97, 99, 102, 104, 110, 111, 113, 116], "writing_convert": [53, 89], "locat": [54, 62, 66, 91, 93, 95], "py": [54, 55, 59, 62, 65, 66, 67, 78, 80, 82, 87, 88, 89, 91, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 114], "convers": [54, 55, 56, 58, 63, 64, 65, 71, 76, 77, 89, 93, 95, 112, 114], "decror": 54, "dynamo_tensorrt_convert": [54, 93, 95], "signatur": [54, 77], "leaky_relu": [54, 70], "def": [54, 62, 65, 82, 88, 90, 93, 95, 96, 97, 101, 108, 109, 110, 111, 113, 114, 115], "leaky_relu_convert": 54, "ctx": [54, 60, 89, 93, 95, 109], "conversionctx": [54, 60, 89, 93], "tupl": [54, 58, 63, 65, 71, 72, 75, 76, 77, 90, 93, 95, 97, 100, 114, 115], "kwarg": [54, 65, 71, 74, 75, 76, 93, 95, 112], "dict": [54, 71, 75, 76, 77, 93, 95, 97], "union": [54, 60, 64, 71, 75, 76, 77, 89, 93], "sequenc": [54, 62, 65, 71, 72, 75, 76, 77, 82, 93, 95, 109, 112], "decor": [54, 62, 65, 93], "kei": [54, 82, 88, 97, 110, 111, 113, 114], "node": [54, 55, 56, 57, 59, 60, 62, 64, 65, 71, 72, 89, 93, 95, 108, 112, 114], "capability_valid": [54, 93], "lambda": [54, 60, 82, 89, 93, 110, 111, 113], "fx": [54, 62, 63, 71, 75, 76, 89, 90, 93, 95, 100, 117], "determin": [54, 55, 64, 65, 76, 93, 109, 114, 116], "properli": [54, 66], "handl": [54, 55, 56, 58, 64, 65, 75, 76, 95], "partition": [54, 71, 76, 95], "sure": [54, 66, 67, 89, 90, 110, 111, 113, 119], "prioriti": [54, 93], "develop": [54, 65, 66, 67, 69, 82, 83, 89, 93, 95], "bodi": [54, 82, 83], "nativ": [54, 59, 61, 89, 93, 95, 100], "numpi": [54, 76, 95, 96, 97, 99, 100, 109, 110, 111, 113], "frozen": 54, "attribut": [54, 55, 56, 58, 65, 76, 82, 89], "previou": [54, 80, 101], "correspond": [54, 60, 65, 66, 75, 76, 93, 97, 99, 108, 116], "edg": [54, 82], "well": [54, 63, 66, 69, 73, 75, 82, 89, 91, 93, 97, 105, 117], "being": [54, 65, 66, 71, 89, 93, 95, 100], "truth": 54, "http": [54, 61, 64, 66, 67, 80, 82, 88, 89, 91, 93, 95, 99, 101, 104, 108, 110, 111, 112, 113, 114, 116], "github": [54, 61, 64, 66, 67, 80, 89, 91, 101, 104, 108, 110, 111, 113, 116], "com": [54, 61, 64, 66, 67, 89, 91, 99, 101, 104, 108, 110, 111, 113, 116], "blob": [54, 61, 66, 80, 91, 97], "main": [54, 55, 56, 57, 58, 59, 60, 63, 65, 66, 80, 82, 84, 89, 93, 95, 106, 108], "src": [54, 58, 61, 70], "native_funct": [54, 61], "yaml": [54, 61], "sinc": [54, 55, 64, 65, 67, 75, 82, 88, 89, 91, 93, 96, 97, 100], "mani": [54, 56, 64, 65, 80, 82, 83, 93, 97, 100, 118], "composit": [54, 89], "raw": [54, 80, 93], "impl": [54, 93], "subpackag": 54, "chain": [54, 60], "primarili": [54, 59, 66, 89, 93], "manipul": [54, 62, 76], "net": [54, 60, 82, 83, 89, 95, 110, 111, 113], "addit": [54, 55, 64, 65, 75, 76, 89, 93, 95, 97, 100, 112, 114], "call_modul": 54, "call_funct": [54, 62, 65], "eg": [54, 110, 111, 113, 115], "aten_": 54, "_leaky_relu": 54, "opoverloadpacket": 54, "while": [54, 56, 66, 75, 91, 93, 99, 109, 110, 111, 112, 113, 116, 118], "opoverload": 54, "particular": [54, 64, 97], "collect": [54, 56, 64, 71, 76, 77, 89, 90, 108], "trtinterpret": [54, 65, 72], "along": [54, 76], "match": [54, 55, 93, 100], "special": [54, 56], 
"account": [54, 110, 111, 113], "illustr": [54, 65, 102, 106, 107, 112], "scale_grad_by_freq": [54, 70], "embedding_param_valid": 54, "establish": 54, "subset": [54, 64, 71, 76, 91, 112], "converter_util": [54, 95], "enforce_tensor_typ": 54, "dictionari": [54, 76, 77, 92, 101], "between": [54, 55, 56, 60, 66, 76, 82, 83, 91, 97, 99, 106, 109], "possibl": [54, 66, 82, 93, 95, 97, 110, 111, 112, 113], "prefer": [54, 64, 66, 89], "keyword": [54, 62, 71, 75, 76, 77, 93, 101, 104], "both": [54, 56, 64, 66, 69, 71, 72, 75, 76, 80, 82, 88, 91, 93, 95, 97, 110, 111, 113], "enforc": [54, 89], "situat": 54, "partit": [54, 55, 63, 64, 71, 76, 93, 118], "greater": [54, 71, 73, 76], "than": [54, 55, 64, 66, 71, 76, 81, 82, 93, 96, 97, 99, 109, 112, 116], "3d": [54, 65], "autocast": 54, "therebi": [54, 58, 95, 112], "limit": [54, 55, 73, 81, 91, 94, 97, 98, 109, 110, 118], "author": [54, 83], "conv_nod": 54, "7": [54, 56, 58, 59, 75, 76, 86, 89, 95, 101, 102, 104, 108, 114], "ignor": [54, 71, 75, 76, 95], "misc": [54, 95], "trttensor": 54, "np": [54, 93, 95, 96, 97, 99, 100, 109, 110, 111, 113], "ndarrai": [54, 95], "aten_ops_convolut": 54, "conversioncontext": [54, 93, 95], "side": [54, 55, 80, 89, 93], "effect": [54, 55, 64, 65, 71, 80, 89, 91, 93, 95, 112], "term": [54, 76, 82, 83, 91, 93, 95, 112], "getitem": 54, "categor": 54, "modif": [54, 62, 76], "op_evalu": 54, "capbility_valid": 54, "opcod": 54, "decompos": 54, "suboper": 54, "separ": [54, 56, 57, 59, 66], "Such": 54, "via": [54, 64, 65, 67, 69, 71, 75, 76, 77, 80, 86, 90, 91, 101, 102, 104, 106, 107, 112, 114, 116, 117, 118], "register_torch_trt_decomposit": 54, "addmm_replac": 54, "replac": [54, 56, 62, 66, 67, 74, 95, 108, 118], "input_": 54, "mat1": 54, "mat2": [54, 70], "beta": [54, 65, 70, 77], "alpha": [54, 65, 70, 83], "mul": [54, 56, 70, 93], "matmul": [54, 55, 64, 70, 71, 89, 106, 107, 114], "modifi": [54, 56, 62, 65, 83, 99, 114], "edit": [54, 66, 80], "torch_enabled_decomposit": 54, "torch_disabled_decomposit": 54, "disjoint": 54, "preced": [54, 82], "over": [54, 57, 59, 65, 82, 108, 109, 110, 111, 113, 118], "much": [54, 60, 80, 82, 91], "significantli": [54, 55, 80, 97], "easier": [54, 57, 59, 60, 65, 71, 75, 76, 89, 91, 95, 99], "tri": 54, "made": [55, 57, 59, 76, 82], "represent": [55, 60, 65, 88, 112, 118], "instanc": [55, 62, 64, 66, 71, 74, 75, 88, 89, 93, 112, 116], "idea": [55, 82, 93], "reduc": [55, 56, 57, 59, 65, 71, 76, 91, 95, 97, 112, 116], "actual": [55, 58, 60, 65, 88, 89, 95], "aim": [55, 118], "closer": 55, "scope": [55, 95, 101, 104], "csrc": [55, 61], "common_subexpression_elimin": 55, "subexpress": 55, "dead_code_elimin": 55, "exception_elimin": 55, "wa": [55, 58, 62, 64, 65, 71, 75, 76, 82, 89, 93, 94, 118], "1013": 55, "ne": [55, 70], "1012": 55, "24": [55, 67, 110, 111, 113], "lib": [55, 66, 67, 89], "python3": [55, 66, 89], "site": [55, 66, 82, 89], "nn": [55, 61, 65, 71, 72, 75, 76, 77, 88, 89, 90, 93, 95, 101, 108, 114, 115, 118], "batchnorm": 55, "248": 55, "11": [55, 66, 82, 86, 89, 110, 111, 113], "block0": 55, "raiseexcept": 55, "249": 55, "12": [55, 56, 67, 82, 86, 88, 89, 102, 110, 111, 113, 114], "block1": 55, "guard_elimin": 55, "whose": [55, 65, 102], "freeze_modul": 55, "propag": 55, "fuse_addmm_branch": 55, "variant": [55, 116], "caught": 55, "ret": 55, "622": 55, "self": [55, 58, 60, 70, 75, 76, 88, 89, 90, 93, 95, 97, 101, 108, 112, 114, 115, 119], "bia": [55, 70, 89, 108], "x9": 55, "3677": 55, "output0": [55, 110, 111, 113, 115], "add_": [55, 70, 89, 93], "fuse_linear": 55, "back": [55, 
56, 58, 59, 75, 76, 82, 88, 89, 95, 118], "fuse_flatten_linear": 55, "implicitli": [55, 76], "connect": [55, 71, 76, 77, 82, 99, 110, 111, 113, 119], "higher": [55, 64, 71, 76, 80, 82, 88, 109], "1d": 55, "lower_graph": 55, "access": [55, 60, 65, 80, 89, 92, 118], "rather": 55, "getattr": [55, 58, 88, 89], "trainabl": 55, "remain": [55, 76, 91, 118], "lower_tupl": 55, "lowersimpletupl": 55, "tupleconstruct": [55, 58], "tupleunpack": 55, "leav": [55, 62, 64, 71], "statement": [55, 82, 93], "loweralltupl": 55, "_all_": 55, "rais": [55, 65, 76, 94], "onnx": 55, "module_fallback": 55, "consist": [55, 65, 82, 95, 116, 118], "pair": [55, 60, 66, 82, 91, 112], "delimit": 55, "around": [55, 58, 60, 64, 66, 71, 75, 82, 85, 88, 95], "second": [55, 65, 82, 90, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "mark": [55, 56, 80, 97], "notatemoduleforfallback": 55, "marknodesforfallback": 55, "tell": [55, 56, 57, 58, 59, 60, 82, 118], "them": [55, 56, 58, 63, 64, 65, 66, 71, 75, 80, 89, 95, 97, 107, 112, 114, 118], "peephole_optimz": 55, "intent": [55, 82], "catch": [55, 76, 89], "small": [55, 95, 96, 110, 111, 113], "might": [55, 66, 80, 100, 114], "interest": [55, 82], "now": [55, 56, 59, 60, 65, 66, 76, 82, 89, 92, 93, 95, 97, 100, 109, 115, 116], "expand": [55, 70], "simpli": [55, 101, 112], "remove_contigu": 55, "remove_dropout": 55, "infer": [55, 64, 65, 71, 76, 77, 89, 91, 94, 100, 101, 109, 112, 114, 116, 117, 118], "remove_to": 55, "unpack_addmm": 55, "reus": [55, 65, 91, 97], "dedic": [55, 83], "unpack_log_softmax": 55, "softmax": [55, 65, 70, 108], "loop_unrol": 55, "suffici": [55, 66, 76], "short": [55, 64, 71, 82, 83, 100], "tile_to_repeat": 55, "instruct": [56, 57, 59, 65, 66, 89, 110, 111, 113], "criteria": [56, 57, 59, 64], "lack": [56, 57, 59, 65, 95, 109], "explicitli": [56, 57, 59, 66, 77, 90, 91, 92, 106, 107, 115], "On": 56, "segment": [56, 63, 95, 102, 104, 112], "verifi": [56, 71, 93, 95, 100], "Then": [56, 91, 92, 100, 110, 111, 113], "roughli": [56, 110, 111, 113], "analysi": 56, "everi": [56, 72, 75, 76, 89, 116], "complet": [56, 63, 71, 76, 88, 89], "mean": [56, 60, 64, 65, 70, 71, 72, 101, 109, 110, 111, 113, 118], "trace": [56, 65, 71, 75, 77, 88, 89, 110, 111, 113, 114, 117, 118], "tensorlist": [56, 60], "figur": [56, 83, 85], "our": [56, 59, 63, 88, 89, 110, 111, 113], "stitch": [56, 89], "altogeth": [56, 80], "brief": 56, "descript": [56, 83, 94, 108], "partitioninfo": 56, "api": [56, 59, 60, 62, 63, 64, 65, 75, 76, 77, 81, 89, 90, 91, 92, 95, 101, 102, 105, 109, 110, 111, 112, 113, 114, 116, 117], "maintain": [56, 58, 60, 76, 99, 118], "code": [56, 59, 62, 64, 65, 66, 81, 83, 88, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 114], "mymodel": [56, 63, 68, 90, 95, 114, 117], "ts_model": [56, 89], "trt_model": [56, 92, 95, 102, 106, 107, 108, 109, 110, 111, 113, 117], "off": [56, 58, 105], "consecut": [56, 63], "satisfi": [56, 62, 65], "forced_fallback_op": 56, "randn": [56, 63, 68, 71, 76, 77, 89, 92, 93, 97, 102, 105, 114, 115, 117], "224": [56, 63, 68, 71, 72, 76, 77, 89, 94, 97, 99, 100, 102, 105, 110, 111, 112, 113, 114, 117], "trt_ts_modul": [56, 90], "input_s": 56, "inputrang": 56, "cfg": [56, 89], "relu": [56, 70, 88, 89, 101, 108], "trt_mod": [56, 68, 89, 91, 119], "consid": [56, 77, 89, 95, 115], "segmentmodelwithdependencyawar": 56, "test_segment": 56, "20": [56, 67, 86, 100, 102, 104], "x_lgamma": 56, "lgamma": 56, "y_lgamma": 56, "div": [56, 70], "div_lgamma": 56, "27": [56, 89], "cat": [56, 
66, 67, 70, 108, 109], "greedi": [56, 106, 107, 109], "strategi": [56, 76], "travers": [56, 57, 59, 64], "gather": 56, "same": [56, 58, 62, 64, 65, 66, 71, 76, 80, 82, 88, 89, 92, 94, 95, 97, 100, 102, 104, 110, 111, 113, 114, 116, 117], "encount": [56, 64, 66, 93, 101, 104], "4": [56, 58, 63, 64, 65, 66, 70, 76, 78, 80, 82, 83, 86, 89, 95, 101, 103, 104, 105, 108, 114], "suboptim": 56, "arithmet": 56, "split": [56, 65, 70], "own": [56, 60, 64, 66, 71, 82, 89, 97, 108, 110, 111, 113], "could": [56, 64, 65, 95, 102, 104, 116], "rewrit": [56, 62], "portion": [56, 82, 95, 103], "without": [56, 60, 68, 71, 80, 82, 89, 91, 95, 96, 97, 100, 116], "reorder": 56, "seri": 56, "cleanli": 56, "approach": [56, 97], "achiev": [56, 112], "hit": 56, "larger": [56, 71, 76, 80, 109, 112], "boundari": [56, 74, 76], "guarante": [56, 75], "trigger": [56, 64, 65, 76, 89, 97, 99, 100, 118], "appear": [56, 82], "adjac": [56, 71, 76, 82], "As": [56, 65, 66, 76, 89, 93, 95, 97, 100, 118], "clean": [56, 62, 82, 101, 104], "step": [56, 65, 67, 70, 76, 91, 95, 100, 112], "consolid": [56, 88], "further": [56, 64, 65, 116, 118], "merg": 56, "identifi": 56, "do_not_merg": 56, "combin": [56, 64, 65], "condit": [56, 82, 118], "loop": [56, 64, 65, 106, 107], "ir": [57, 59, 60, 63, 64, 68, 71, 76, 88, 89, 90, 98, 101, 102, 104, 105, 110, 114], "larg": [57, 59, 80, 82, 89, 91, 100, 109, 112], "opset": [57, 59, 93], "compon": [57, 59, 66, 67, 74, 88, 116, 118], "evalu": [57, 58, 59, 108], "deploi": [57, 59, 69, 89, 91, 98, 110, 111, 113], "instanti": [57, 58, 59, 60, 89, 103], "wrap": [57, 58, 59, 65, 82, 85, 89, 92, 101, 104], "extend": [57, 59, 60, 70, 89, 97, 112], "providi": [57, 59], "stand": [58, 82], "interpret": [58, 65, 82], "execute_engin": [58, 75, 89], "stack": [58, 70, 91, 108, 118], "machin": [58, 66, 91, 94, 110, 111, 113], "pop": 58, "push": 58, "element": [58, 65, 82, 83, 86], "realiz": 58, "abstract": [58, 60, 83, 93], "__torch__": [58, 88, 89], "portabl": [58, 66, 77], "serializ": [58, 64, 88, 118], "instnanti": 58, "whatev": [58, 65, 95], "self_1": [58, 89], "torchvis": [58, 91, 92, 94, 97, 99, 100, 102, 105, 108, 110, 111, 113], "resnet": [58, 69, 78, 94, 98, 99, 110, 111, 112, 113], "___torch_mangle_4847": 58, "resnet_trt": 58, "input_0": [58, 89], "__torch___torchvision_models_resnet____torch_mangle_4847_resnet_trt_engin": 58, "listunpack": [58, 89], "multipl": [58, 66, 71, 75, 76, 82, 83, 91, 109, 110, 111, 113, 116], "repack": 58, "ssd": 58, "ssd300_trt": 58, "__torch___pytorch_detection_ssd_src_model_ssd300_trt_engin": 58, "holder": [58, 84], "torchbind": 58, "pickler": 58, "seril": 58, "zip": [58, 66, 99, 100, 110], "depickl": 58, "encod": [58, 112], "sm": 58, "correct": [58, 66, 80, 98, 99, 100, 108, 110, 111, 113], "bazel": [59, 66, 67], "linux": [59, 64, 67, 71, 89, 94], "x86_64": [59, 66], "aarch64": 59, "gcc": [59, 89], "untest": 59, "try": [59, 76, 82, 83, 89, 92, 95, 97, 110, 111, 113, 118], "older": 59, "repositori": [59, 66, 80, 87, 110, 111, 113], "notebook": [59, 69, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "doc": [59, 61, 66, 67, 80, 81, 82, 87, 93, 95, 114], "docsrc": 59, "third_parti": [59, 66], "toolchain": [59, 66, 67], "unstabl": 59, "subject": [59, 62, 118], "matur": 59, "most": [59, 65, 66, 72, 95, 100, 110, 111, 113, 116, 118], "hood": [59, 102, 118], "major": [59, 65, 76], "top": [59, 80, 84], "coordin": [59, 76], "ingest": 59, "flow": [60, 65, 82, 88, 112], "ilay": 60, "analogu": 60, "goal": [60, 64, 97], "registernodeconversionpattern": 
[60, 89], "helper": [60, 93], "pattern": [60, 76, 89, 109], "schema": [60, 89, 93, 95], "caus": [60, 64, 80, 101, 102, 104, 109, 116], "acthardtanh": 60, "torchtrt_unus": 60, "hardtanh": [60, 70], "scalar": [60, 70], "min_val": [60, 70], "max_val": [60, 70], "unwraptodoubl": 60, "new_lay": 60, "addactiv": 60, "activationtyp": [60, 65], "kclip": 60, "torchtrt_check": 60, "unabl": [60, 89, 95], "setalpha": 60, "setbeta": 60, "setnam": [60, 89], "util": [60, 62, 74, 77, 89, 91, 101, 104, 106, 107, 108, 109, 110, 111, 112, 113, 118], "node_info": [60, 89], "c_str": [60, 89], "out_tensor": [60, 89], "associatevalueandtensor": [60, 89], "getoutput": [60, 89], "log_debug": 60, "getdimens": [60, 89], "accord": [60, 64, 77, 94], "unwrap": 60, "tool": [60, 64, 65, 66, 89, 93, 97, 112], "don": [60, 65, 80, 82, 83, 91, 93, 108, 110, 111, 113, 114], "annot": [60, 89], "your": [60, 63, 64, 66, 67, 68, 75, 80, 82, 83, 87, 88, 89, 90, 92, 97, 114, 116], "Its": [60, 82], "track": [60, 91], "sort": [60, 70, 92], "live": [60, 82], "directli": [60, 62, 63, 66, 69, 74, 76, 91, 93, 95, 101, 117], "associatevalueandivalu": 60, "inspect": [60, 88, 89], "dataflow": [60, 89], "mechan": [60, 64, 65, 95, 100, 112], "safe": [60, 64, 71, 75, 76, 77], "unsur": 60, "deep": [60, 64, 69, 80, 91, 95, 119], "straight": 60, "chanc": 60, "none": [60, 64, 65, 70, 71, 72, 74, 75, 76, 77, 80, 82, 93, 95, 97, 101, 108, 109], "wrapper": [60, 65, 117], "similar": [60, 63, 64, 65, 66, 89, 92, 95, 106, 107], "tocustomclass": 60, "tensorcontain": 60, "istensor": 60, "iscustomclass": 60, "lot": [60, 63], "singular": 60, "becaus": [60, 65, 66, 72, 88, 89, 93, 95, 96, 97, 109, 115], "alloc": 60, "freed": 60, "destructor": 60, "destroi": [60, 83], "realli": 60, "think": [60, 82], "becom": [60, 66, 99], "benefit": [60, 89, 97, 109], "deal": [60, 97], "quit": [60, 66, 89, 112], "effici": 60, "batch_norm": [60, 70], "fusion": [60, 62, 65], "deeplearn": [61, 65, 67], "sdk": [61, 67, 110, 111, 113, 118], "matrix": 61, "html": [61, 66, 67, 82, 88, 91, 93, 95, 114], "c_api": 61, "python_api": 61, "org": [61, 66, 80, 82, 88, 89, 91, 93, 95, 114, 116], "stabl": [61, 67, 69, 77, 78, 80, 98, 110, 114], "master": [61, 66, 91, 116], "overview": [61, 69, 101, 105], "md": 61, "appli": [62, 63, 91, 100], "desir": [62, 71, 83, 91, 97], "coalesc": 62, "insert": [62, 64, 71, 89, 91, 93, 97, 100], "graphmodul": [62, 63, 71, 72, 76, 89, 90, 95, 100, 117, 118], "caller": 62, "invok": [62, 64, 65, 88, 89, 116], "lint": 62, "recompil": [62, 71, 76, 93, 97, 100, 104, 114, 118], "repair": 62, "disallow": 62, "repair_input_as_output": 62, "gm": [62, 71], "sample_input": [62, 65, 101], "scenario": [62, 64, 99, 109], "clone": [62, 66, 70, 95], "modified_graph": 62, "extract": [62, 89, 112], "placehold": [62, 93], "isinst": [62, 65, 95, 108], "issubclass": 62, "direct": [62, 86, 100, 116], "len": [62, 70, 95], "direct_output": 62, "inserting_aft": 62, "cloned_placehold": 62, "replace_input_with": 62, "date": [62, 83, 118], "eliminate_dead_cod": 62, "logger": [62, 73], "f": [62, 64, 65, 67, 76, 82, 88, 93, 94, 95, 108, 109], "__init__": [62, 75, 76, 82, 88, 93, 95, 97, 101, 108, 114, 115], "pass_manag": 62, "passmanag": 62, "backend": [62, 68, 69, 77, 78, 81, 92, 96, 97, 98, 101, 108, 110, 111, 113, 114], "offer": [62, 64], "registr": [62, 65], "conveni": [62, 91, 104, 112, 116, 118], "control": [62, 65, 88, 100, 109, 116], "_aten_lowering_pass": 62, "my_custom_pass": 62, "front": [62, 71], "passlist": 62, "arbitrari": [62, 75], "remov": [62, 63, 71, 80, 96, 97, 108], 
"dump_lowering_pass": 62, "apply_lowering_pass": 62, "graph_modul": [62, 71], "_remove_lowering_pass": 62, "evolv": 62, "introduc": [63, 65, 112], "exportedprogram": [63, 68, 71, 76, 100, 106, 107, 109, 114, 118], "dynamo": [63, 64, 66, 68, 74, 75, 76, 78, 89, 93, 94, 95, 96, 97, 98, 100, 101, 102, 104, 105, 108, 109, 110, 111, 113, 114, 115], "frontend": [63, 71, 74, 90, 95, 98, 102, 104, 108, 110, 111, 113, 114], "simpl": [63, 64, 65, 82, 83, 88, 110, 111, 112, 113, 114], "usag": [63, 65, 69, 74, 78, 82, 89, 98, 109, 110, 114, 117], "eval": [63, 68, 89, 90, 93, 94, 96, 97, 99, 100, 101, 102, 104, 105, 106, 107, 108, 109, 110, 111, 113, 114, 115, 117], "exp_program": [63, 97, 100, 108, 114], "trt_gm": [63, 68, 97, 100, 114, 115, 117], "interact": [63, 82, 99, 101, 102, 103, 104, 105], "ideal": 63, "discuss": [63, 64, 110, 111, 113], "section": [63, 65, 80, 82, 83, 84, 86, 89, 91, 110, 111, 113, 117], "frequent": 63, "builder": [63, 64, 65, 71], "respect": [63, 64, 66, 71, 76, 106, 107, 115], "releas": [63, 64, 67, 82], "insid": [63, 82, 95], "decomposit": [63, 64, 71, 76, 95], "downstream": [63, 112], "constraint": [63, 109], "guid": [64, 81], "present": [64, 100], "learn": [64, 66, 69, 89, 91, 95, 110, 111, 113, 119], "acceler": [64, 72, 76, 116, 118, 119], "workflow": [64, 65, 68, 69, 71, 72, 76, 89, 92, 97, 98, 99, 102, 103, 104, 106, 107, 110, 112], "wide": [64, 76, 86], "varieti": [64, 110, 111, 113], "primari": [64, 93, 97, 117], "simplic": 64, "optimized_model": [64, 68, 96, 101, 102, 104], "depth": [64, 80, 112], "challeng": [64, 99, 110, 111, 113], "addition": [64, 95], "fit": [64, 82], "compilationset": [64, 71, 75, 93, 95, 101], "_enum": [64, 71], "callabl": [64, 71, 76], "pass_through_build_failur": [64, 71, 75, 76, 95], "max_aux_stream": [64, 71, 75, 76, 95], "version_compat": [64, 71, 75, 76, 95], "optimization_level": [64, 71, 75, 76, 95, 101], "use_python_runtim": [64, 71, 75, 76, 95, 96, 97, 99, 100, 101], "truncate_doubl": [64, 71, 75, 76, 95, 96, 106, 107, 109], "use_fast_partition": [64, 71, 75, 76, 95], "enable_experimental_decomposit": [64, 71, 75, 76, 95], "_devic": [64, 71], "assume_dynamic_shape_support": [64, 71, 75, 76], "make_refitt": [64, 71, 75, 76, 96, 97, 99, 100], "engine_cap": [64, 71, 75, 76, 95], "dryrun": [64, 71, 75, 76, 95], "hardware_compat": [64, 71, 75, 76, 95], "timing_cache_path": [64, 71, 75, 76, 97], "tmp": [64, 71, 75, 76, 89, 96], "torch_tensorrt_engine_cach": [64, 71, 75, 76], "timing_cach": [64, 65, 71, 75, 76], "bin": [64, 66, 67, 71, 75, 76], "lazy_engine_init": [64, 71, 75, 76], "cache_built_engin": [64, 71, 75, 96, 97], "reuse_cached_engin": [64, 71, 75, 96, 97, 100], "use_explicit_typ": [64, 71, 75, 106, 107, 109, 115], "use_fp32_acc": [64, 71, 75, 106, 107], "enable_weight_stream": [64, 71, 75, 109], "enable_cross_compile_for_window": [64, 71, 75], "dpython": [64, 71, 76, 77], "per": [64, 71, 95, 116], "regardless": [64, 71, 83, 102, 104], "fail": [64, 71, 76, 89, 99, 100, 108, 119], "auxiliari": [64, 71], "stream": [64, 69, 71, 76, 95, 98, 110], "impli": [64, 71], "longer": [64, 66, 71, 76, 80, 94, 116], "search": [64, 69, 71, 76, 80], "strictli": [64, 71], "runtim": [64, 66, 68, 69, 71, 76, 89, 93, 98, 99, 101, 104, 105, 109, 110, 118], "presenc": [64, 71], "preferenti": [64, 71], "choos": [64, 65, 71, 88], "float64": [64, 71, 76, 77], "refitt": [64, 71, 97], "toggl": [64, 71, 76], "mode": [64, 65, 71, 75, 76, 90, 91, 93, 105, 108], "detail": [64, 65, 67, 71, 88, 89, 95, 97, 110, 111, 113, 116], "natur": [64, 71, 82], 
"architectur": [64, 66, 69, 71, 76, 94, 97, 112], "amper": [64, 71, 76], "newer": [64, 66, 71, 76], "storag": [64, 71, 91], "use_strong_typ": [64, 71], "strong": [64, 71, 82], "mix": [64, 69, 71], "happen": [64, 65, 71, 88, 99, 102, 114], "were": [64, 71, 95, 100, 116], "cross": [64, 71, 82, 98, 110], "window": [64, 71, 82], "sub": [64, 70, 82, 88, 101], "slate": 64, "futur": [64, 65, 71, 76, 77, 116], "occur": [64, 109], "first_output": 64, "subsequ": [64, 97], "second_output": 64, "session": [64, 68, 82, 97, 105], "point": [64, 66, 76, 80, 81, 82, 89, 108, 110, 111, 113], "cover": [64, 93], "benchmark": [64, 70], "automat": [64, 67, 76, 82, 89, 100, 114, 118], "vari": [64, 72, 109, 114], "distribut": [64, 67, 89, 91, 109, 116], "inf": 64, "dynamo_convers": 64, "contribut": 64, "demonstr": [64, 82, 83, 84, 91, 93, 95, 97, 99, 108, 110, 111, 112, 113], "break": [64, 65, 71, 75, 76, 82, 95, 107], "successfulli": [64, 94, 99, 100], "_dynamo": [64, 96, 97, 101, 102, 104, 114], "explain": [64, 65, 69], "veri": [64, 65, 83, 84, 91, 92, 106, 107, 110, 111, 113], "explan": [64, 65], "graph_break_count": 64, "furthermor": 64, "durat": [64, 82], "latter": [64, 75], "logic": [64, 65, 93], "guard": 64, "compos": [65, 88, 91, 93, 108, 110, 111, 113], "variou": [65, 119], "etc": [65, 80, 82, 95, 119], "environ": [65, 68, 110, 111, 113], "research": 65, "few": [65, 66, 76, 93], "nightli": 65, "lower_exampl": 65, "welcom": [65, 89], "finish": 65, "converison": 65, "pleas": [65, 67, 76, 82, 89, 98, 108, 110, 111, 113, 114], "max_batch_s": [65, 72, 110, 111, 113], "2048": [65, 72], "max_workspace_s": [65, 72], "33554432": [65, 72], "explicit_batch_dimens": [65, 72], "lower_precis": [65, 72], "lowerprecis": [65, 72], "verbose_log": [65, 72], "timing_cache_prefix": [65, 72], "save_timing_cach": [65, 72], "cuda_graph_batch_s": [65, 72], "dynamic_batch": [65, 72], "turn": [65, 72, 105], "trtmodul": [65, 72], "otherwis": [65, 66, 72, 97, 116], "implicit": [65, 70, 72, 82], "config": [65, 66, 72, 110, 111, 113], "updat": [65, 66, 67, 71, 72, 76, 95, 100], "dim": [65, 70, 72, 95, 97, 108, 109, 110, 111, 113, 114], "fx2trt_exampl": 65, "acc_trac": 65, "come": [65, 66, 81, 95, 99, 110, 111, 113], "my_pytorch_model": 65, "build_model": 65, "prepar": [65, 110, 111, 113], "acc_mod": 65, "earli": [65, 100], "deprec": [65, 70], "continu": [65, 82, 116], "backward": [65, 75, 95, 118], "vision": [65, 98, 110, 111, 113], "activ": [65, 75, 77, 82, 89, 91, 93, 112, 116, 119], "except": [65, 71, 76], "permut": [65, 70], "transpos": [65, 70, 114], "ll": [65, 97], "inputtensorspec": [65, 72, 76], "experiment": [65, 76, 77], "dataclass": [65, 101], "re": [65, 76, 82, 97, 99, 105, 116], "manual": [65, 76, 81, 82, 100, 109], "sampl": [65, 71, 82, 90, 91, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 111, 113], "rand": [65, 89, 94, 97, 99, 100, 101, 110, 111, 113], "from_tensor": [65, 76], "slightli": [65, 66, 95], "promis": 65, "optimize_target_shap": 65, "input_tensor_spec": 65, "shape_rang": [65, 72], "100": [65, 72, 95, 97, 108, 109], "accordingli": [65, 80, 114, 116], "trtinterpreterresult": [65, 72], "namedtupl": 65, "input_nam": [65, 72], "output_nam": [65, 72], "serialized_cach": [65, 72], "bytearrai": [65, 75, 77], "afford": 65, "temporari": [65, 97], "best": [65, 71, 76, 82, 99, 109, 115], "perforamnc": 65, "examin": 65, "suitabl": [65, 93], "force_fp32_output": 65, "strict_type_constraint": 65, "usual": [65, 66, 80], "unless": 65, "certain": [65, 66, 101, 106, 107, 109, 116], "algorithm_selector": 65, 
"profiling_verbos": 65, "trt_interpreter_result": 65, "64": [65, 76, 90, 107, 108, 114], "25": [65, 72, 89], "runtimeerror": [65, 108], "xxx": 65, "One": [65, 82, 83, 89, 112, 116], "reload_trt_mod": 65, "reload_model_output": 65, "far": [65, 82], "give": [65, 80, 82], "convtert": 65, "scheme": [65, 71, 76], "action": [65, 82], "tensort": [65, 118], "thing": [65, 66, 82], "compar": [65, 71, 76, 90, 100], "vanilla": 65, "mainli": 65, "builtin": 65, "purpos": [65, 110, 111, 112, 113], "acc_op": 65, "leverag": [65, 91], "power": [65, 82, 89, 109, 112], "goe": [65, 82], "whole": 65, "sigmoid": [65, 70], "tensorrt_convert": 65, "acc_ops_sigmoid": 65, "rest": [65, 82, 83], "input_v": [65, 93], "receiv": 65, "region": 65, "add_activ": 65, "get_output": [65, 95], "wherev": 65, "rememb": [65, 66, 110, 111, 113], "mapper": 65, "todo": [65, 67, 80], "logist": 65, "down": [65, 66, 80, 107], "acc_norm": 65, "foo": [65, 82, 83], "register_acc_op": 65, "register_acc_op_map": 65, "this_arg_is_opt": 65, "op_and_target": 65, "arg_replacement_tupl": 65, "rule": [65, 66, 77], "third": [65, 83], "boolean": [65, 76, 93], "matter": [65, 95], "register_custom_acc_mapper_fn": 65, "design": [65, 74, 93, 99, 106, 109, 112, 119], "redund": 65, "throught": 65, "custom_mapp": 65, "_": [65, 82, 95, 108, 109, 115], "foo_kwarg": 65, "inserting_befor": 65, "foo_nod": 65, "meta": [65, 67, 86, 107, 109], "children": 65, "unit": [65, 76], "test_acc_trac": 65, "acc_op_convert": 65, "essenti": 65, "plugin": [65, 95], "yet": [65, 112], "folder": 65, "center": 66, "pypi": 66, "m": [66, 67, 83, 108], "pip": [66, 67, 98, 110, 111, 113], "upload": [66, 110, 111, 113], "x86": [66, 116], "extra": [66, 75, 89, 95, 99], "url": [66, 80, 110, 111, 113], "download": [66, 67, 86, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113], "whl": [66, 67], "cu118": 66, "cu124": 66, "tarbal": [66, 89, 91], "easiest": [66, 95], "bazelisk": [66, 67], "bazelbuild": [66, 67], "export": [66, 67, 69, 71, 76, 97, 98, 100, 102, 106, 107, 108, 109, 110, 111, 113, 115, 117, 118], "bazel_vers": 66, "path_to_torchtrt_root": 66, "bazelvers": 66, "mkdir": 66, "cd": [66, 110, 111, 113], "curl": [66, 82], "fssl": 66, "o": [66, 82, 110, 111, 113], "dist": 66, "unzip": 66, "bash": [66, 110, 111, 113], "sh": 66, "cp": [66, 67, 95], "usr": [66, 67], "driver": 66, "branch": [66, 67], "4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d": 66, "l46": 66, "pull": [66, 97, 110, 111, 113], "latest": [66, 67, 80], "l53c1": 66, "fact": 66, "reproduc": 66, "l71": 66, "http_archiv": 66, "build_fil": 66, "archiv": [66, 67], "sha256": 66, "strip_prefix": 66, "OR": 66, "TO": [66, 89], "gnu": 66, "tar": [66, 67, 82, 91], "gz": [66, 82, 83, 91], "ld_library_path": 66, "comment": [66, 82], "uncom": 66, "l114c1": 66, "l124c3": 66, "uv": 66, "astral": 66, "project": [66, 81, 86], "simpler": [66, 91], "wheel": [66, 67], "dep": 66, "lighter": 66, "executor": 66, "avoid": [66, 93, 95, 100, 114], "implic": 66, "python_onli": 66, "legaci": [66, 74], "mainten": 66, "torchdynamo": [66, 114, 118], "technolog": [66, 118], "exclud": [66, 95], "speed": [66, 97, 100], "no_torchscript": 66, "dbg": 66, "pre_cxx11_abi": 66, "complic": 66, "incompat": 66, "popular": [66, 81, 98, 106, 107, 110, 112], "ngc": [66, 67, 110, 111, 113], "tabl": [66, 86], "bdist_wheel": 66, "preinstal": 66, "forum": 66, "correctli": [66, 95], "declar": 66, "intend": [66, 101, 102, 103, 104, 105], "microsoft": 66, "2022": [66, 69], "open": [66, 110, 111, 112, 113], "app": 66, "x64": 66, "prompt": 
[66, 99, 103, 106, 107], "admin": 66, "privileg": 66, "launcher": 66, "chocolatei": 66, "navig": [66, 80], "ninja": 66, "setuptool": 66, "r": [66, 67, 82, 98, 110], "txt": [66, 67, 98, 110], "distutils_use_sdk": 66, "cuda_win": 66, "libtorch_win": 66, "tensorrt_win": 66, "non": [66, 76, 83, 85, 116], "similarli": [66, 97, 105, 116], "ci_workspac": 66, "win": 66, "tmpl": [66, 67], "torchtrtc": [66, 69, 119], "websit": 66, "finder": 66, "dcmake_module_path": 66, "doesn": [66, 82, 88, 89], "dtorch_dir": 66, "dtensorrt_root": 66, "choic": [66, 74], "b": [66, 70, 76, 83, 109, 110, 111, 113], "dcmake_build_typ": 66, "72048": 66, "jp_workspac": [66, 67], "new_local_repositori": 66, "sudo": [66, 67], "home": 66, "unlik": [66, 92], "libtorch_pre_cxx11_abi": 66, "shift": [66, 70, 82], "jetpack": 66, "jetpack_x": 66, "jetpack_5": 66, "drop": [66, 80, 108], "nvida": 67, "ofjetpack": 67, "With": [67, 80, 82, 89, 91, 97, 110, 111, 113], "incorpor": [67, 83], "cudnn": 67, "9": [67, 86, 89, 94, 95, 110, 111, 113], "dlfw": 67, "09": 67, "jetson": [67, 112], "framework": 67, "instal": [67, 69, 86, 89, 98, 110, 111, 113, 116], "kit": 67, "flash": 67, "board": 67, "apt": 67, "show": [67, 80, 82, 97, 103, 109, 112], "dev": 67, "everth": 67, "nvcc": 67, "cmd": 67, "toolkit": [67, 74], "libcusparselt": 67, "lib64": 67, "wget": [67, 110, 111, 113], "cusparselt": 67, "redist": 67, "libcusparse_lt": 67, "sbsa": 67, "xz": 67, "xf": 67, "v1": [67, 99, 103], "arm64": 67, "mv": 67, "chmod": 67, "pypa": 67, "en": [67, 80], "bootstrap": 67, "jp": 67, "v61": 67, "0a0": 67, "872d972e41": 67, "nv24": 67, "08": [67, 110, 111, 113], "17622132": 67, "cp310": 67, "linux_aarch64": 67, "test_requir": 67, "jetpack6": 67, "lanl": 67, "cuda_vers": 67, "grep": 67, "cut": [67, 82, 100], "sed": [67, 83, 85], "torch_install_path": 67, "dirnam": 67, "__file__": 67, "site_package_path": 67, "cuda_hom": 67, "envsubst": 67, "cxx11": [67, 116], "abi": [67, 116], "anywher": 68, "ahead": [68, 69, 89, 99], "ep": [68, 70, 94, 100, 115, 117], "output_format": [68, 76, 117], "input_tensor": [68, 95, 108, 109], "fill": 68, "aot": [69, 89, 98, 99, 100, 110, 118], "integr": [69, 99, 101], "seamlessli": [69, 76], "ecosystem": [69, 118], "hybrid": [69, 71, 76, 77, 118], "advanc": [69, 78, 83, 91, 98, 110], "bert": [69, 78, 98, 110], "triton": [69, 95], "cudagraph": [69, 98, 110], "overload": [69, 98, 110], "mutabl": [69, 98, 110], "diffus": [69, 78, 98, 110], "gpt2": [69, 98, 110], "llama2": [69, 98, 110], "page": [69, 84, 86, 110, 111, 113], "introductori": 69, "blog": [69, 116], "gtc": 69, "2020": [69, 89], "talk": 69, "fall": [69, 76, 95], "2021": 69, "dai": 69, "confer": 69, "_convolut": [70, 89], "stride": [70, 76, 95, 108], "pad": [70, 76, 95, 108], "dilat": 70, "output_pad": 70, "group": [70, 82, 83], "determinist": 70, "cudnn_en": 70, "allow_tf32": 70, "ab": 70, "aco": 70, "acosh": 70, "adaptive_avg_pool1d": 70, "output_s": 70, "adaptive_avg_pool2d": 70, "adaptive_avg_pool3d": 70, "adaptive_max_pool1d": 70, "adaptive_max_pool2d": 70, "adaptive_max_pool3d": 70, "argmax": [70, 109], "keepdim": 70, "argmin": 70, "asin": 70, "asinh": 70, "atan": 70, "atanh": 70, "avg_pool1d": 70, "kernel_s": [70, 95, 108], "ceil_mod": 70, "count_include_pad": 70, "avg_pool2d": 70, "divisor_overrid": 70, "avg_pool3d": 70, "gamma": 70, "var": 70, "momentum": 70, "bitwise_not": 70, "bmm": 70, "ceil": 70, "clamp": 70, "clamp_max": 70, "clamp_min": 70, "constant_pad_nd": 70, "co": [70, 83, 112], "cosh": 70, "cumsum": 70, "tensor_mod": 70, "rounding_mod": 70, "div_": 70, 
"elu": 70, "scale": [70, 91, 112], "input_scal": 70, "indic": [70, 71, 80, 82, 93, 100, 102, 114, 115], "padding_idx": 70, "eq": [70, 82], "erf": [70, 93], "exp": 70, "expand_a": 70, "fake_quantize_per_channel_affin": 70, "zero_point": 70, "axi": [70, 76], "quant_min": 70, "quant_max": 70, "fake_quantize_per_tensor_affin": 70, "using_int": [70, 89], "start_dim": [70, 89], "end_dim": [70, 89], "floor": 70, "floor_divid": 70, "ge": 70, "gru_cel": 70, "hx": 70, "w_ih": 70, "w_hh": 70, "b_ih": 70, "b_hh": 70, "gt": 70, "hardtanh_": 70, "instance_norm": 70, "running_mean": 70, "running_var": 70, "use_input_stat": 70, "layer_norm": 70, "normalized_shap": 70, "le": 70, "negative_slop": 70, "01": [70, 83, 89, 108], "leaky_relu_": 70, "lstm_cell": 70, "lt": 70, "masked_fil": 70, "mask": [70, 95], "max_pool1d": 70, "max_pool2d": [70, 88, 89], "max_pool3d": 70, "mul_": [70, 93], "narrow": 70, "neg": [70, 99], "norm": 70, "scalaropt_dim": 70, "pixel_shuffl": 70, "upscale_factor": 70, "pow": 70, "tensor_scalar": 70, "expon": 70, "tensor_tensor": 70, "prelu": 70, "prod": [70, 95], "dim_int": 70, "reciproc": 70, "reflection_pad1d": 70, "reflection_pad2d": 70, "relu_": 70, "repeat_interleav": 70, "self_int": 70, "replication_pad1d": 70, "replication_pad2d": 70, "replication_pad3d": 70, "reshap": [70, 95], "roll": 70, "rsub": 70, "scatter": 70, "sigmoid_": 70, "sin": [70, 82], "sinh": 70, "slice": 70, "split_siz": 70, "split_with_s": 70, "sqrt": 70, "squar": 70, "squeez": [70, 112], "sub_": 70, "dim_intlist": 70, "tan": 70, "tanh": [70, 93], "tanh_": [70, 93], "non_block": [70, 108], "memory_format": [70, 76], "prim_devic": 70, "topk": 70, "k": [70, 91, 108], "largest": 70, "dim0": [70, 97], "dim1": 70, "unbind": 70, "unsqueez": [70, 110, 111, 113], "upsample_bilinear2d": 70, "align_corn": 70, "scales_h": 70, "scales_w": 70, "vec": 70, "scale_factor": 70, "upsample_linear1d": 70, "upsample_nearest1d": 70, "upsample_nearest2d": 70, "upsample_nearest3d": 70, "scales_d": 70, "upsample_trilinear3d": 70, "view": [70, 80], "__and__": 70, "__derive_index": 70, "idx": 70, "__getitem__": 70, "__is__": 70, "t1": 70, "t2": 70, "obj": 70, "__isnot__": 70, "__not__": 70, "__or__": 70, "__range_length": 70, "lo": 70, "hi": [70, 82, 83], "__round_to_zero_floordiv": 70, "__xor__": 70, "append": [70, 93, 96, 97, 108, 109], "el": 70, "arang": [70, 95], "pin_memori": 70, "start_step": 70, "copy_": 70, "float_int": 70, "int_float": 70, "floordiv": 70, "is_floating_point": 70, "numel": 70, "l": [70, 108], "9223372036854775807": 70, "requires_grad": 70, "tupleindex": 70, "tup": 70, "exported_program": [71, 76, 117], "arg_input": [71, 76, 93, 100], "kwarg_input": [71, 76, 100], "engine_cache_dir": [71, 96, 97], "engine_cache_s": [71, 96, 97], "custom_engine_cach": [71, 97], "baseenginecach": [71, 97], "int32": [71, 76, 77, 95, 96, 104, 112], "channel_last": [71, 76, 77, 112], "244": [71, 76, 77], "alia": [71, 76], "better": [71, 76, 88, 112, 118], "understand": [71, 76, 114], "convolut": [71, 76, 77, 91, 95, 119], "_c": [71, 76, 77, 92], "oppos": [71, 76, 77], "lean": [71, 76], "spend": [71, 76], "integ": [71, 76, 85], "faster": [71, 76, 96, 97, 112], "parition": [71, 76], "increas": [71, 76, 97, 109], "amount": [71, 76, 109], "defer": [71, 76, 118], "lead": [71, 76, 82, 109, 116], "oversubscript": [71, 76], "hard": [71, 100], "disk": [71, 76, 97], "space": [71, 82, 83, 91], "byte": [71, 75, 76, 77, 95, 97, 109, 112], "1gb": [71, 96, 97], "exce": 71, "oldest": 71, "gear": [71, 91], "toward": [71, 91], "cross_compile_flag": 71, 
"cross_compil": 71, "refit_module_weight": [71, 100], "compiled_modul": [71, 100], "new_weight_modul": [71, 100], "verify_output": [71, 100], "use_weight_map_cach": [71, 100], "in_plac": [71, 100], "compmil": 71, "coverag": [71, 95], "min_acc_module_s": 72, "is_aten": 72, "use_experimental_fx_rt": 72, "correctness_atol": 72, "correctness_rtol": 72, "minim": [72, 91, 95], "submodul": [72, 88, 95], "fx2trt": 72, "cpu": [72, 106, 107, 109], "has_batch_dim": 72, "dtyep": 72, "prop": 72, "min_input_shap": 72, "optimized_input_shap": 72, "max_input_shap": 72, "popul": 72, "225": [72, 110, 111, 113], "explicit_precis": 72, "logger_level": 72, "model_trt": 73, "model_torchtrt": 73, "internal_error": 73, "dataloadercalibr": [74, 91], "preprocess": [74, 91, 110, 111, 113], "algo_typ": [74, 91], "calibrationalgo": [74, 91], "cachecalibr": [74, 91], "qualnam": [74, 76], "entropy_calibr": 74, "entropy_calibration_2": [74, 91], "legacy_calibr": 74, "minmax_calibr": 74, "set_multi_device_safe_mod": [75, 116], "_multidevicesafemodecontextmanag": 75, "impact": 75, "suppress": 75, "unsaf": 75, "trt_compiled_modul": 75, "torchtensorrtmodul": [75, 95], "encompass": [75, 77], "simpili": 75, "de": 75, "initi": [75, 76, 82, 100, 101, 102, 104, 105, 106, 107], "scriptmodul": [75, 76, 77, 89, 90, 117, 118], "overridden": [75, 76], "subclass": 75, "although": [75, 82], "recip": [75, 91], "afterward": 75, "former": 75, "care": 75, "hook": 75, "silent": 75, "get_extra_st": 75, "state_dict": [75, 76, 99], "set_extra_st": 75, "picklabl": 75, "pickl": [75, 95, 97], "load_state_dict": [75, 99, 108], "pythontorchtensorrtmodul": 75, "serialized_engin": [75, 77], "_set": [75, 101], "weight_name_map": 75, "trt_modul": 75, "engine_str": 75, "my_modul": 75, "current_devic": 75, "cudagraphs_validate_shap": 75, "versu": 75, "disable_profil": 75, "enable_profil": 75, "iprofil": 75, "spent": 75, "get_layer_info": 75, "request": [76, 89, 110, 111, 113], "decid": 76, "deseri": [76, 77, 89, 95], "retrac": 76, "strict": [76, 116], "valueerror": [76, 94], "mutabletorchtensorrtmodul": [76, 99], "pytorch_model": 76, "regular": 76, "whenev": 76, "refit_gm": 76, "shape_mod": 76, "_shapemod": 76, "interv": 76, "notat": 76, "bound": 76, "torch_tensor": 76, "tracer": 76, "example_tensor": 76, "optimization_profile_field": 76, "classmethod": 76, "disable_memory_format_check": 76, "core_id": 76, "schedul": [76, 110, 111, 113], "use_default": 76, "try_to": 76, "anoth": [76, 82, 83, 88, 90, 100], "typeerror": 76, "unknown": 76, "succe": 76, "float_dtyp": 76, "failur": 76, "bf16": 76, "try_from": [76, 95], "complex128": 76, "16": [76, 86, 88, 89, 90, 102, 105], "brain": 76, "bfloat16": 76, "f64": 76, "f8": 76, "fp8": 76, "float8": 76, "i32": 76, "sign": [76, 110, 111, 113], "i64": 76, "u8": 76, "unsign": 76, "uint8": 76, "trt_dla": 76, "torchtrt_dla": 76, "_from": 76, "torchtrt_dla_ec": 76, "torchtrt_safety_ec": 76, "saefti": 76, "trt_dla_ec": 76, "standalon": [76, 82], "certifi": 76, "tf": 76, "torchtrt_linear": 76, "cdhw32": 76, "thirti": 76, "row": [76, 83], "spatial": 76, "31": [76, 89], "subscript": [76, 82], "chw16": 76, "sixteen": 76, "15": [76, 82, 86], "chw2": 76, "chw32": 76, "chw4": 76, "four": [76, 82, 83], "dhwc": 76, "equivi": 76, "channels_last_3d": 76, "dhwc8": 76, "eight": 76, "dla_hwc4": 76, "imag": [76, 91, 95, 99, 103, 108, 110, 111, 113], "roundup": 76, "elements": 76, "dla_linear": 76, "planar": 76, "hwc": 76, "channels_last": 76, "hwc16": 76, "hwc8": 76, "least": [76, 82, 83], "ishapelay": 77, "check_method_op_support": 77, 
"seriali": 77, "put_binding_nam": 77, "tensorrtcompilespec": [77, 92], "scriptclass": 77, "0x7f690a9985b0": 77, "_jit_to_tensorrt": 77, "00": 78, "000": [78, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "total": [78, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "galleri": [78, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "mem": 78, "torch_compile_advanced_usag": [78, 101], "torch_compile_resnet_exampl": [78, 102], "torch_compile_stable_diffus": [78, 103], "torch_compile_transformers_exampl": [78, 104], "v0": [79, 110, 111, 113], "pytorch_sphinx_them": [80, 87], "conf": [80, 87], "html_theme_opt": 80, "canonical_url": 80, "analytics_id": 80, "logo_onli": 80, "display_vers": 80, "prev_next_buttons_loc": 80, "bottom": 80, "style_external_link": 80, "vcs_pageview_mod": 80, "collapse_navig": 80, "sticky_navig": [80, 84], "navigation_depth": 80, "includehidden": 80, "titles_onli": 80, "canon": 80, "rank": 80, "trail": 80, "slash": 80, "googl": 80, "analyt": 80, "isn": [80, 82, 95], "shown": [80, 82, 89, 115], "sidebar": [80, 86], "button": [80, 82], "icon": [80, 82], "extern": [80, 82, 98, 110], "display_github": 80, "display_gitlab": 80, "gitlab": 80, "bitbucket": 80, "bar": [80, 82], "www": [80, 82, 89, 91, 110, 111, 113], "sphinx": [80, 81, 82, 83, 87, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "toctre": 80, "lose": 80, "scroll": [80, 84], "unlimit": 80, "header": [80, 82, 83, 89, 110, 111, 113], "render": 80, "github_url": 80, "bitbucket_url": 80, "gitlab_url": 80, "left": [80, 82], "upon": [80, 101, 104], "rst": [80, 82], "visitor": 80, "revert": 80, "misbuild": 80, "properti": [80, 95], "stick": 80, "screen": 80, "vertic": [80, 82], "too": [80, 82, 83], "sticki": [80, 86], "nav": [80, 86], "At": [81, 93, 100], "django": 81, "payment": 81, "dotpai": 81, "dotpayprovid": 81, "seller_id": 81, "pin": 81, "lock": 81, "lang": 81, "pl": 81, "polish": 81, "gatewai": 81, "transfer": 81, "purchas": 81, "item": [81, 83, 108], "param": 81, "seller": 81, "consult": 81, "ui": 81, "languag": [81, 82, 83, 88, 95, 98, 106, 110, 111, 113], "data_item_1": 81, "emphasi": 82, "hyperlink": 82, "uri": 82, "web": 82, "anonym": 82, "label": [82, 91, 108, 110, 111, 112, 113], "substitut": 82, "charact": 82, "exceedingli": 82, "ugli": 82, "problem": [82, 107], "problemat": 82, "ext": [82, 83], "autodoc": [82, 83], "demo": [82, 91], "test_py_modul": [82, 86], "my": [82, 106], "role": 82, "pep": 82, "287": 82, "rfc": 82, "2822": 82, "superscript": 82, "gui": 82, "taken": 82, "height": 82, "interfer": 82, "press": 82, "keyboard": 82, "mous": 82, "mmb": 82, "menuselect": 82, "seen": [82, 83], "whitespac": 82, "signific": [82, 95], "strang": 82, "hyphen": 82, "word": [82, 112], "adjust": 82, "width": [82, 112], "browser": 82, "sentenc": [82, 109, 112], "suppli": [82, 100], "258": 82, "equat": 82, "x_": 82, "x_0": 82, "x_1": 82, "x_2": 82, "x_3": 82, "x_4": 82, "nabla": 82, "frac": 82, "theta": 82, "phi": 82, "restructuredtext": [82, 83], "parser": [82, 94, 108], "colon": 82, "indent": 82, "literal_block": 82, "spaces_and_linebreak": 82, "preserv": [82, 88, 91], "markup_process": 82, "Or": 82, "great": [82, 89, 95, 97, 118], "why": [82, 116], "didn": 82, "blank": 82, "align": 82, "permit": 82, "awai": 82, "eric": 82, "orchestra": 82, "leader": 82, "bee": 82, "philosoph": 82, "ipso": 82, "facto": 82, "But": [82, 89, 100, 109], "got": [82, 89], "vi": 82, "entiti": 82, "said": 82, "entir": [82, 
118], "ancient": 82, "injuri": 82, "sing": 82, "elk": 82, "bracket": 82, "miss": [82, 89], "brontosaurus": 82, "thin": 82, "thicker": 82, "middl": 82, "That": [82, 89], "mine": 82, "belong": 82, "me": [82, 83], "ann": 82, "begun": 82, "past": 82, "pars": [82, 89], "someurl": 82, "dev0": 82, "a945aeb": 82, "caption": [82, 85], "pane": 82, "shell_command": 82, "echo": 82, "did": 82, "window_nam": 82, "session_nam": 82, "shorthand": 82, "some_funct": 82, "highlight": 82, "THE": 82, "heaven": 82, "hexagram": 82, "six": 82, "unbroken": 82, "primal": 82, "light": [82, 117], "spirit": 82, "weak": 82, "essenc": 82, "energi": 82, "unrestrict": 82, "conceiv": 82, "motion": 82, "regard": [82, 118], "basi": 82, "thu": 82, "persist": 82, "dual": 82, "sens": [82, 89], "univers": 82, "world": 82, "men": 82, "express": 82, "deiti": 82, "human": 82, "denot": [82, 95], "holi": 82, "man": [82, 83], "sage": 82, "ruler": 82, "who": 82, "awaken": 82, "utf": [82, 83], "sphinx_rtd_them": [82, 83], "docstr": [82, 83, 90], "dl": 82, "dt": 82, "tag": [82, 110, 111, 113], "tt": 82, "descnam": 82, "descclassnam": 82, "wrote": 82, "anyth": [82, 83, 116], "programm": 82, "myclass": 82, "dothismethod": 82, "flush": 82, "meth": 82, "capit": 82, "flox": 82, "unreferenc": 82, "nonexist": 82, "extrem": 82, "stuff": 82, "mayb": 82, "bold": 82, "ital": 82, "heck": 82, "put": [82, 112], "13": [82, 86], "backlink": 82, "knowledg": 82, "mind": 82, "ey": 82, "thought": 82, "medium": 82, "peopl": 82, "subsect": 82, "interpol": 82, "indirect": 82, "phrase": 82, "docutil": [82, 83], "sourceforg": [82, 83], "ref": 82, "clickabl": 82, "legend": 82, "revis": [82, 83, 99, 103], "revisit": 82, "enhanc": 82, "structuredtext": 82, "wooden": 82, "nickel": 82, "mad": 82, "scientist": 82, "bigger": 82, "bread": 82, "box": [82, 114, 118], "wash": 82, "behind": 82, "ear": 82, "room": 82, "closet": 82, "bathroom": 82, "trash": 82, "sink": 82, "mother": 82, "g_": 82, "mu": 82, "nu": 82, "pi": 82, "t_": 82, "rho_": 82, "servic": 82, "thing1": 82, "thing2": 82, "thing3": 82, "prose": 82, "provok": 82, "mental": 82, "exert": 82, "reader": 82, "discret": 82, "strongli": [82, 109], "advis": 82, "subtitl": 82, "outsid": 82, "often": 82, "besid": 82, "border": 82, "background": [82, 88], "ok": [82, 89], "transmit": 82, "disconnect": 82, "nonetheless": 82, "semant": 82, "blue": [82, 95], "white": 82, "arab": 83, "roman": 83, "upper": 83, "iii": 83, "iv": 83, "classifi": [83, 88, 89, 108, 112], "paragraph": [83, 86], "z": 83, "commonli": 83, "vm": 83, "david": 83, "goodger": 83, "address": [83, 95, 99], "123": 83, "street": 83, "canada": 83, "a1b": 83, "2c3": 83, "contact": 83, "myself": 83, "organ": 83, "humankind": 83, "2012": 83, "03": 83, "19": [83, 86], "53": 83, "0000": 83, "tue": 83, "jan": 83, "progress": 83, "7302": 83, "wish": 83, "redistribut": 83, "reattribut": 83, "sell": 83, "bui": 83, "rent": 83, "leas": 83, "improv": [83, 116], "quot": 83, "excerpt": 83, "collat": 83, "fold": 83, "stapl": 83, "mutil": 83, "anyon": 83, "heart": 83, "bibliograph": 83, "markup": [83, 86], "literal": 83, "yahoo": 83, "oh": 83, "liter": 83, "heh": 83, "child": 83, "beat": 83, "text": [83, 85, 106, 107, 112], "hehe": 83, "kept": 83, "sai": [83, 112], "cackl": 83, "night": 83, "lone": 83, "guangzhou": 83, "destini": 83, "hope": 83, "dream": 83, "forth": 83, "fifth": 83, "sixth": 83, "lorem": [83, 85], "ipsum": [83, 85], "dolor": [83, 85], "sit": [83, 85], "amet": [83, 85], "consectetur": [83, 85], "adipisc": [83, 85], "elit": [83, 85], "donec": [83, 85], 
"porttitor": [83, 85], "odio": [83, 85], "posuer": [83, 85], "vita": [83, 85], "ornar": [83, 85], "libero": [83, 85], "matti": 83, "loborti": [83, 85], "justo": [83, 85], "vestibulum": [83, 85], "nibh": [83, 85], "aliquet": [83, 85], "feugiat": [83, 85], "sagitti": [83, 85], "nequ": [83, 85], "qui": [83, 85], "eleifend": 83, "dui": [83, 85], "rutrum": [83, 85], "lectu": [83, 85], "suscipit": [83, 85], "letter": [83, 112], "column": 83, "cell": 83, "span": 83, "nam": [83, 85], "mauri": [83, 85], "arcu": [83, 85], "stub": 83, "behav": 84, "area": 84, "interdum": 85, "nec": 85, "finibu": 85, "dictum": 85, "velit": 85, "ut": 85, "eu": 85, "efficitur": 85, "aliquam": 85, "erat": 85, "diam": 85, "gravida": 85, "imperdiet": 85, "tellu": 85, "nisl": 85, "praesent": 85, "eget": 85, "elementum": 85, "rhoncu": 85, "tincidunt": 85, "suspendiss": 85, "volutpat": 85, "scelerisqu": 85, "tristiqu": 85, "aenean": 85, "condimentum": 85, "risu": 85, "accumsan": 85, "laoreet": 85, "maximu": 85, "sapien": 85, "ligula": 85, "fringilla": 85, "commodo": 85, "proin": 85, "et": 85, "pharetra": 85, "etiam": 85, "turpi": 85, "ant": 85, "luctu": 85, "vel": 85, "malesuada": 85, "dignissim": 85, "mi": 85, "nunc": 85, "augu": 85, "sem": 85, "cursu": 85, "nulla": 85, "pellentesqu": 85, "habit": 85, "morbi": 85, "senectu": 85, "netu": 85, "fame": 85, "ac": 85, "egesta": 85, "placerat": 85, "tortor": 85, "iaculi": 85, "venenati": 85, "cra": 85, "puru": 85, "ero": 85, "vehicula": 85, "fusc": 85, "auctor": 85, "phasellu": 85, "est": 85, "viverra": 85, "conval": 85, "faucibu": 85, "vulput": 85, "feli": 85, "sodal": 85, "maecena": 85, "congu": 85, "semper": 85, "enim": 85, "blandit": 85, "sollicitudin": 85, "urna": 85, "orci": 85, "lacu": 85, "quisqu": 85, "facilisi": 85, "hendrerit": 85, "curabitur": 85, "variu": 85, "bibendum": 85, "massa": 85, "magna": 85, "tempu": 85, "metu": 85, "nisi": 85, "pretium": 85, "leo": 85, "euismod": 85, "ultric": 85, "dapibu": 85, "lacinia": 85, "vivamu": 85, "molesti": 85, "hac": 85, "habitass": 85, "platea": 85, "dictumst": 85, "git": 86, "content": [86, 91, 110, 111, 113], "changelog": 86, "math": 86, "14": [86, 96, 104, 110, 111, 113], "17": 86, "18": [86, 89, 99], "submenu": 86, "symlink": 87, "subtre": 87, "_theme": 87, "html_theme": 87, "html_theme_path": 87, "optimiz": 88, "tutori": [88, 91, 93, 95, 97, 99, 100, 111, 113], "beginn": 88, "intro_to_torchscript_tutori": 88, "briefli": 88, "lenet": [88, 89], "lenetfeatextractor": 88, "conv1": [88, 89], "conv2d": [88, 95, 108], "conv2": [88, 89], "lenetclassifi": 88, "fc1": [88, 89], "120": [88, 89], "fc2": [88, 89], "84": [88, 89], "fc3": [88, 89], "feat": [88, 89], "obvious": 88, "pathwai": 88, "input_data": [88, 90], "traced_model": 88, "pick": [88, 115], "script_model": [88, 92], "perspect": 88, "___torch_mangle_10": 88, "129": 88, "___torch_mangle_9": 88, "119": 88, "___torch_mangle_5": 88, "137": 88, "callmethod": 88, "138": 88, "38": 88, "39": 88, "torch_script_modul": [88, 89], "in_tensor": 88, "fly": 88, "lenet_script": [88, 89], "haven": 89, "acquir": 89, "dyanmo": 89, "almost": [89, 118], "trt_lenet_script": 89, "apr": 89, "56": 89, "04": 89, "credit": 89, "stop": 89, "argc": 89, "argv": 89, "cerr": 89, "cout": 89, "even": [89, 99], "cppdoc": 89, "pretti": 89, "fashion": [89, 112], "enable_precis": 89, "And": 89, "convertgraphtotrtengin": 89, "engine_converted_from_jit": 89, "close": [89, 93], "saw": 89, "576": 89, "346": 89, "539": 89, "0464": 89, "0383": 89, "0678": 89, "0932": 89, "1045": 89, "0805": 89, "0435": 89, "0818": 89, 
"0208": 89, "0358": 89, "cudafloattyp": 89, "0530": 89, "1691": 89, "2802": 89, "1502": 89, "1056": 89, "1549": 89, "input0": [89, 90], "1063": 89, "input1": [89, 90], "input2": 89, "28": 89, "29": 89, "33": 89, "35": 89, "36": 89, "37": 89, "compilegraph": [89, 91], "transform": [89, 91, 96, 98, 100, 104, 106, 107, 108, 109, 110, 111, 113, 117], "laid": 89, "translat": [89, 100], "aren": 89, "techniqu": [89, 91, 107, 116], "checkmethodoperatorsupport": 89, "modular": 89, "ship": [89, 116], "exhaust": 89, "109": 89, "addlay": 89, "yourself": 89, "question": [89, 93], "outself": 89, "flatten_convert": 89, "unwraptoint": 89, "in_shap": 89, "tovec": 89, "out_shap": 89, "shuffl": [89, 91, 108], "addshuffl": 89, "setreshapedimens": 89, "todim": 89, "extens": [89, 118], "ctype": 89, "cdll": 89, "contributor": 89, "upstream": 89, "pr": 89, "usecas": 90, "sole": [90, 91, 118], "individu": 90, "accuraci": [91, 112], "loss": [91, 112], "infrastructur": [91, 110, 111, 113], "streamlin": 91, "expos": [91, 95], "cpp_frontend": 91, "loading_data_recip": 91, "cifar10": [91, 108], "cstddef": 91, "ktrain": 91, "ktest": 91, "un": 91, "cs": 91, "toronto": 91, "edu": 91, "kriz": 91, "cifar": 91, "is_train": 91, "trim": 91, "use_subset": 91, "new_siz": 91, "mode_": 91, "images_": 91, "targets_": 91, "calibration_dataset": 91, "data_dir": 91, "320": 91, "4914": [91, 108], "4822": [91, 108], "4465": [91, 108], "2023": [91, 108], "1994": [91, 108], "2010": [91, 108], "dataloaderopt": 91, "worker": 91, "virtual": 91, "input_shap": [91, 119], "compile_spec": [91, 94, 102, 119], "kf16": [91, 119], "ki8": 91, "vgg16": [91, 108], "testing_dataset": [91, 108], "totensor": [91, 108, 110, 111, 113], "testing_dataload": [91, 108], "num_work": [91, 108], "vgg": [91, 108], "test_ptq_dataloader_calibr": 91, "test_ptq_trt_calibr": 91, "krizhevski": 91, "hinton": 91, "2009": 91, "tini": 91, "simonyan": 91, "zisserman": 91, "2014": 91, "recognit": [91, 112], "arxiv": 91, "preprint": 91, "1409": 91, "1556": 91, "_jit_to_backend": 92, "mobilenet_v2": 92, "pretrain": [92, 97, 99, 102, 105, 110, 111, 112, 113], "gelu": 93, "sy": 93, "approxim": 93, "suppos": 93, "my_mod": 93, "ex_input": [93, 95], "baselin": 93, "my_standard_gelu": 93, "supports_dynamic_shap": 93, "supersed": 93, "converterprior": 93, "vers": 93, "prior": [93, 97, 114, 116], "distinct": 93, "prepend": 93, "candid": 93, "primit": 93, "compiler_ir": 93, "boilerpl": 93, "focu": [93, 99], "interoper": 93, "aten_ops_gelu": 93, "sourceir": 93, "cheap": 93, "unqiu": 93, "op_count": 93, "get_op_count": 93, "nonloc": 93, "elementwis": 93, "source_ir": 93, "lhs_val": 93, "rhs_val": 93, "x_7": 93, "x_8": 93, "79788456080000003": 93, "x_9": 93, "044714999999999998": 93, "x_10": 93, "x_11": 93, "x_12": 93, "x_13": 93, "x_14": 93, "x_15": 93, "my_custom_gelu": 93, "allclos": [93, 99, 100], "my_mod_erf": 93, "my_gelu_erf": 93, "notic": 93, "minut": [93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "converter_overload": 93, "jupyt": [93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "ipynb": [93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "geforcertx": 94, "4080": 94, "3080": 94, "cross_runtime_compilation_for_window": 94, "trt_resnet": 94, "argpars": [94, 108], "argumentpars": [94, 108], "comil": 94, "add_argu": [94, 108], "parse_arg": [94, 108], "manual_se": [94, 96, 97, 99, 100], "resnet18": [94, 97, 99, 100, 102, 105], "amd64": 94, "loaded_model": 94, "load_cross_compiled_exported_program": 94, 
"trt_output": 94, "cross_compile_for_window": 94, "cost": [95, 97, 100, 116], "perhap": 95, "overhead": [95, 109, 116], "sake": 95, "circular": 95, "red": 95, "green": 95, "twice": 95, "written": 95, "openai": 95, "formal": 95, "tl": 95, "custom_op": 95, "circ_pad_kernel": 95, "all_pads_0": 95, "all_pads_2": 95, "all_pads_4": 95, "all_pads_6": 95, "orig_dims_0": 95, "orig_dims_1": 95, "orig_dims_2": 95, "orig_dims_3": 95, "y_shape_1": 95, "y_shape_2": 95, "y_shape_3": 95, "x_len": 95, "y_len": 95, "block_siz": 95, "pid": 95, "program_id": 95, "mask_i": 95, "i3": 95, "i2": 95, "i1": 95, "i0": 95, "j0": 95, "j1": 95, "j2": 95, "j3": 95, "load_idx": 95, "mask_x": 95, "launch": [95, 110, 111, 113], "torchtrt_ex": 95, "triton_circular_pad": 95, "mutates_arg": 95, "out_dim": 95, "tolist": 95, "all_pad": 95, "zero": 95, "orig_dim": 95, "blocksiz": 95, "256": [95, 108, 109, 110, 111, 113], "numblock": 95, "tracabl": 95, "prerequisit": 95, "fake": 95, "real": 95, "faketensor": 95, "register_fak": 95, "autograd": 95, "beyond": 95, "register_autograd": 95, "padded_x": 95, "my_model": 95, "2604": 95, "4232": 95, "3041": 95, "0833": 95, "2461": 95, "1270": 95, "2450": 95, "4079": 95, "2887": 95, "2828": 95, "0373": 95, "0332": 95, "3143": 95, "6344": 95, "5638": 95, "1867": 95, "5068": 95, "4363": 95, "7937": 95, "3488": 95, "1350": 95, "7966": 95, "3517": 95, "1379": 95, "5537": 95, "1088": 95, "8950": 95, "0550": 95, "6163": 95, "0109": 95, "5245": 95, "9632": 95, "5686": 95, "3775": 95, "8162": 95, "4216": 95, "4311": 95, "1649": 95, "2091": 95, "3668": 95, "1006": 95, "1447": 95, "0352": 95, "7689": 95, "8131": 95, "_run_on_gpu_0": 95, "_run_on_acc_1": 95, "dry": 95, "50": [95, 112], "count": 95, "__": 95, "aggreg": 95, "stat": 95, "latenc": [95, 109, 116], "abstractli": 95, "pkl": [95, 99], "cupi": 95, "gap": 95, "prealloc": 95, "circularpaddingplugin": 95, "ipluginv2dynamicext": 95, "field_collect": 95, "pluginfieldcollect": 95, "x_shape": 95, "num_output": 95, "plugin_namespac": 95, "plugin_typ": 95, "plugin_vers": 95, "assert": [95, 99, 100], "get_output_datatyp": 95, "input_typ": 95, "get_output_dimens": 95, "output_index": 95, "dimsexpr": 95, "exprbuild": 95, "iexprbuild": 95, "output_dim": 95, "dimensionoper": 95, "configure_plugin": 95, "inp": 95, "dynamicplugintensordesc": 95, "x_dim": 95, "desc": 95, "supports_format_combin": 95, "po": 95, "in_out": 95, "plugintensordesc": 95, "num_input": 95, "enqueu": 95, "input_desc": 95, "output_desc": 95, "in_dtyp": 95, "a_mem": 95, "unownedmemori": 95, "items": 95, "c_mem": 95, "a_ptr": 95, "memorypoint": 95, "c_ptr": 95, "a_d": 95, "memptr": 95, "c_d": 95, "a_t": 95, "as_tensor": 95, "c_t": 95, "cloned_plugin": 95, "__dict__": 95, "circularpaddingplugincr": 95, "iplugincr": 95, "field_nam": 95, "pluginfield": 95, "pluginfieldtyp": 95, "create_plugin": 95, "pluginfieldcollection_": 95, "deserialize_plugin": 95, "pads_dict": 95, "creator": 95, "trt_plugin_registri": 95, "get_plugin_registri": 95, "register_cr": 95, "untyp": 95, "get_trt_tensor": 95, "set_layer_nam": 95, "recal": 95, "intlist": 95, "circular_padding_convert": 95, "retriev": 95, "elsewher": 95, "plugin_registri": 95, "plugin_cr": 95, "get_plugin_cr": 95, "field_config": 95, "eventu": 95, "freez": 95, "_input": 95, "add_plugin_v2": 95, "circular_padding_plugin": 95, "_run_on_acc_0": 95, "grad_fn": 95, "subbackward0": 95, "custom_kernel_plugin": 95, "engine_caching_exampl": [96, 97], "remove_timing_cach": [96, 97], "bertmodel": [96, 104], "random": [96, 97, 99, 100, 109], "seed": [96, 
97, 99, 100], "from_pretrain": [96, 99, 103, 104, 106, 107, 109], "uncas": [96, 104, 112], "return_dict": 96, "randint": [96, 104, 109], "compile_bert": 96, "enable_tim": [96, 97], "1st": [96, 97], "measur": [96, 97, 109], "2nd": [96, 97], "3rd": [96, 97], "slower": [96, 97], "messur": [96, 97], "compilation_kwarg": [96, 104], "torch_trt_bert_engine_cach": 96, "30": [96, 97, 99, 100, 102, 104, 115], "synchron": [96, 97, 109], "elapsed_tim": [96, 97], "millisecond": 96, "__name__": [96, 101, 104], "__main__": [96, 101, 104], "engine_caching_bert_exampl": 96, "paid": 97, "upfront": 97, "invalid": 97, "repeatedli": 97, "mitig": 97, "explor": 97, "torch_trt": [97, 99, 100], "_default": 97, "_engine_cach": 97, "flexibl": [97, 118], "histor": 97, "barrier": 97, "reconstruct": 97, "ti": 97, "hash": 97, "magnitud": 97, "torch_compil": [97, 101, 102, 104, 105, 114, 118], "compiled_model": 97, "ms": [97, 109], "dynamo_compil": 97, "example_input": 97, "200": 97, "dynamic_shap": [97, 114], "remot": 97, "systen": 97, "agnost": 97, "implent": 97, "ramenginecach": 97, "held": 97, "engine_cach": 97, "torch_compile_my_cach": 97, "_torch_export_gpt2": [98, 110], "_torch_export_llama2": [98, 110], "sphx_glr_tutorials__rendered_examples_dynamo_cross_runtime_compilation_for_window": [98, 110], "straightforward": 99, "especi": 99, "hug": [99, 106, 107], "face": [99, 106, 107], "difficult": 99, "ever": 99, "walk": [99, 100, 106], "lora": [99, 100], "use_python": 99, "mutable_modul": 99, "model2": [99, 100], "expected_output": [99, 100], "refitted_output": [99, 100], "reload": [99, 118], "checkpoint": [99, 108], "civitai": 99, "12597": 99, "moxin": 99, "diffusionpipelin": [99, 103], "no_grad": [99, 106, 107, 108, 109], "model_id": [99, 103], "runwayml": 99, "hous": 99, "forest": 99, "shuimobysim": 99, "wuchangshuo": 99, "qualiti": 99, "worst": 99, "lowr": 99, "cloudi": 99, "watermark": 99, "pipe": [99, 103], "torch_dtyp": [99, 103], "unet": [99, 103], "negative_prompt": 99, "num_inference_step": 99, "without_lora_mut": 99, "jpg": [99, 110, 111, 113], "procedur": 99, "load_lora_weight": 99, "stablediffusionapi": 99, "load_lora_embed": 99, "weight_nam": 99, "safetensor": 99, "adapter_nam": 99, "lora1": 99, "set_adapt": 99, "adapter_weight": 99, "fuse_lora": 99, "unload_lora_weight": 99, "with_lora_mut": 99, "mutable_torchtrt_module_exampl": 99, "expens": 100, "involv": 100, "occasion": [100, 101, 104], "adapt": 100, "infeas": 100, "focus": 100, "mostli": 100, "recogn": 100, "behalf": 100, "init": [100, 108], "sett": 100, "randomli": 100, "exp_program2": 100, "compiled_trt_ep": 100, "new_trt_gm": 100, "accomplish": 100, "gaurente": 100, "attempt": [100, 108, 114], "rebuild": 100, "heurist": 100, "refit_engine_exampl": 100, "x_out": 101, "y_out": 101, "x_y_out": 101, "invoc": 101, "sample_inputs_half": 101, "model_half": 101, "backend_kwarg": 101, "optimized_model_custom": 101, "exit": [101, 104], "2052": [101, 104], "compile_engine_and_inf": [101, 104], "new_input": [102, 104], "new_output": [102, 104], "new_batch_size_input": 102, "new_batch_size_output": 102, "inputs_bs8": 102, "mark_dynam": [102, 114], "outputs_bs8": 102, "No": [102, 114], "inputs_bs12": 102, "outputs_bs12": 102, "compvi": 103, "majest": 103, "castl": 103, "cloud": 103, "majestic_castl": 103, "png": 103, "enable_cudagraph": [105, 116], "out_trt": 105, "set_cudagraphs_mod": [105, 116], "inputs_2": 105, "inputs_3": 105, "out_trt_2": 105, "out_trt_3": 105, "torch_export_cudagraph": 105, "automodelforcausallm": [106, 107, 109], "autotoken": [106, 
107], "export_llm": [106, 107, 109], "max_token": [106, 107, 109], "kv_cach": [106, 107], "token": [106, 107, 112], "pad_token_id": 106, "eos_token_id": [106, 107], "attn_implement": [106, 107, 109], "eager": [106, 107, 109], "enjoi": 106, "cute": 106, "dog": 106, "model_input": [106, 107], "return_tensor": [106, 107], "input_id": [106, 107], "regress": [106, 107], "huggingfac": [106, 107, 112], "pyt_gen_token": [106, 107], "gpt2_ep": 106, "max_seq_len": [106, 107, 109], "trt_gen_token": [106, 107], "skip_special_token": [106, 107], "parallel": 106, "paradigm": 106, "torch_export_gpt2": 106, "llama_path": [107, 109], "llama": [107, 109], "7b": [107, 109], "chat": [107, 109], "hf": [107, 109], "llama2_ep": [107, 109], "batch_decod": 107, "clean_up_tokenization_spac": 107, "solv": [107, 110, 111, 113], "smaller": [107, 112], "subproblem": 107, "torch_export_llama2": 107, "modelopt": 108, "mtq": 108, "export_torch_mod": 108, "layer_spec": 108, "num_class": 108, "1000": [108, 109, 110, 111, 113], "init_weight": 108, "in_channel": 108, "pool": [108, 119], "maxpool2d": 108, "batchnorm2d": 108, "sequenti": 108, "avgpool": 108, "adaptiveavgpool2d": 108, "4096": 108, "dropout": 108, "_initialize_weight": 108, "kaiming_normal_": 108, "fan_out": 108, "nonlinear": 108, "constant_": 108, "elif": 108, "normal_": 108, "vgg16_cfg": 108, "128": [108, 109], "ckpt": 108, "model_state_dict": 108, "device_count": 108, "ordereddict": 108, "new_state_dict": 108, "forget": 108, "training_dataset": 108, "randomcrop": 108, "randomhorizontalflip": 108, "training_dataload": 108, "drop_last": 108, "crit": 108, "crossentropyloss": 108, "calibrate_loop": 108, "pred": 108, "5f": 108, "acc": 108, "2f": 108, "quantize_typ": 108, "quant_cfg": 108, "int8_default_cfg": 108, "fp8_default_cfg": 108, "forward_loop": 108, "qdq": 108, "incomplet": 108, "functionaltensor": 108, "functionaltensormod": 108, "_trace": 108, "_export": 108, "float8_e4m3fn": 108, "class_prob": 108, "class_pr": 108, "test_prob": 108, "test_pr": 108, "test_loss": 108, "test_acc": 108, "vgg16_ptq": 108, "overcom": 109, "throughput": 109, "sometim": [109, 114], "outweigh": 109, "slowdown": 109, "hardwar": [109, 119], "experi": 109, "balanc": 109, "timeit": 109, "time_gener": 109, "output_seq_length": 109, "seq_len": [109, 114], "llm": 109, "input_seq": 109, "start_tim": 109, "default_tim": 109, "inputs_copi": 109, "decod": 109, "logit": 109, "next_token_logit": 109, "next_token": 109, "end_tim": 109, "time_mean_m": 109, "isl": 109, "osl": 109, "warm": 109, "solut": 109, "insight": 109, "weight_streaming_ctx": 109, "weight_stream": 109, "mean_lat": 109, "percentag": 109, "weight_budget_pct": 109, "device_budget": 109, "total_device_budget": 109, "permiss": 109, "equal": 109, "proportion": 109, "streamabl": 109, "streamable_budget": 109, "requested_budget": 109, "get_automatic_weight_streaming_budget": 109, "weight_streaming_exampl": 109, "hand": [110, 111, 113], "consider": [110, 111, 113], "concurr": [110, 111, 113], "grpc": [110, 111, 113], "aforement": [110, 111, 113], "familiar": [110, 111, 113], "resnet50": [110, 111, 113], "torchhub": [110, 111, 113], "docker": [110, 111, 113], "login": [110, 111, 113], "xx": [110, 111], "yy": [110, 111, 113], "mm": [110, 111, 113], "publish": [110, 111, 113], "pwd": [110, 111, 113], "scratch_spac": [110, 111, 113], "nvcr": [110, 111, 113], "py3": [110, 111, 113], "hub": [110, 111, 113], "_validate_not_a_forked_repo": [110, 111, 113], "ts_trt_model": [110, 111, 113], "triton_exampl": [110, 111, 113], "model_repositori": 
[110, 111, 113], "rm": [110, 111, 113], "highli": [110, 111, 112, 113], "suggest": [110, 111, 113], "simplest": [110, 111, 113], "pbtxt": [110, 111, 113], "data_typ": [110, 111, 113], "type_fp32": [110, 111, 113], "exact": [110, 111, 113], "encourag": [110, 111, 113], "proce": [110, 111, 113], "8000": [110, 111, 113], "8001": [110, 111, 113], "8002": [110, 111, 113], "tritonserv": [110, 111, 113], "spin": [110, 111, 113], "proceed": [110, 111, 113], "flesh": [110, 111, 113], "img1": [110, 111, 113], "hakaimagazin": [110, 111, 113], "wp": [110, 111, 113], "gulf": [110, 111, 113], "bird": [110, 111, 113], "attrdict": [110, 111, 113], "pyindex": [110, 111, 113], "tritoncli": [110, 111, 113], "jump": [110, 111, 113], "firstli": [110, 111, 113], "resiz": [110, 111, 113], "pil": [110, 111, 113], "httpclient": [110, 111, 113], "triton_to_np_dtyp": [110, 111, 113], "rn50_preprocess": [110, 111, 113], "img_path": [110, 111, 113], "img": [110, 111, 113], "centercrop": [110, 111, 113], "485": [110, 111, 113], "456": [110, 111, 113], "406": [110, 111, 113], "229": [110, 111, 113], "transformed_img": [110, 111, 113], "inferenceservercli": [110, 111, 113], "localhost": [110, 111, 113], "secondli": [110, 111, 113], "obtain": [110, 111, 112, 113, 117], "inferinput": [110, 111, 113], "set_data_from_numpi": [110, 111, 113], "binary_data": [110, 111, 113], "inferrequestedoutput": [110, 111, 113], "class_count": [110, 111, 113], "lastli": [110, 111, 113], "send": [110, 111, 113], "model_nam": [110, 111, 113], "inference_output": [110, 111, 113], "as_numpi": [110, 111, 113], "468750": [110, 111, 113], "90": [110, 111, 113], "523438": [110, 111, 113], "92": [110, 111, 113], "664062": [110, 111, 113], "429688": [110, 111, 113], "136": [110, 111, 113], "234375": [110, 111, 113], "confidence_scor": [110, 111, 113], "classification_index": [110, 111, 113], "_rendered_examples_python": 110, "_rendered_examples_jupyt": 110, "acoust": 112, "speech": 112, "quartznet": 112, "contextnet": 112, "subword": 112, "piec": 112, "excit": 112, "se": 112, "audio": 112, "transcrib": 112, "speedup": 112, "feedforward": 112, "cnn": 112, "uniformli": 112, "resolut": 112, "compound": 112, "coeffici": 112, "b0": 112, "corpu": 112, "english": 112, "supervis": 112, "walkthrough": 112, "overal": 112, "adopt": 112, "mobilenetv2": 112, "classif": 112, "imagenet": 112, "imagenett": 112, "qat": 112, "simul": 112, "eagerli": 114, "swap": 114, "exactli": 114, "_tracer": 114, "queri": 114, "attn_weight": 114, "compiler_dynamic_shap": 114, "inputs_bs2": 114, "mymodul": 115, "linear1": 115, "linear2": 115, "linear3": 115, "40": 115, "__myl_mulsum_myl0_0": 115, "layertyp": 115, "kgen": 115, "__mye116_dconst": 115, "__myln_k_arg__bb1_2": 115, "tacticnam": 115, "__myl_mulsum_0xfa6c1858aea1b13b03f90165d7149ec6": 115, "streamid": 115, "__myl_addresmulsum_myl0_1": 115, "__mye131_dconst": 115, "addmm_constant_0": 115, "addmm_add_broadcast_to_same_shape_lhs_broadcast_constantfloat": 115, "__myln_k_arg__bb1_3": 115, "__myl_addresmulsum_0xb3915d7ebfe48be45b6d49083479e12f": 115, "__myl_addresmulsumadd_myl0_2": 115, "__mye146_dconst": 115, "addmm_2_constant_0": 115, "addmm_2_add_broadcast_to_same_shape_lhs_broadcast_constantfloat": 115, "addmm_1_constant_0": 115, "addmm_1_add_broadcast_to_same_shape_lhs_broadcast_constantfloat": 115, "__myl_addresmulsumadd_0xcdd0085ad25f5f45ac5fafb72acbffd6": 115, "__myl_mulsumaddcas_myl0_0": 115, "__mye112_dconst": 115, "__myl_mulsumaddcas_0xacf8f5dd9be2f3e7bb09cdddeac6c936": 115, "__myl_resmulsumaddcas_myl0_1": 115, 
"__mye127_dconst": 115, "addmm_1_add_broadcast_to_same_shape_lhs_broadcast_constanthalf": 115, "__myl_resmulsumaddcas_0x5a3b318b5a1c97b7d5110c0291481337": 115, "__myl_resmulsumadd_myl0_2": 115, "__mye142_dconst": 115, "__myl_resmulsumadd_0x3fad91127c640fd6db771aa9cde67db0": 115, "libtorchtrt_runtim": 116, "dl_open": 116, "ld_preload": 116, "load_librari": 116, "wl": 116, "ltorchtrt": 116, "torchtrt_runtime_exampl": 116, "libtorchtrt_plugin": 116, "neglig": 116, "thread": 116, "alert": 116, "switch": 116, "mismatch": 116, "crash": 116, "sacrif": 116, "incur": 116, "intens": 116, "trt_ep": 117, "stai": 117, "trt_t": 117, "ergonom": 118, "deleg": 118, "believ": 118, "amen": 118, "artifact": 118, "pack": 118, "year": 118, "superset": 118, "codebas": 118, "immedi": 118, "traceabl": 118, "scriptabl": 118, "neural": 119, "deconvolut": 119, "scripted_model": 119}, "objects": {"": [[5, 0, 1, "c.STR", "STR"], [9, 0, 1, "c.TORCHTRT_API", "TORCHTRT_API"], [11, 0, 1, "c.TORCHTRT_HIDDEN", "TORCHTRT_HIDDEN"], [7, 0, 1, "c.TORCH_TENSORRT_MAJOR_VERSION", "TORCH_TENSORRT_MAJOR_VERSION"], [8, 0, 1, "c.TORCH_TENSORRT_MINOR_VERSION", "TORCH_TENSORRT_MINOR_VERSION"], [6, 0, 1, "c.TORCH_TENSORRT_PATCH_VERSION", "TORCH_TENSORRT_PATCH_VERSION"], [12, 0, 1, "c.TORCH_TENSORRT_VERSION", "TORCH_TENSORRT_VERSION"], [10, 0, 1, "c.XSTR", "XSTR"], [0, 1, 1, "_CPPv4N14torch_tensorrt8DataTypeE", "torch_tensorrt::DataType"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeE5Value", "torch_tensorrt::DataType::DataType"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeEN3c1010ScalarTypeE", "torch_tensorrt::DataType::DataType"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeEv", "torch_tensorrt::DataType::DataType"], [0, 3, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeE5Value", "torch_tensorrt::DataType::DataType::t"], [0, 3, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeEN3c1010ScalarTypeE", "torch_tensorrt::DataType::DataType::t"], [0, 4, 1, "_CPPv4N14torch_tensorrt8DataType5ValueE", "torch_tensorrt::DataType::Value"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kBoolE", "torch_tensorrt::DataType::Value::kBool"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kCharE", "torch_tensorrt::DataType::Value::kChar"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value7kDoubleE", "torch_tensorrt::DataType::Value::kDouble"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value6kFloatE", "torch_tensorrt::DataType::Value::kFloat"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kHalfE", "torch_tensorrt::DataType::Value::kHalf"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value4kIntE", "torch_tensorrt::DataType::Value::kInt"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kLongE", "torch_tensorrt::DataType::Value::kLong"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value8kUnknownE", "torch_tensorrt::DataType::Value::kUnknown"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kBoolE", "torch_tensorrt::DataType::kBool"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kCharE", "torch_tensorrt::DataType::kChar"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value7kDoubleE", "torch_tensorrt::DataType::kDouble"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value6kFloatE", "torch_tensorrt::DataType::kFloat"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kHalfE", "torch_tensorrt::DataType::kHalf"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value4kIntE", "torch_tensorrt::DataType::kInt"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kLongE", "torch_tensorrt::DataType::kLong"], [0, 5, 1, 
"_CPPv4N14torch_tensorrt8DataType5Value8kUnknownE", "torch_tensorrt::DataType::kUnknown"], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypecv5ValueEv", "torch_tensorrt::DataType::operator Value"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataTypecvbEv", "torch_tensorrt::DataType::operator bool"], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeneE8DataType", "torch_tensorrt::DataType::operator!="], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeneEN8DataType5ValueE", "torch_tensorrt::DataType::operator!="], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeneE8DataType", "torch_tensorrt::DataType::operator!=::other"], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeneEN8DataType5ValueE", "torch_tensorrt::DataType::operator!=::other"], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqE8DataType", "torch_tensorrt::DataType::operator=="], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqEN8DataType5ValueE", "torch_tensorrt::DataType::operator=="], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqE8DataType", "torch_tensorrt::DataType::operator==::other"], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqEN8DataType5ValueE", "torch_tensorrt::DataType::operator==::other"], [46, 1, 1, "_CPPv4N14torch_tensorrt6DeviceE", "torch_tensorrt::Device"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device6DeviceEv", "torch_tensorrt::Device::Device"], [1, 1, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypeE", "torch_tensorrt::Device::DeviceType"], [46, 1, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypeE", "torch_tensorrt::Device::DeviceType"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEv", "torch_tensorrt::Device::DeviceType::DeviceType"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEv", "torch_tensorrt::Device::DeviceType::DeviceType"], [1, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [1, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [46, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [46, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [1, 4, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5ValueE", "torch_tensorrt::Device::DeviceType::Value"], [46, 4, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5ValueE", "torch_tensorrt::Device::DeviceType::Value"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kDLAE", "torch_tensorrt::Device::DeviceType::Value::kDLA"], [46, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kDLAE", "torch_tensorrt::Device::DeviceType::Value::kDLA"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kGPUE", "torch_tensorrt::Device::DeviceType::Value::kGPU"], [46, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kGPUE", "torch_tensorrt::Device::DeviceType::Value::kGPU"], [1, 5, 1, 
"_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kDLAE", "torch_tensorrt::Device::DeviceType::kDLA"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kGPUE", "torch_tensorrt::Device::DeviceType::kGPU"], [1, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypecv5ValueEv", "torch_tensorrt::Device::DeviceType::operator Value"], [46, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypecv5ValueEv", "torch_tensorrt::Device::DeviceType::operator Value"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypecvbEv", "torch_tensorrt::Device::DeviceType::operator bool"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypecvbEv", "torch_tensorrt::Device::DeviceType::operator bool"], [1, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!="], [46, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!="], [1, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!=::other"], [46, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!=::other"], [1, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator=="], [46, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator=="], [1, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator==::other"], [46, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator==::other"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device18allow_gpu_fallbackE", "torch_tensorrt::Device::allow_gpu_fallback"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device11device_typeE", "torch_tensorrt::Device::device_type"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device8dla_coreE", "torch_tensorrt::Device::dla_core"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device6gpu_idE", "torch_tensorrt::Device::gpu_id"], [17, 4, 1, "_CPPv4N14torch_tensorrt16EngineCapabilityE", "torch_tensorrt::EngineCapability"], [17, 5, 1, "_CPPv4N14torch_tensorrt16EngineCapability15kDLA_STANDALONEE", "torch_tensorrt::EngineCapability::kDLA_STANDALONE"], [17, 5, 1, "_CPPv4N14torch_tensorrt16EngineCapability7kSAFETYE", "torch_tensorrt::EngineCapability::kSAFETY"], [17, 5, 1, "_CPPv4N14torch_tensorrt16EngineCapability9kSTANDARDE", "torch_tensorrt::EngineCapability::kSTANDARD"], [47, 1, 1, "_CPPv4N14torch_tensorrt11GraphInputsE", "torch_tensorrt::GraphInputs"], [47, 6, 1, "_CPPv4N14torch_tensorrt11GraphInputs15input_signatureE", "torch_tensorrt::GraphInputs::input_signature"], [47, 6, 1, "_CPPv4N14torch_tensorrt11GraphInputs6inputsE", "torch_tensorrt::GraphInputs::inputs"], [48, 1, 1, "_CPPv4N14torch_tensorrt5InputE", "torch_tensorrt::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN2at6TensorE", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, 
"_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEv", "torch_tensorrt::Input::Input"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, 
"_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", 
"torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, 
"_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN2at6TensorE", "torch_tensorrt::Input::Input::tensor"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input5dtypeE", "torch_tensorrt::Input::dtype"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input6formatE", "torch_tensorrt::Input::format"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input9max_shapeE", "torch_tensorrt::Input::max_shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input9min_shapeE", "torch_tensorrt::Input::min_shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input9opt_shapeE", "torch_tensorrt::Input::opt_shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input5shapeE", "torch_tensorrt::Input::shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input13tensor_domainE", "torch_tensorrt::Input::tensor_domain"], [2, 1, 1, "_CPPv4N14torch_tensorrt12TensorFormatE", "torch_tensorrt::TensorFormat"], [2, 2, 1, 
"_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatE5Value", "torch_tensorrt::TensorFormat::TensorFormat"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatEN2at12MemoryFormatE", "torch_tensorrt::TensorFormat::TensorFormat"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatEv", "torch_tensorrt::TensorFormat::TensorFormat"], [2, 3, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatE5Value", "torch_tensorrt::TensorFormat::TensorFormat::t"], [2, 3, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatEN2at12MemoryFormatE", "torch_tensorrt::TensorFormat::TensorFormat::t"], [2, 4, 1, "_CPPv4N14torch_tensorrt12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::Value"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value13kChannelsLastE", "torch_tensorrt::TensorFormat::Value::kChannelsLast"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value11kContiguousE", "torch_tensorrt::TensorFormat::Value::kContiguous"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value8kUnknownE", "torch_tensorrt::TensorFormat::Value::kUnknown"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value13kChannelsLastE", "torch_tensorrt::TensorFormat::kChannelsLast"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value11kContiguousE", "torch_tensorrt::TensorFormat::kContiguous"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value8kUnknownE", "torch_tensorrt::TensorFormat::kUnknown"], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormatcv5ValueEv", "torch_tensorrt::TensorFormat::operator Value"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormatcvbEv", "torch_tensorrt::TensorFormat::operator bool"], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneE12TensorFormat", "torch_tensorrt::TensorFormat::operator!="], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator!="], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneE12TensorFormat", "torch_tensorrt::TensorFormat::operator!=::other"], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator!=::other"], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqE12TensorFormat", "torch_tensorrt::TensorFormat::operator=="], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator=="], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqE12TensorFormat", "torch_tensorrt::TensorFormat::operator==::other"], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator==::other"], [36, 2, 1, "_CPPv4N14torch_tensorrt15dump_build_infoEv", "torch_tensorrt::dump_build_info"], [34, 2, 1, "_CPPv4N14torch_tensorrt14get_build_infoEv", "torch_tensorrt::get_build_info"], [16, 4, 1, "_CPPv4N14torch_tensorrt7logging5LevelE", "torch_tensorrt::logging::Level"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kDEBUGE", "torch_tensorrt::logging::Level::kDEBUG"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kERRORE", "torch_tensorrt::logging::Level::kERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kGRAPHE", "torch_tensorrt::logging::Level::kGRAPH"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level5kINFOE", "torch_tensorrt::logging::Level::kINFO"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level15kINTERNAL_ERRORE", "torch_tensorrt::logging::Level::kINTERNAL_ERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level8kWARNINGE", "torch_tensorrt::logging::Level::kWARNING"], [24, 2, 1, 
"_CPPv4N14torch_tensorrt7logging24get_is_colored_output_onEv", "torch_tensorrt::logging::get_is_colored_output_on"], [22, 2, 1, "_CPPv4N14torch_tensorrt7logging18get_logging_prefixEv", "torch_tensorrt::logging::get_logging_prefix"], [23, 2, 1, "_CPPv4N14torch_tensorrt7logging24get_reportable_log_levelEv", "torch_tensorrt::logging::get_reportable_log_level"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kDEBUGE", "torch_tensorrt::logging::kDEBUG"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kERRORE", "torch_tensorrt::logging::kERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kGRAPHE", "torch_tensorrt::logging::kGRAPH"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level5kINFOE", "torch_tensorrt::logging::kINFO"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level15kINTERNAL_ERRORE", "torch_tensorrt::logging::kINTERNAL_ERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level8kWARNINGE", "torch_tensorrt::logging::kWARNING"], [26, 2, 1, "_CPPv4N14torch_tensorrt7logging3logE5LevelNSt6stringE", "torch_tensorrt::logging::log"], [26, 3, 1, "_CPPv4N14torch_tensorrt7logging3logE5LevelNSt6stringE", "torch_tensorrt::logging::log::lvl"], [26, 3, 1, "_CPPv4N14torch_tensorrt7logging3logE5LevelNSt6stringE", "torch_tensorrt::logging::log::msg"], [27, 2, 1, "_CPPv4N14torch_tensorrt7logging24set_is_colored_output_onEb", "torch_tensorrt::logging::set_is_colored_output_on"], [27, 3, 1, "_CPPv4N14torch_tensorrt7logging24set_is_colored_output_onEb", "torch_tensorrt::logging::set_is_colored_output_on::colored_output_on"], [28, 2, 1, "_CPPv4N14torch_tensorrt7logging18set_logging_prefixENSt6stringE", "torch_tensorrt::logging::set_logging_prefix"], [28, 3, 1, "_CPPv4N14torch_tensorrt7logging18set_logging_prefixENSt6stringE", "torch_tensorrt::logging::set_logging_prefix::prefix"], [25, 2, 1, "_CPPv4N14torch_tensorrt7logging24set_reportable_log_levelE5Level", "torch_tensorrt::logging::set_reportable_log_level"], [25, 3, 1, "_CPPv4N14torch_tensorrt7logging24set_reportable_log_levelE5Level", "torch_tensorrt::logging::set_reportable_log_level::lvl"], [3, 1, 1, "_CPPv4I0EN14torch_tensorrt3ptq19Int8CacheCalibratorE", "torch_tensorrt::ptq::Int8CacheCalibrator"], [3, 7, 1, "_CPPv4I0EN14torch_tensorrt3ptq19Int8CacheCalibratorE", "torch_tensorrt::ptq::Int8CacheCalibrator::Algorithm"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator19Int8CacheCalibratorERKNSt6stringE", "torch_tensorrt::ptq::Int8CacheCalibrator::Int8CacheCalibrator"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator19Int8CacheCalibratorERKNSt6stringE", "torch_tensorrt::ptq::Int8CacheCalibrator::Int8CacheCalibrator::cache_file_path"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch::bindings"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch::names"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch::nbBindings"], [3, 2, 1, "_CPPv4NK14torch_tensorrt3ptq19Int8CacheCalibrator12getBatchSizeEv", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatchSize"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibratorcvPN8nvinfer115IInt8CalibratorEEv", "torch_tensorrt::ptq::Int8CacheCalibrator::operator nvinfer1::IInt8Calibrator*"], [3, 2, 1, 
"_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::readCalibrationCache"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::readCalibrationCache::length"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::writeCalibrationCache"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::writeCalibrationCache::cache"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::writeCalibrationCache::length"], [4, 1, 1, "_CPPv4I00EN14torch_tensorrt3ptq14Int8CalibratorE", "torch_tensorrt::ptq::Int8Calibrator"], [4, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq14Int8CalibratorE", "torch_tensorrt::ptq::Int8Calibrator::Algorithm"], [4, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq14Int8CalibratorE", "torch_tensorrt::ptq::Int8Calibrator::DataLoaderUniquePtr"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator::cache_file_path"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator::dataloader"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator::use_cache"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch::bindings"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch::names"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch::nbBindings"], [4, 2, 1, "_CPPv4NK14torch_tensorrt3ptq14Int8Calibrator12getBatchSizeEv", "torch_tensorrt::ptq::Int8Calibrator::getBatchSize"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8CalibratorcvPN8nvinfer115IInt8CalibratorEEv", "torch_tensorrt::ptq::Int8Calibrator::operator nvinfer1::IInt8Calibrator*"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8Calibrator::readCalibrationCache"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8Calibrator::readCalibrationCache::length"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8Calibrator::writeCalibrationCache"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8Calibrator::writeCalibrationCache::cache"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8Calibrator::writeCalibrationCache::length"], [29, 2, 1, 
"_CPPv4I0EN14torch_tensorrt3ptq26make_int8_cache_calibratorE19Int8CacheCalibratorI9AlgorithmERKNSt6stringE", "torch_tensorrt::ptq::make_int8_cache_calibrator"], [29, 7, 1, "_CPPv4I0EN14torch_tensorrt3ptq26make_int8_cache_calibratorE19Int8CacheCalibratorI9AlgorithmERKNSt6stringE", "torch_tensorrt::ptq::make_int8_cache_calibrator::Algorithm"], [29, 3, 1, "_CPPv4I0EN14torch_tensorrt3ptq26make_int8_cache_calibratorE19Int8CacheCalibratorI9AlgorithmERKNSt6stringE", "torch_tensorrt::ptq::make_int8_cache_calibrator::cache_file_path"], [30, 2, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator"], [30, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::Algorithm"], [30, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::DataLoader"], [30, 3, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::cache_file_path"], [30, 3, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::dataloader"], [30, 3, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::use_cache"], [35, 2, 1, "_CPPv4N14torch_tensorrt10set_deviceEKi", "torch_tensorrt::set_device"], [35, 3, 1, "_CPPv4N14torch_tensorrt10set_deviceEKi", "torch_tensorrt::set_device::gpu_id"], [49, 1, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpecE", "torch_tensorrt::torchscript::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecEN5torch3jit6IValueE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorI5InputEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorIN3c108ArrayRefI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorINSt6vectorI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorIN3c108ArrayRefI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::fixed_sizes"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorINSt6vectorI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::fixed_sizes"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecEN5torch3jit6IValueE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::input_signature"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorI5InputEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::inputs"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec19allow_shape_tensorsE", "torch_tensorrt::torchscript::CompileSpec::allow_shape_tensors"], [49, 6, 1, 
"_CPPv4N14torch_tensorrt11torchscript11CompileSpec10capabilityE", "torch_tensorrt::torchscript::CompileSpec::capability"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec5debugE", "torch_tensorrt::torchscript::CompileSpec::debug"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec6deviceE", "torch_tensorrt::torchscript::CompileSpec::device"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec12disable_tf32E", "torch_tensorrt::torchscript::CompileSpec::disable_tf32"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec20dla_global_dram_sizeE", "torch_tensorrt::torchscript::CompileSpec::dla_global_dram_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec19dla_local_dram_sizeE", "torch_tensorrt::torchscript::CompileSpec::dla_local_dram_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec13dla_sram_sizeE", "torch_tensorrt::torchscript::CompileSpec::dla_sram_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec18enabled_precisionsE", "torch_tensorrt::torchscript::CompileSpec::enabled_precisions"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec12graph_inputsE", "torch_tensorrt::torchscript::CompileSpec::graph_inputs"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14min_block_sizeE", "torch_tensorrt::torchscript::CompileSpec::min_block_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec20num_avg_timing_itersE", "torch_tensorrt::torchscript::CompileSpec::num_avg_timing_iters"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14ptq_calibratorE", "torch_tensorrt::torchscript::CompileSpec::ptq_calibrator"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec5refitE", "torch_tensorrt::torchscript::CompileSpec::refit"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec24require_full_compilationE", "torch_tensorrt::torchscript::CompileSpec::require_full_compilation"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14sparse_weightsE", "torch_tensorrt::torchscript::CompileSpec::sparse_weights"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec22torch_executed_modulesE", "torch_tensorrt::torchscript::CompileSpec::torch_executed_modules"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec18torch_executed_opsE", "torch_tensorrt::torchscript::CompileSpec::torch_executed_ops"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec24truncate_long_and_doubleE", "torch_tensorrt::torchscript::CompileSpec::truncate_long_and_double"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14workspace_sizeE", "torch_tensorrt::torchscript::CompileSpec::workspace_size"], [31, 2, 1, "_CPPv4N14torch_tensorrt11torchscript29check_method_operator_supportERKN5torch3jit6ModuleENSt6stringE", "torch_tensorrt::torchscript::check_method_operator_support"], [31, 3, 1, "_CPPv4N14torch_tensorrt11torchscript29check_method_operator_supportERKN5torch3jit6ModuleENSt6stringE", "torch_tensorrt::torchscript::check_method_operator_support::method_name"], [31, 3, 1, "_CPPv4N14torch_tensorrt11torchscript29check_method_operator_supportERKN5torch3jit6ModuleENSt6stringE", "torch_tensorrt::torchscript::check_method_operator_support::module"], [32, 2, 1, "_CPPv4N14torch_tensorrt11torchscript7compileERKN5torch3jit6ModuleE11CompileSpec", "torch_tensorrt::torchscript::compile"], [32, 3, 1, "_CPPv4N14torch_tensorrt11torchscript7compileERKN5torch3jit6ModuleE11CompileSpec", "torch_tensorrt::torchscript::compile::info"], 
[32, 3, 1, "_CPPv4N14torch_tensorrt11torchscript7compileERKN5torch3jit6ModuleE11CompileSpec", "torch_tensorrt::torchscript::compile::module"], [37, 2, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine"], [37, 3, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine::info"], [37, 3, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine::method_name"], [37, 3, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine::module"], [33, 2, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::device"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::engine"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::input_binding_names"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::output_binding_names"], [76, 8, 0, "-", "torch_tensorrt"]], "torch_tensorrt": [[76, 9, 1, "", "Device"], [76, 9, 1, "", "DeviceType"], [76, 9, 1, "", "EngineCapability"], [76, 9, 1, "", "Input"], [76, 9, 1, "", "MutableTorchTensorRTModule"], [76, 12, 1, "", "compile"], [76, 12, 1, "", "convert_method_to_trt_engine"], [76, 9, 1, "", "dtype"], [117, 8, 0, "-", "dynamo"], [72, 8, 0, "-", "fx"], [76, 12, 1, "", "load"], [73, 8, 0, "-", "logging"], [76, 9, 1, "", "memory_format"], [75, 8, 0, "-", "runtime"], [76, 12, 1, "", "save"], [77, 8, 0, "-", "ts"]], "torch_tensorrt.Device": [[76, 10, 1, "", "__init__"], [76, 11, 1, "", "device_type"], [76, 11, 1, "", "dla_core"], [76, 11, 1, "", "gpu_id"]], "torch_tensorrt.DeviceType": [[76, 11, 1, "", "DLA"], [76, 11, 1, "", "GPU"], [76, 11, 1, "", "UNKNOWN"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"]], "torch_tensorrt.EngineCapability": [[76, 11, 1, "", "DLA_STANDALONE"], [76, 11, 1, "", "SAFETY"], [76, 11, 1, "", "STANDARD"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"]], "torch_tensorrt.Input": [[76, 10, 1, "", "__init__"], [76, 11, 1, "", "dtype"], [76, 10, 1, "", "example_tensor"], [76, 11, 1, "", "format"], [76, 10, 1, "", "from_tensor"], [76, 10, 1, "", "from_tensors"]], "torch_tensorrt.MutableTorchTensorRTModule": [[76, 10, 1, "", "__init__"], [76, 10, 1, "", "compile"], [76, 10, 1, "", "refit_gm"]], "torch_tensorrt.dtype": [[76, 11, 1, "", "b"], [76, 11, 1, "", "bf16"], [76, 11, 1, "", "f16"], 
[76, 11, 1, "", "f32"], [76, 11, 1, "", "f64"], [76, 11, 1, "", "f8"], [76, 11, 1, "", "i32"], [76, 11, 1, "", "i64"], [76, 11, 1, "", "i8"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"], [76, 11, 1, "", "u8"], [76, 11, 1, "", "unknown"]], "torch_tensorrt.dynamo": [[71, 9, 1, "", "CompilationSettings"], [71, 12, 1, "", "compile"], [71, 12, 1, "", "export"], [71, 12, 1, "", "refit_module_weights"], [71, 12, 1, "", "trace"]], "torch_tensorrt.fx": [[72, 9, 1, "", "InputTensorSpec"], [72, 9, 1, "", "TRTInterpreter"], [72, 9, 1, "", "TRTInterpreterResult"], [72, 9, 1, "", "TRTModule"], [72, 12, 1, "", "compile"]], "torch_tensorrt.logging": [[73, 9, 1, "", "debug"], [73, 9, 1, "", "errors"], [73, 9, 1, "", "graphs"], [73, 9, 1, "", "info"], [73, 9, 1, "", "internal_errors"], [73, 9, 1, "", "warnings"]], "torch_tensorrt.memory_format": [[76, 11, 1, "", "cdhw32"], [76, 11, 1, "", "chw16"], [76, 11, 1, "", "chw2"], [76, 11, 1, "", "chw32"], [76, 11, 1, "", "chw4"], [76, 11, 1, "", "dhwc"], [76, 11, 1, "", "dhwc8"], [76, 11, 1, "", "dla_hwc4"], [76, 11, 1, "", "dla_linear"], [76, 11, 1, "", "hwc"], [76, 11, 1, "", "hwc16"], [76, 11, 1, "", "hwc8"], [76, 11, 1, "", "linear"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"]], "torch_tensorrt.runtime": [[75, 9, 1, "", "PythonTorchTensorRTModule"], [75, 9, 1, "", "TorchTensorRTModule"], [75, 12, 1, "", "set_multi_device_safe_mode"]], "torch_tensorrt.runtime.PythonTorchTensorRTModule": [[75, 10, 1, "", "__init__"], [75, 10, 1, "", "cudagraphs_validate_shapes"], [75, 10, 1, "", "disable_profiling"], [75, 10, 1, "", "enable_profiling"], [75, 10, 1, "", "forward"], [75, 10, 1, "", "get_layer_info"]], "torch_tensorrt.runtime.TorchTensorRTModule": [[75, 10, 1, "", "__init__"], [75, 10, 1, "", "forward"], [75, 10, 1, "", "get_extra_state"], [75, 10, 1, "", "set_extra_state"]], "torch_tensorrt.ts": [[77, 12, 1, "", "TensorRTCompileSpec"], [77, 12, 1, "", "check_method_op_support"], [77, 12, 1, "", "compile"], [77, 12, 1, "", "convert_method_to_trt_engine"], [77, 12, 1, "", "embed_engine_in_new_module"], [74, 8, 0, "-", "ptq"]], "torch_tensorrt.ts.ptq": [[74, 9, 1, "", "CacheCalibrator"], [74, 9, 1, "", "CalibrationAlgo"], [74, 9, 1, "", "DataLoaderCalibrator"]], "torch_tensorrt.ts.ptq.CalibrationAlgo": [[74, 11, 1, "", "ENTROPY_CALIBRATION"], [74, 11, 1, "", "ENTROPY_CALIBRATION_2"], [74, 11, 1, "", "LEGACY_CALIBRATION"], [74, 11, 1, "", "MINMAX_CALIBRATION"]]}, "objtypes": {"0": "c:macro", "1": "cpp:class", "2": "cpp:function", "3": "cpp:functionParam", "4": "cpp:enum", "5": "cpp:enumerator", "6": "cpp:member", "7": "cpp:templateParam", "8": "py:module", "9": "py:class", "10": "py:method", "11": "py:attribute", "12": "py:function"}, "objnames": {"0": ["c", "macro", "C macro"], "1": ["cpp", "class", "C++ class"], "2": ["cpp", "function", "C++ function"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "enum", "C++ enum"], "5": ["cpp", "enumerator", "C++ enumerator"], "6": ["cpp", "member", "C++ member"], "7": ["cpp", "templateParam", "C++ template parameter"], "8": ["py", "module", "Python module"], "9": ["py", "class", "Python class"], "10": ["py", "method", "Python method"], "11": ["py", "attribute", "Python attribute"], "12": ["py", "function", "Python function"]}, "titleterms": {"class": [0, 1, 2, 3, 4, 20, 21, 38, 40, 41, 50, 71, 72, 74, 75, 76], "datatyp": 0, "document": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
35, 36, 37, 46, 47, 48, 49, 61, 69, 85, 86], "devic": [1, 46, 116], "devicetyp": 1, "nest": [1, 46], "relationship": [1, 3, 4, 46, 48], "tensorformat": 2, "templat": [3, 4, 29, 30], "int8cachecalibr": 3, "inherit": [3, 4, 48], "base": [3, 4, 48, 80], "type": [3, 4, 46, 48, 54], "int8calibr": 4, "defin": [5, 6, 7, 8, 9, 10, 11, 12, 19, 50, 108], "str": 5, "torch_tensorrt_patch_vers": 6, "torch_tensorrt_major_vers": 7, "torch_tensorrt_minor_vers": 8, "torchtrt_api": 9, "xstr": 10, "torchtrt_hidden": 11, "torch_tensorrt_vers": 12, "directori": [13, 14, 15, 51], "cpp": [13, 18, 19, 20, 21, 56], "subdirectori": [13, 14], "includ": [14, 18, 19, 20, 21], "torch_tensorrt": [15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 45, 67, 71, 72, 73, 74, 75, 76, 77, 102, 104, 105, 118], "file": [15, 18, 19, 20, 21, 42, 43, 44, 45, 50, 51], "enum": [16, 17, 18, 21, 38, 39, 50, 74, 76], "level": [16, 80, 82, 83], "enginecap": 17, "log": [18, 22, 23, 24, 25, 26, 27, 28, 39, 42, 73], "h": [18, 19, 20, 21, 42, 43, 44, 45, 56], "content": [18, 19, 20, 21, 38, 39, 40, 41, 80, 81, 82, 83, 84, 85], "definit": [18, 19, 20, 21, 83, 94, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "By": [18, 19], "namespac": [18, 19, 20, 21, 38, 39, 40, 41, 50], "function": [18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 50, 61, 71, 72, 75, 76, 77, 108], "macro": [19, 43], "ptq": [20, 29, 30, 40, 44, 74, 91, 108], "get_logging_prefix": 22, "get_reportable_log_level": 23, "get_is_colored_output_on": 24, "set_reportable_log_level": 25, "set_is_colored_output_on": 27, "set_logging_prefix": 28, "make_int8_cache_calibr": 29, "make_int8_calibr": 30, "torchscript": [31, 32, 33, 37, 41, 60, 66, 69, 88, 89, 92, 117, 118], "check_method_operator_support": 31, "compil": [32, 57, 59, 63, 64, 66, 68, 69, 89, 94, 97, 100, 101, 102, 103, 104, 105, 106, 107, 109, 112, 114, 115, 117, 118], "embed_engine_in_new_modul": 33, "get_build_info": 34, "set_devic": 35, "dump_build_info": 36, "convert_method_to_trt_engin": 37, "program": [42, 43, 44, 45, 63, 100, 116], "list": [42, 43, 44, 45, 83], "struct": [46, 47, 48, 49, 50], "graphinput": 47, "input": [48, 102, 104], "compilespec": 49, "torch": [50, 61, 63, 64, 65, 66, 68, 69, 89, 90, 92, 93, 95, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118], "tensorrt": [50, 58, 61, 63, 64, 65, 66, 69, 89, 90, 92, 93, 95, 99, 100, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118], "c": [50, 61, 66, 68, 69, 89, 91, 112], "api": [50, 51, 61, 66, 69], "hierarchi": 50, "full": [50, 51], "torchtrtc": [52, 89], "convers": [53, 57, 59, 60], "phase": [53, 55, 56, 57, 58, 59], "node": 53, "evalu": [53, 54, 70], "convert": [53, 54, 60, 65, 70, 89, 93], "write": [54, 60, 62, 93, 95], "dynamo": [54, 62, 69, 71, 106, 107, 117, 118], "implement": [54, 93], "registr": 54, "capabl": 54, "valid": 54, "contract": [54, 60], "exampl": [54, 62, 82, 84, 94], "convolut": 54, "oper": [54, 64, 70, 89, 95], "decomposit": 54, "addmm": [54, 55], "lower": [55, 57, 59, 62], "pass": [55, 62], "us": [55, 61, 89, 90, 92, 93, 95, 101, 102, 103, 104, 105, 106, 107, 108, 112, 114], "eliminatecommonsubexpress": 55, "elimin": 55, "dead": 55, "code": [55, 69, 82], "except": 55, "Or": 55, "pattern": 55, "redund": 55, "guard": 55, "freez": 55, "modul": [55, 88, 89, 99, 118], "fuse": 55, "branch": 55, "linear": 55, "flatten": 55, "graph": [55, 58, 118], "tupl": 55, "fallback": [55, 56], "peephol": 55, 
"optim": [55, 68, 110, 111, 113], "remov": 55, "contigu": 55, "dropout": 55, "To": 55, "unpack": 55, "logsoftmax": 55, "unrol": 55, "loop": [55, 108], "replac": [55, 82], "tile": 55, "repeat": 55, "partit": [56, 57, 59], "partitoninfo": 56, "segmentedblock": 56, "shape_analysi": 56, "automat": [56, 109], "depend": [56, 66, 98, 110], "awar": [56, 112], "runtim": [57, 58, 59, 75, 94, 116], "background": [58, 60], "engin": [58, 65, 95, 96, 97], "executor": 58, "op": [58, 65, 95], "construct": 58, "result": 58, "serial": [58, 64, 68], "deseri": 58, "abi": [58, 66], "version": [58, 66], "format": [58, 118], "system": [59, 66], "overview": [59, 67], "what": 60, "guarante": 60, "respons": 60, "context": [60, 80, 109], "arg": [60, 81], "weight": [60, 100, 108, 109], "other": 60, "advic": 60, "link": [61, 82], "develop": 61, "avail": 61, "layer": 61, "expect": 61, "dimens": 61, "python": [61, 66, 68, 69, 88, 90, 91], "sometim": 61, "easier": 61, "read": 61, "pytorch": [61, 65, 69, 92, 95, 106, 107, 112], "native_op": 61, "ir": [61, 117, 118], "aten": 62, "basic": 62, "requir": 62, "regist": [62, 89], "export": [63, 68, 105, 114], "customiz": [63, 64], "set": [63, 64, 99, 101, 105, 110, 111, 113], "under": [63, 89, 114], "hood": [63, 89, 114], "trace": 63, "backend": [64, 102, 103, 104, 106, 107], "kei": 64, "featur": 64, "custom": [64, 89, 93, 95, 97, 101, 114], "usag": [64, 100, 101], "after": 64, "model": [64, 65, 69, 94, 95, 98, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 115, 117], "perform": 64, "coverag": 64, "feasibl": 64, "dynam": [64, 102, 112, 114], "shape": [64, 102, 112, 114], "support": [64, 70], "recompil": [64, 102], "condit": 64, "fx": [65, 69, 72, 112, 118], "frontend": [65, 66, 69, 92, 112, 118], "user": [65, 69], "guid": [65, 69], "acc": 65, "tracer": 65, "fx2trt": 65, "how": [65, 80, 91], "add": 65, "miss": 65, "instal": [66, 87], "precompil": 66, "binari": 66, "specif": 66, "cuda": [66, 101, 104], "nightli": 66, "build": [66, 67, 80, 110, 111, 113], "onli": 66, "from": [66, 92], "sourc": 66, "linux": 66, "packag": [66, 116], "addit": 66, "option": [66, 68, 80, 81, 83, 102, 104, 109, 118], "distribut": 66, "No": 66, "librari": [66, 116], "standalon": 66, "releas": 66, "debug": 66, "pre": [66, 108], "cxx11": 66, "choos": 66, "right": 66, "window": [66, 94], "step": [66, 68, 110, 111, 113], "advanc": [66, 100, 101], "setup": 66, "troubleshoot": 66, "altern": 66, "cmake": 66, "nativ": 66, "aarch64": 66, "jetson": 66, "prerequisit": [66, 67], "environ": 66, "cli": [66, 69], "jetpack": 67, "6": [67, 84], "1": [67, 68, 84, 110, 111, 113], "quick": 68, "start": [68, 69], "2": [68, 84, 85, 110, 111, 113], "deploi": [68, 108, 112, 116], "deploy": 68, "In": [69, 100], "framework": 69, "infer": [69, 102, 103, 104, 105, 108, 110, 111, 113], "nvidia": 69, "gpu": 69, "get": 69, "tutori": [69, 110], "zoo": [69, 98, 110], "contributor": 69, "indic": 69, "legaci": [69, 112, 118], "further": 69, "inform": 69, "current": 70, "through": 70, "ts": [74, 77, 118], "submodul": 76, "comput": 78, "time": [78, 118], "changelog": 79, "configur": 80, "project": 80, "wide": 80, "html": 80, "theme": [80, 86], "toc": 80, "page": 80, "tabl": [80, 81, 82, 83, 84, 85], "mod": 81, "test_py_modul": 81, "gener": [81, 106, 107], "index": 81, "paramet": 81, "data": 81, "paragraph": [82, 85], "markup": 82, "inlin": 82, "math": 82, "meta": 82, "block": 82, "liter": 82, "line": 82, "quot": 82, "doctest": 82, "emphas": 82, "number": [82, 83], "sidebar": 82, "ch": 82, "ien": 82, "The": [82, 89], 
"creativ": 82, "A": 82, "refer": 82, "footnot": 82, "citat": [82, 91], "glossari": 82, "target": 82, "direct": 82, "center": 82, "text": 82, "imag": [82, 83], "figur": 82, "admonit": 82, "And": 82, "wai": 82, "topic": 82, "rubric": 82, "titl": 82, "compound": 82, "download": [82, 87], "enumer": 83, "field": 83, "bullet": 83, "second": 83, "But": 83, "deeper": 83, "down": 83, "rabbit": 83, "hole": 83, "hlist": 83, "grid": 83, "giant": 83, "can": 83, "have": 83, "caption": [83, 86], "like": 83, "thi": [83, 86], "one": 83, "long": [84, 86], "sticki": 84, "nav": 84, "menu": [84, 86], "3": [84, 110, 111, 113], "4": 84, "5": 84, "7": 84, "8": 84, "9": 84, "10": 84, "11": 84, "12": 84, "13": 84, "14": 84, "15": 84, "16": 84, "17": 84, "18": 84, "19": 84, "20": 84, "submenu": 84, "subsubmenu": 84, "structur": 85, "element": 85, "section": 85, "subsect": 85, "subsubsect": 85, "demo": 86, "an": 86, "incred": 86, "via": 87, "git": 87, "creat": [88, 91], "work": [88, 89], "save": [88, 99, 117], "disk": 88, "quickstart": 89, "unsupport": 89, "post": 91, "train": [91, 108, 112], "quantiz": [91, 108, 112], "your": [91, 110, 111, 113], "own": 91, "applic": 91, "directli": 92, "overload": 93, "metadata": 93, "our": [93, 95], "cross": 94, "import": [94, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "kernel": 95, "within": 95, "test": 95, "wrap": 95, "insert": 95, "cach": [96, 97, 100], "bert": [96, 104, 112], "jit": [97, 114], "aot": [97, 114], "mutabl": 99, "initi": 99, "make": [99, 100], "modif": 99, "stabl": [99, 103], "diffus": [99, 103], "huggingfac": 99, "refit": 100, "new": 100, "standard": 100, "workflow": 100, "refitt": 100, "pretrain": 100, "map": 100, "place": 100, "default": [101, 105], "cleanup": [101, 104], "driver": [101, 104], "error": [101, 104], "note": [101, 104], "resnet": 102, "argument": [102, 104], "avoid": 102, "specifi": 102, "befor": 102, "trt": 102, "cudagraph": [105, 116], "integr": 105, "gpt2": 106, "output": [106, 107], "decod": [106, 107], "sentenc": [106, 107], "llama2": 107, "load": [108, 117], "dataset": 108, "loss": 108, "calibr": 108, "tune": 108, "fp8": 108, "stream": 109, "run": 109, "budget": 109, "size": 109, "manag": 109, "serv": [110, 111, 112, 113], "triton": [110, 111, 113], "up": [110, 111, 113], "server": [110, 111, 113], "client": [110, 111, 113], "queri": [110, 111, 113], "notebook": 112, "citrinet": 112, "efficientnet": 112, "mask": 112, "languag": 112, "mlm": 112, "hug": 112, "face": 112, "transform": 112, "acceler": 112, "resnet50": 112, "lenet": 112, "deep": 112, "learn": 112, "object": 112, "detect": 112, "ssd": 112, "int8": 112, "constraint": 114, "mix": 115, "precis": 115, "libtorchtrt": 116, "so": 116, "plugin": 116, "multi": 116, "safe": 116, "mode": 116, "exportedprogram": 117, "b": 117, "explain": 118, "just": 118, "accept": 118, "return": 118, "ahead": 118, "dla": 119}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["_cpp_api/classtorch__tensorrt_1_1DataType", "_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType", "_cpp_api/classtorch__tensorrt_1_1TensorFormat", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator", 
"_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator", "_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502", "_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268", "_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e", "_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827", "_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b", "_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da", "_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59", "_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883", "_cpp_api/dir_cpp", "_cpp_api/dir_cpp_include", "_cpp_api/dir_cpp_include_torch_tensorrt", "_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558", "_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb", "_cpp_api/file_cpp_include_torch_tensorrt_logging.h", "_cpp_api/file_cpp_include_torch_tensorrt_macros.h", "_cpp_api/file_cpp_include_torch_tensorrt_ptq.h", "_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h", "_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3", "_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650", "_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a", "_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2", "_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8", "_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5", "_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc", "_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c", "_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178", "_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797", "_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9", "_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9", "_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528", "_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384", "_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1", "_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2", "_cpp_api/namespace_torch_tensorrt", "_cpp_api/namespace_torch_tensorrt__logging", "_cpp_api/namespace_torch_tensorrt__ptq", "_cpp_api/namespace_torch_tensorrt__torchscript", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h", "_cpp_api/structtorch__tensorrt_1_1Device", "_cpp_api/structtorch__tensorrt_1_1GraphInputs", "_cpp_api/structtorch__tensorrt_1_1Input", "_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec", "_cpp_api/torch_tensort_cpp", "_cpp_api/unabridged_orphan", "cli/torchtrtc", "contributors/conversion", "contributors/dynamo_converters", "contributors/lowering", "contributors/partitioning", "contributors/phases", "contributors/runtime", "contributors/system_overview", "contributors/ts_converters", "contributors/useful_links", "contributors/writing_dynamo_aten_lowering_passes", "dynamo/dynamo_export", "dynamo/torch_compile", "fx/getting_started_with_fx_path", "getting_started/installation", "getting_started/jetpack", "getting_started/quick_start", "index", "indices/supported_ops", "py_api/dynamo", "py_api/fx", "py_api/logging", "py_api/ptq", "py_api/runtime", "py_api/torch_tensorrt", "py_api/ts", "sg_execution_times", 
"src/pytorch-sphinx-theme/docs/changelog", "src/pytorch-sphinx-theme/docs/configuring", "src/pytorch-sphinx-theme/docs/demo/api", "src/pytorch-sphinx-theme/docs/demo/demo", "src/pytorch-sphinx-theme/docs/demo/lists_tables", "src/pytorch-sphinx-theme/docs/demo/long", "src/pytorch-sphinx-theme/docs/demo/structure", "src/pytorch-sphinx-theme/docs/index", "src/pytorch-sphinx-theme/docs/installing", "ts/creating_torchscript_module_in_python", "ts/getting_started_with_cpp_api", "ts/getting_started_with_python_api", "ts/ptq", "ts/torchscript_frontend_from_pytorch", "tutorials/_rendered_examples/dynamo/converter_overloading", "tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows", "tutorials/_rendered_examples/dynamo/custom_kernel_plugins", "tutorials/_rendered_examples/dynamo/engine_caching_bert_example", "tutorials/_rendered_examples/dynamo/engine_caching_example", "tutorials/_rendered_examples/dynamo/index", "tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example", "tutorials/_rendered_examples/dynamo/refit_engine_example", "tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage", "tutorials/_rendered_examples/dynamo/torch_compile_resnet_example", "tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion", "tutorials/_rendered_examples/dynamo/torch_compile_transformers_example", "tutorials/_rendered_examples/dynamo/torch_export_cudagraphs", "tutorials/_rendered_examples/dynamo/torch_export_gpt2", "tutorials/_rendered_examples/dynamo/torch_export_llama2", "tutorials/_rendered_examples/dynamo/vgg16_ptq", "tutorials/_rendered_examples/dynamo/weight_streaming_example", "tutorials/_rendered_examples/index", "tutorials/_rendered_examples/triton/index", "tutorials/notebooks", "tutorials/serving_torch_tensorrt_with_triton", "user_guide/dynamic_shapes", "user_guide/mixed_precision", "user_guide/runtime", "user_guide/saving_models", "user_guide/torch_tensorrt_explained", "user_guide/using_dla"], "filenames": ["_cpp_api/classtorch__tensorrt_1_1DataType.rst", "_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.rst", "_cpp_api/classtorch__tensorrt_1_1TensorFormat.rst", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.rst", "_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.rst", "_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.rst", "_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.rst", "_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.rst", "_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.rst", "_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.rst", "_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.rst", "_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.rst", "_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.rst", "_cpp_api/dir_cpp.rst", "_cpp_api/dir_cpp_include.rst", "_cpp_api/dir_cpp_include_torch_tensorrt.rst", "_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.rst", "_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.rst", "_cpp_api/file_cpp_include_torch_tensorrt_logging.h.rst", "_cpp_api/file_cpp_include_torch_tensorrt_macros.h.rst", "_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.rst", "_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.rst", "_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.rst", "_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.rst", "_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.rst", 
"_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.rst", "_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.rst", "_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.rst", "_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.rst", "_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.rst", "_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.rst", "_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.rst", "_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.rst", "_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.rst", "_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.rst", "_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.rst", "_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.rst", "_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.rst", "_cpp_api/namespace_torch_tensorrt.rst", "_cpp_api/namespace_torch_tensorrt__logging.rst", "_cpp_api/namespace_torch_tensorrt__ptq.rst", "_cpp_api/namespace_torch_tensorrt__torchscript.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.rst", "_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.rst", "_cpp_api/structtorch__tensorrt_1_1Device.rst", "_cpp_api/structtorch__tensorrt_1_1GraphInputs.rst", "_cpp_api/structtorch__tensorrt_1_1Input.rst", "_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.rst", "_cpp_api/torch_tensort_cpp.rst", "_cpp_api/unabridged_orphan.rst", "cli/torchtrtc.rst", "contributors/conversion.rst", "contributors/dynamo_converters.rst", "contributors/lowering.rst", "contributors/partitioning.rst", "contributors/phases.rst", "contributors/runtime.rst", "contributors/system_overview.rst", "contributors/ts_converters.rst", "contributors/useful_links.rst", "contributors/writing_dynamo_aten_lowering_passes.rst", "dynamo/dynamo_export.rst", "dynamo/torch_compile.rst", "fx/getting_started_with_fx_path.rst", "getting_started/installation.rst", "getting_started/jetpack.rst", "getting_started/quick_start.rst", "index.rst", "indices/supported_ops.rst", "py_api/dynamo.rst", "py_api/fx.rst", "py_api/logging.rst", "py_api/ptq.rst", "py_api/runtime.rst", "py_api/torch_tensorrt.rst", "py_api/ts.rst", "sg_execution_times.rst", "src/pytorch-sphinx-theme/docs/changelog.rst", "src/pytorch-sphinx-theme/docs/configuring.rst", "src/pytorch-sphinx-theme/docs/demo/api.rst", "src/pytorch-sphinx-theme/docs/demo/demo.rst", "src/pytorch-sphinx-theme/docs/demo/lists_tables.rst", "src/pytorch-sphinx-theme/docs/demo/long.rst", "src/pytorch-sphinx-theme/docs/demo/structure.rst", "src/pytorch-sphinx-theme/docs/index.rst", "src/pytorch-sphinx-theme/docs/installing.rst", "ts/creating_torchscript_module_in_python.rst", "ts/getting_started_with_cpp_api.rst", "ts/getting_started_with_python_api.rst", "ts/ptq.rst", "ts/torchscript_frontend_from_pytorch.rst", "tutorials/_rendered_examples/dynamo/converter_overloading.rst", "tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows.rst", "tutorials/_rendered_examples/dynamo/custom_kernel_plugins.rst", "tutorials/_rendered_examples/dynamo/engine_caching_bert_example.rst", "tutorials/_rendered_examples/dynamo/engine_caching_example.rst", "tutorials/_rendered_examples/dynamo/index.rst", 
"tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.rst", "tutorials/_rendered_examples/dynamo/refit_engine_example.rst", "tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage.rst", "tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.rst", "tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.rst", "tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.rst", "tutorials/_rendered_examples/dynamo/torch_export_cudagraphs.rst", "tutorials/_rendered_examples/dynamo/torch_export_gpt2.rst", "tutorials/_rendered_examples/dynamo/torch_export_llama2.rst", "tutorials/_rendered_examples/dynamo/vgg16_ptq.rst", "tutorials/_rendered_examples/dynamo/weight_streaming_example.rst", "tutorials/_rendered_examples/index.rst", "tutorials/_rendered_examples/triton/index.rst", "tutorials/notebooks.rst", "tutorials/serving_torch_tensorrt_with_triton.rst", "user_guide/dynamic_shapes.rst", "user_guide/mixed_precision.rst", "user_guide/runtime.rst", "user_guide/saving_models.rst", "user_guide/torch_tensorrt_explained.rst", "user_guide/using_dla.rst"], "titles": ["Class DataType", "Class Device::DeviceType", "Class TensorFormat", "Template Class Int8CacheCalibrator", "Template Class Int8Calibrator", "Define STR", "Define TORCH_TENSORRT_PATCH_VERSION", "Define TORCH_TENSORRT_MAJOR_VERSION", "Define TORCH_TENSORRT_MINOR_VERSION", "Define TORCHTRT_API", "Define XSTR", "Define TORCHTRT_HIDDEN", "Define TORCH_TENSORRT_VERSION", "Directory cpp", "Directory include", "Directory torch_tensorrt", "Enum Level", "Enum EngineCapability", "File logging.h", "File macros.h", "File ptq.h", "File torch_tensorrt.h", "Function torch_tensorrt::logging::get_logging_prefix", "Function torch_tensorrt::logging::get_reportable_log_level", "Function torch_tensorrt::logging::get_is_colored_output_on", "Function torch_tensorrt::logging::set_reportable_log_level", "Function torch_tensorrt::logging::log", "Function torch_tensorrt::logging::set_is_colored_output_on", "Function torch_tensorrt::logging::set_logging_prefix", "Template Function torch_tensorrt::ptq::make_int8_cache_calibrator", "Template Function torch_tensorrt::ptq::make_int8_calibrator", "Function torch_tensorrt::torchscript::check_method_operator_support", "Function torch_tensorrt::torchscript::compile", "Function torch_tensorrt::torchscript::embed_engine_in_new_module", "Function torch_tensorrt::get_build_info", "Function torch_tensorrt::set_device", "Function torch_tensorrt::dump_build_info", "Function torch_tensorrt::torchscript::convert_method_to_trt_engine", "Namespace torch_tensorrt", "Namespace torch_tensorrt::logging", "Namespace torch_tensorrt::ptq", "Namespace torch_tensorrt::torchscript", "Program Listing for File logging.h", "Program Listing for File macros.h", "Program Listing for File ptq.h", "Program Listing for File torch_tensorrt.h", "Struct Device", "Struct GraphInputs", "Struct Input", "Struct CompileSpec", "Torch-TensorRT C++ API", "Full API", "torchtrtc", "Conversion Phase", "Writing Dynamo Converters", "Lowering Phase", "Partitioning Phase", "Compiler Phases", "Runtime Phase", "System Overview", "Writing TorchScript Converters", "Useful Links for Torch-TensorRT Development", "Writing Dynamo ATen Lowering Passes", "Compiling Exported Programs with Torch-TensorRT", "TensorRT Backend for torch.compile", "Torch-TensorRT (FX Frontend) User Guide", "Installation", "Overview", "Quick Start", "Torch-TensorRT", "Operators Supported", "torch_tensorrt.dynamo", "torch_tensorrt.fx", 
"torch_tensorrt.logging", "torch_tensorrt.ts.ptq", "torch_tensorrt.runtime", "torch_tensorrt", "torch_tensorrt.ts", "Computation times", "Changelog", "Configuration", "5. :mod:`test_py_module`", "3. Paragraph Level Markup", "4. Lists & Tables", "1. Long Sticky Nav", "1. Structural Elements", "<no title>", "Installation", "Creating a TorchScript Module", "Using Torch-TensorRT in C++", "Using Torch-TensorRT in Python", "Post Training Quantization (PTQ)", "Using Torch-TensorRT TorchScript Frontend Directly From PyTorch", "Overloading Torch-TensorRT Converters with Custom Converters", "Cross runtime compilation for windows example", "Using Custom Kernels within TensorRT Engines with Torch-TensorRT", "Engine Caching (BERT)", "Engine Caching", "Dependencies", "Mutable Torch TensorRT Module", "Refitting Torch-TensorRT Programs with New Weights", "Torch Compile Advanced Usage", "Compiling ResNet with dynamic shapes using the torch.compile backend", "Compiling Stable Diffusion model using the torch.compile backend", "Compiling BERT using the torch.compile backend", "Torch Export with Cudagraphs", "Compiling GPT2 using the dynamo backend", "Compiling Llama2 using the dynamo backend", "Deploy Quantized Models using Torch-TensorRT", "Weight Streaming", "Torch-TensorRT Tutorials", "Serving a Torch-TensorRT model with Triton", "Legacy notebooks", "Serving a Torch-TensorRT model with Triton", "Dynamic shapes with Torch-TensorRT", "Compile Mixed Precision models with Torch-TensorRT", "Deploying Torch-TensorRT Programs", "Saving models compiled with Torch-TensorRT", "Torch-TensorRT Explained", "DLA"], "terms": {"defin": [0, 1, 2, 3, 4, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 46, 47, 48, 49, 51, 52, 54, 65, 68, 75, 76, 80, 88, 89, 90, 91, 93, 95, 97, 101, 104, 105, 106, 107, 112], "file": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 46, 47, 48, 49, 52, 54, 56, 58, 59, 64, 65, 66, 67, 68, 71, 72, 74, 76, 77, 78, 80, 81, 83, 87, 89, 91, 94, 110, 111, 113, 114, 117], "torch_tensorrt": [0, 1, 2, 14, 16, 17, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 56, 62, 63, 64, 65, 68, 69, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 103, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119], "h": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 46, 47, 48, 49, 50, 51, 52, 55, 68, 76, 89, 91], "support": [0, 1, 2, 27, 31, 46, 48, 49, 52, 54, 56, 61, 63, 65, 67, 68, 69, 72, 75, 76, 77, 80, 81, 88, 89, 90, 93, 95, 100, 102, 104, 106, 107, 108, 109, 110, 111, 113, 115, 118, 119], "data": [0, 2, 3, 4, 29, 30, 44, 46, 48, 49, 52, 53, 56, 57, 59, 60, 64, 65, 70, 71, 72, 74, 76, 77, 82, 86, 90, 91, 95, 97, 108, 109, 112], "type": [0, 1, 2, 30, 49, 50, 52, 53, 56, 58, 60, 62, 63, 64, 65, 71, 72, 74, 75, 76, 77, 82, 89, 90, 91, 93, 94, 95, 97, 108, 109, 112, 115, 117], "can": [0, 1, 4, 29, 30, 37, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 71, 74, 75, 76, 77, 80, 82, 88, 89, 90, 91, 92, 93, 94, 95, 97, 99, 100, 101, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "us": [0, 1, 2, 3, 4, 29, 30, 32, 35, 37, 43, 44, 45, 46, 48, 49, 52, 53, 54, 56, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 78, 80, 81, 82, 83, 88, 91, 94, 97, 98, 99, 100, 109, 110, 111, 113, 115, 116, 117, 118, 119], "tensorrt": [0, 1, 3, 4, 29, 30, 31, 32, 33, 36, 37, 44, 45, 46, 48, 49, 52, 53, 
54, 55, 56, 57, 59, 60, 62, 67, 68, 71, 72, 74, 75, 76, 77, 88, 91, 94, 97, 98, 101, 102, 103, 104, 105, 109], "engin": [0, 1, 17, 32, 33, 37, 45, 46, 48, 49, 52, 53, 56, 57, 59, 62, 63, 64, 69, 71, 72, 75, 76, 77, 80, 89, 90, 91, 92, 93, 98, 100, 102, 104, 109, 110, 114, 116, 118, 119], "thi": [0, 1, 2, 29, 30, 42, 43, 44, 45, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 71, 72, 75, 76, 77, 80, 81, 82, 84, 85, 88, 89, 91, 92, 93, 95, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "compat": [0, 1, 46, 55, 58, 64, 65, 71, 75, 76, 77, 118], "c10": [0, 1, 45, 46, 48, 49, 89, 91], "check": [0, 1, 31, 46, 52, 55, 60, 65, 67, 71, 75, 77, 89, 95, 99, 100, 110, 111, 113, 116], "trt": [0, 1, 3, 4, 46, 48, 53, 55, 58, 60, 62, 64, 65, 67, 68, 70, 71, 75, 76, 89, 93, 95, 104, 106, 107, 109, 114, 116, 117], "so": [0, 44, 52, 53, 54, 55, 58, 59, 60, 62, 64, 65, 66, 67, 72, 75, 76, 81, 82, 83, 89, 91, 93, 95, 97, 101, 102, 104, 106, 107, 114], "should": [0, 3, 4, 29, 45, 49, 52, 53, 54, 55, 56, 57, 59, 60, 63, 64, 65, 67, 71, 75, 76, 77, 80, 82, 85, 91, 93, 95, 96, 97, 100, 105, 110, 111, 113], "reason": [0, 65, 88, 93, 95, 97, 118], "you": [0, 1, 2, 29, 30, 46, 48, 49, 52, 53, 54, 55, 56, 58, 59, 60, 63, 65, 66, 67, 68, 71, 75, 76, 77, 80, 82, 83, 84, 88, 89, 90, 91, 92, 93, 95, 97, 98, 99, 100, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "need": [0, 1, 2, 25, 29, 43, 46, 53, 54, 55, 60, 65, 66, 67, 71, 72, 75, 76, 82, 89, 90, 91, 93, 95, 96, 97, 99, 100, 110, 111, 112, 113, 114, 116], "explictli": 0, "public": [0, 1, 2, 3, 4, 44, 45, 46, 47, 48, 49, 83, 91], "enum": [0, 1, 2, 42, 45, 46, 51, 71, 77, 91, 93], "valu": [0, 1, 2, 16, 17, 45, 46, 48, 53, 56, 58, 60, 63, 70, 71, 74, 76, 80, 89, 99, 101, 102, 104, 109, 112], "underli": [0, 1, 2, 46, 60], "In": [0, 1, 2, 46, 53, 54, 56, 57, 58, 59, 60, 64, 65, 66, 75, 76, 82, 83, 85, 90, 91, 93, 95, 99, 110, 111, 112, 113, 114, 115, 116, 117], "case": [0, 1, 2, 46, 49, 53, 54, 56, 58, 60, 62, 64, 65, 66, 67, 75, 76, 91, 93, 95, 99, 100, 114, 115, 116], "itself": [0, 1, 2, 46, 52, 55, 92, 93, 110, 111, 113], "interfac": [0, 1, 2, 46, 58, 59, 60, 64, 69, 91], "vs": [0, 1, 2, 46, 55, 66, 71, 76, 77, 92], "normal": [0, 1, 2, 46, 65, 82, 88, 89, 91, 93, 99, 100, 105, 108, 110, 111, 113, 119], "instatin": [0, 1, 2, 46], "ex": [0, 1, 2, 33, 46, 67, 77, 83, 85], "kfloat": [0, 45, 49], "enumer": [0, 1, 2, 16, 17, 46], "klong": [0, 45], "int64": [0, 76, 77, 109], "kdoubl": [0, 45], "fp64": [0, 76], "fp32": [0, 48, 49, 52, 64, 65, 71, 76, 77, 91, 106, 107, 110, 111, 112, 113, 115], "khalf": [0, 45, 89], "fp16": [0, 48, 49, 52, 64, 65, 71, 72, 76, 89, 90, 99, 103, 106, 107, 109, 115, 119], "kchar": [0, 45], "int8": [0, 44, 48, 49, 52, 64, 71, 76, 77, 91, 108, 119], "kint": [0, 45], "int": [0, 3, 4, 35, 44, 45, 49, 52, 54, 56, 63, 64, 70, 71, 72, 76, 77, 80, 89, 95, 108, 109], "kbool": [0, 45], "bool": [0, 1, 2, 3, 4, 24, 27, 30, 31, 42, 44, 45, 46, 49, 55, 60, 64, 70, 71, 72, 74, 75, 76, 77, 80, 89, 91, 94, 95], "kunknown": [0, 2, 45], "sentinel": [0, 2, 76], "function": [0, 1, 2, 3, 4, 46, 48, 49, 51, 54, 55, 56, 58, 60, 62, 64, 65, 66, 88, 89, 91, 92, 93, 95, 100, 101, 104, 105, 106, 107, 110, 111, 112, 113, 114, 116, 118, 119], "default": [0, 1, 2, 3, 4, 16, 29, 30, 33, 43, 45, 46, 48, 49, 52, 54, 56, 62, 64, 65, 66, 71, 72, 75, 76, 77, 80, 81, 82, 89, 90, 91, 92, 93, 94, 95, 97, 108, 114, 116, 117, 118], "construct": [0, 1, 2, 3, 4, 46, 48, 49, 53, 54, 55, 57, 59, 
60, 65, 74, 75, 76, 82, 83, 89, 91, 93, 95, 97, 114], "new": [0, 1, 2, 3, 4, 32, 33, 46, 48, 49, 56, 58, 59, 60, 62, 64, 65, 68, 69, 71, 77, 82, 89, 97, 98, 99, 102, 104, 105, 110, 111, 113, 116], "object": [0, 1, 2, 3, 4, 46, 48, 49, 52, 58, 60, 62, 63, 64, 71, 75, 76, 77, 91, 92, 93, 114, 117], "inlin": [0, 1, 2, 3, 4, 29, 30, 44, 46, 48, 55, 83, 86, 89], "constexpr": [0, 1, 2, 45, 46, 95], "t": [0, 1, 2, 45, 46, 55, 60, 65, 66, 70, 76, 80, 82, 83, 88, 89, 91, 93, 95, 108, 110, 111, 113, 114], "constructor": [0, 2, 46, 48, 49, 58, 88], "from": [0, 1, 2, 3, 4, 29, 30, 44, 46, 48, 49, 52, 53, 55, 56, 57, 58, 59, 60, 63, 64, 65, 67, 69, 71, 72, 75, 76, 77, 78, 80, 81, 82, 83, 88, 89, 91, 93, 94, 95, 96, 97, 99, 100, 103, 104, 106, 107, 108, 109, 110, 111, 112, 113, 116, 117, 118], "torchtrt_api": [0, 2, 19, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 48, 49, 50], "scalartyp": [0, 45, 70], "torch": [0, 1, 2, 4, 20, 21, 29, 30, 31, 32, 33, 36, 37, 44, 45, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 67, 71, 72, 74, 75, 76, 77, 78, 88, 91, 94, 96, 97, 98, 109, 119], "paramet": [0, 1, 2, 3, 4, 25, 26, 27, 29, 30, 31, 32, 33, 35, 37, 46, 48, 49, 53, 54, 55, 60, 64, 65, 71, 72, 74, 75, 76, 77, 86, 88, 89, 100, 106, 107], "oper": [0, 1, 2, 3, 4, 31, 44, 45, 46, 49, 52, 53, 55, 56, 57, 58, 59, 60, 62, 63, 65, 69, 71, 76, 77, 90, 91, 93, 100, 102, 104, 118, 119], "const": [0, 1, 2, 3, 4, 29, 30, 31, 32, 33, 35, 37, 44, 45, 46, 55, 60, 70, 89, 91], "get": [0, 1, 2, 3, 4, 23, 34, 44, 46, 55, 56, 60, 62, 63, 65, 67, 75, 76, 89, 91, 93, 97, 106, 107, 109, 110, 111, 112, 113], "return": [0, 1, 2, 3, 4, 23, 24, 29, 30, 31, 32, 33, 34, 37, 42, 43, 44, 45, 46, 54, 55, 56, 57, 58, 59, 60, 62, 64, 65, 71, 72, 75, 76, 77, 88, 89, 90, 91, 93, 95, 97, 100, 101, 108, 109, 110, 111, 113, 114, 115], "explicit": [0, 1, 2, 3, 4, 45, 46, 55, 65, 72, 75, 82, 91, 118], "delet": [0, 1, 2, 45, 46, 55], "other": [0, 1, 2, 45, 46, 52, 53, 55, 58, 62, 64, 65, 66, 70, 71, 75, 76, 81, 82, 89, 90, 93, 116], "comparis": [0, 2], "true": [0, 1, 2, 4, 46, 49, 55, 56, 60, 62, 64, 65, 70, 71, 72, 75, 76, 77, 80, 83, 89, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 115, 116, 119], "fals": [0, 1, 2, 3, 4, 44, 45, 46, 49, 54, 62, 64, 65, 70, 71, 72, 75, 76, 77, 80, 81, 82, 83, 89, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 103, 104, 105, 106, 107, 108, 109, 116], "struct": [1, 21, 38, 41, 45, 54, 91], "onli": [1, 3, 4, 16, 29, 44, 46, 48, 52, 54, 55, 56, 59, 60, 64, 65, 67, 68, 71, 72, 75, 76, 82, 91, 93, 94, 95, 99, 100, 107, 109, 115, 116, 119], "applic": [1, 29, 46, 52, 55, 59, 64, 71, 75, 76, 89, 90, 92, 116, 119], "kcuda": [1, 46, 56, 89], "which": [1, 2, 29, 32, 37, 46, 49, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 71, 72, 74, 75, 76, 77, 80, 82, 83, 88, 89, 90, 91, 92, 93, 94, 95, 97, 101, 102, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118], "map": [1, 46, 53, 54, 55, 57, 59, 60, 65, 75, 76, 89, 91, 92, 97, 101, 110, 111, 112, 113], "kgpu": [1, 45, 46], "To": [1, 46, 52, 54, 56, 64, 66, 71, 80, 88, 89, 90, 92, 95, 100, 106, 107, 110, 111, 113], "datatyp": [1, 21, 38, 45, 46, 48, 49, 50, 71, 76, 77, 90, 95, 110, 111, 113, 115], "target": [1, 33, 45, 46, 48, 49, 52, 54, 56, 58, 59, 64, 65, 66, 69, 71, 75, 76, 77, 90, 91, 92, 93, 95, 100, 118, 119], "gpu": [1, 32, 35, 37, 45, 46, 52, 64, 65, 71, 75, 76, 77, 89, 91, 92, 95, 106, 107, 109, 110, 111, 113, 116, 118, 119], "run": [1, 37, 46, 49, 52, 53, 54, 55, 56, 57, 58, 59, 
60, 64, 65, 66, 67, 68, 71, 72, 75, 76, 77, 82, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119], "kdla": [1, 45, 46, 119], "dla": [1, 45, 46, 49, 52, 64, 69, 71, 76, 77], "intern": [1, 16, 46, 60, 63, 73, 75, 82, 89], "note": [1, 46, 48, 54, 60, 62, 65, 66, 67, 75, 76, 80, 82, 89, 95, 100, 110, 111, 113, 114, 119], "The": [1, 46, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 71, 75, 76, 77, 80, 83, 88, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 105, 106, 109, 110, 111, 112, 113, 114, 117, 118], "valid": [1, 46, 56, 60, 62, 71, 75, 76, 93], "kcpu": [1, 46], "comparison": [1, 46], "an": [2, 3, 4, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 64, 65, 66, 68, 71, 72, 74, 75, 76, 77, 80, 82, 83, 88, 89, 90, 91, 93, 95, 97, 100, 101, 105, 106, 107, 109, 110, 111, 112, 113, 114, 116, 117, 118], "memeori": 2, "layout": [2, 48, 70, 71, 76, 77], "store": [2, 4, 49, 52, 53, 58, 60, 64, 65, 71, 75, 76, 77, 88, 89, 95, 97, 100], "tensor": [2, 33, 44, 45, 48, 49, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 70, 71, 72, 75, 76, 77, 88, 89, 90, 91, 93, 95, 101, 109, 112], "kcontigu": [2, 45, 48], "contigu": [2, 48, 49, 52, 71, 76, 77], "nchw": [2, 71, 76, 77], "linear": [2, 56, 70, 76, 88, 95, 108, 115], "kchannelslast": [2, 45], "channel": [2, 76, 81], "last": [2, 55, 65, 76, 108], "nhwc": [2, 52], "memoryformat": [2, 45], "ptq": [3, 4, 15, 18, 19, 38, 50, 51, 52, 69, 71, 76, 77], "privat": [3, 4, 44, 45, 91], "algorithm": [3, 4, 29, 30, 44, 65, 74, 91, 107], "typenam": [3, 4, 29, 30, 44], "gener": [3, 4, 29, 52, 55, 58, 59, 60, 62, 64, 65, 66, 71, 72, 80, 82, 83, 86, 88, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 108, 109, 110, 116], "int8calibr": [3, 20, 30, 40, 44, 50], "implement": [3, 4, 55, 56, 58, 63, 65, 75, 81, 89, 91, 95, 97, 116], "specifi": [3, 4, 33, 52, 54, 60, 64, 65, 66, 71, 76, 77, 80, 82, 90, 92, 109, 110, 111, 113, 114, 115, 117, 118], "calibr": [3, 4, 29, 30, 44, 49, 52, 71, 74, 76, 77, 89, 91], "read": [3, 4, 29, 30, 44, 80, 82, 91], "nvinfer1": [3, 4, 29, 30, 44, 45, 49, 60, 91], "iint8calibr": [3, 4, 29, 30, 44, 45, 49, 71, 76, 77, 91], "iint8entropycalibrator2": [3, 4, 29, 30, 44, 91], "std": [3, 4, 22, 26, 28, 29, 30, 31, 33, 34, 37, 42, 44, 45, 47, 48, 49, 56, 89, 91, 110, 111, 113, 119], "string": [3, 4, 18, 20, 21, 22, 26, 28, 29, 30, 31, 33, 34, 37, 42, 44, 45, 49, 54, 56, 58, 60, 64, 71, 76, 80, 89, 91], "cache_file_path": [3, 4, 29, 30, 44], "8": [3, 52, 55, 63, 64, 66, 75, 76, 82, 83, 86, 89, 94, 95, 102, 105, 110, 111, 113, 114], "cach": [3, 4, 29, 30, 44, 52, 64, 65, 69, 71, 72, 74, 76, 89, 91, 98, 110, 116], "getbatchs": [3, 4, 44], "noexcept": [3, 4, 44, 91], "overrid": [3, 4, 29, 30, 44, 54, 65, 91], "batch": [3, 4, 44, 64, 65, 72, 75, 91, 97, 102, 104, 108, 109, 110, 111, 113, 114, 119], "size": [3, 4, 44, 48, 49, 52, 55, 56, 64, 65, 70, 71, 72, 76, 77, 80, 89, 91, 95, 97, 102, 104, 108, 112, 114], "next": [3, 4, 53, 54, 58, 63, 72, 76, 80, 82, 83, 91, 93, 101, 105, 108, 110, 111, 113], "alwai": [3, 4, 27, 52, 76, 82, 100, 109], "1": [3, 4, 33, 44, 45, 48, 49, 52, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 70, 71, 72, 74, 75, 76, 77, 79, 80, 82, 83, 86, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 104, 105, 106, 107, 108, 109, 112, 114, 115, 117, 119], "due": [3, 4, 66, 81, 82, 108], "issu": [3, 4, 64, 71, 76, 89, 101, 104], "getbatch": [3, 4, 44], "void": [3, 4, 25, 26, 27, 28, 35, 36, 42, 44, 45], "bind": [3, 4, 33, 44, 75, 77, 
82], "char": [3, 4, 44, 52, 89], "name": [3, 4, 31, 33, 37, 44, 54, 56, 58, 60, 65, 66, 67, 72, 74, 75, 76, 77, 82, 83, 88, 89, 92, 93, 95, 100, 105, 108, 110, 111, 113, 115], "nbbind": [3, 4, 44], "Not": 3, "arrai": [3, 4, 33, 53, 54, 76, 77, 93, 95, 109], "pointer": [3, 4, 91], "fed": [3, 4, 48], "buffer": [3, 4, 65, 95], "each": [3, 4, 49, 53, 55, 56, 58, 60, 64, 65, 66, 71, 72, 75, 80, 82, 89, 93, 100, 107, 116], "input": [3, 4, 21, 29, 33, 38, 44, 45, 47, 49, 50, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 68, 70, 71, 72, 73, 75, 76, 77, 83, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119], "number": [3, 4, 49, 52, 54, 55, 56, 60, 63, 64, 65, 71, 72, 76, 77, 80, 89, 90, 95, 100, 102, 104, 109, 112, 118], "readcalibrationcach": [3, 4, 44], "size_t": [3, 4, 44, 91], "length": [3, 4, 44, 65, 70, 83, 109], "how": [3, 4, 66, 67, 82, 84, 86, 88, 92, 93, 95, 97, 99, 101, 108, 109, 110, 111, 112, 113, 114, 116], "enabl": [3, 4, 24, 49, 52, 54, 56, 57, 59, 64, 65, 66, 71, 72, 74, 75, 76, 77, 80, 97, 99, 100, 102, 104, 105, 106, 107, 109, 115, 116], "use_cach": [3, 4, 30, 44, 74, 91, 106, 107, 109], "set": [3, 4, 16, 21, 25, 27, 29, 32, 35, 37, 45, 46, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 65, 66, 71, 72, 75, 76, 77, 80, 84, 87, 88, 89, 90, 91, 93, 95, 100, 106, 108, 109, 112, 114, 115, 116, 118, 119], "writecalibrationcach": [3, 4, 44], "write": [3, 4, 29, 30, 44, 65, 69, 82, 89, 91, 110, 111, 113], "provid": [3, 4, 49, 52, 54, 56, 58, 60, 62, 64, 65, 66, 68, 71, 72, 75, 76, 77, 82, 89, 90, 91, 92, 93, 97, 98, 100, 101, 105, 109, 110, 111, 113, 114, 116, 117, 118], "cast": [3, 4, 55, 64, 71, 106, 107, 115], "convienc": [3, 4, 49], "convert": [3, 4, 31, 32, 37, 52, 55, 56, 57, 59, 63, 64, 69, 71, 76, 77, 90, 92, 95, 98, 102, 104, 109, 110, 112, 116], "easili": [3, 4, 99], "assign": [3, 4, 81], "ptq_calibr": [3, 4, 45, 49, 91], "field": [3, 4, 63, 72, 76, 91], "compilespec": [3, 4, 21, 32, 37, 41, 45, 50, 56, 77, 89, 91, 119], "dataloaderuniqueptr": [4, 44], "libtorch": [4, 36, 60, 66, 68, 89, 91, 118], "dataload": [4, 29, 30, 44, 49, 74, 91, 108], "unique_ptr": [4, 30], "unqiue_ptr": 4, "A": [4, 29, 30, 32, 33, 47, 48, 54, 55, 56, 60, 65, 66, 71, 72, 76, 77, 83, 91, 103, 110, 111, 113], "uniqu": [4, 90], "what": [4, 54, 55, 65, 68, 76, 82, 88, 89, 90, 106, 107, 118], "make_data_load": [4, 91], "factori": [4, 29, 30, 64, 71, 91], "path": [4, 13, 14, 15, 29, 30, 52, 64, 65, 66, 67, 71, 74, 76, 88, 89, 91, 94, 97, 105, 108, 118], "find": [4, 65, 66, 67, 89, 95, 109], "whether": [4, 52, 54, 64, 65, 71, 72, 76, 81, 91, 102, 104, 116], "exist": [4, 31, 32, 37, 54, 63, 64, 65, 67, 71, 74, 76, 77, 91, 97, 112], "There": [4, 53, 54, 59, 60, 62, 63, 65, 66, 83, 88, 91, 100, 110, 111, 112, 113, 114, 116], "consum": [4, 53, 88], "macro": [5, 6, 7, 8, 9, 10, 11, 12, 15, 18, 20, 21, 42, 44, 45, 50, 51], "x": [5, 10, 33, 43, 55, 56, 66, 67, 68, 75, 77, 83, 88, 89, 93, 95, 97, 101, 105, 108, 109, 110, 111, 113, 114, 115, 117], "includ": [13, 15, 16, 34, 36, 42, 43, 44, 45, 51, 52, 54, 56, 57, 58, 59, 62, 64, 65, 66, 67, 68, 71, 72, 75, 76, 80, 82, 88, 89, 91, 95, 116], "parent": [14, 15, 18, 19, 20, 21], "cpp": [14, 15, 42, 43, 44, 45, 51, 55, 59, 66, 89, 91], "log": [15, 16, 19, 20, 38, 44, 50, 51, 55, 60, 64, 65, 69, 70, 71, 72, 76, 93, 102, 104, 115], "emum": [16, 17], "messag": [16, 25, 26, 52, 73], "sever": [16, 26, 73], "kinternal_error": [16, 42], "print": [16, 31, 44, 62, 64, 67, 71, 77, 82, 89, 92, 93, 94, 95, 96, 97, 
99, 100, 102, 104, 106, 107, 108, 109, 110, 111, 113], "error": [16, 49, 52, 53, 55, 59, 64, 65, 71, 73, 76, 77, 82, 89, 114], "kerror": [16, 42], "all": [16, 42, 43, 44, 45, 49, 52, 54, 55, 56, 58, 62, 64, 65, 66, 67, 71, 73, 75, 76, 78, 82, 83, 88, 89, 90, 91, 93, 95, 106, 107, 110, 111, 112, 113, 115, 116, 118], "kwarn": [16, 42], "warn": [16, 44, 52, 60, 73, 75], "kinfo": [16, 42, 44], "info": [16, 32, 37, 45, 52, 60, 73, 75, 76, 115], "kdebug": [16, 42, 44], "debug": [16, 27, 45, 49, 52, 60, 62, 64, 71, 73, 75, 76, 77, 92, 94, 95, 96, 97, 99, 100, 101, 102, 104, 108, 115], "kgraph": [16, 42, 55], "everyth": [16, 64, 71, 76], "intermedi": [16, 49, 52, 54, 64, 71, 73, 76, 77, 88, 115, 118], "graph": [16, 31, 32, 37, 45, 49, 52, 53, 54, 56, 57, 59, 60, 62, 63, 64, 65, 71, 72, 73, 76, 77, 88, 89, 93, 95, 97, 99, 100, 102, 104, 105, 112, 114, 116], "lower": [16, 54, 63, 65, 69, 71, 72, 73, 76, 83, 95, 97, 102, 104, 109, 112, 118], "phase": [16, 60, 63, 89, 93, 100, 114, 118], "class": [17, 29, 30, 44, 45, 46, 51, 58, 60, 64, 65, 73, 77, 82, 83, 88, 89, 90, 91, 93, 95, 97, 101, 108, 112, 114, 115], "int8_t": [17, 45], "select": [17, 29, 30, 37, 49, 52, 58, 64, 65, 66, 70, 71, 76, 77, 81, 84, 90, 91, 95, 118], "capabl": [17, 45, 49, 52, 58, 71, 76, 77, 92, 93, 94], "kstandard": [17, 45, 49], "ksafeti": [17, 45], "kdla_standalon": [17, 45], "directori": [18, 19, 20, 21, 42, 43, 44, 45, 50, 66, 67, 71, 91, 97, 110, 111, 113], "program": [18, 19, 20, 21, 29, 51, 52, 57, 58, 59, 69, 71, 88, 97, 98, 106, 107, 110, 114], "list": [18, 19, 20, 21, 31, 49, 51, 53, 56, 58, 60, 62, 63, 65, 68, 70, 71, 72, 75, 76, 77, 86, 89, 90, 93, 95, 110, 111, 113], "level": [18, 23, 25, 26, 39, 42, 44, 50, 54, 55, 56, 59, 64, 65, 71, 76, 77, 86, 88, 93, 95, 110, 111, 113, 118], "get_is_colored_output_on": [18, 39, 42, 50], "get_logging_prefix": [18, 39, 42, 50], "get_reportable_log_level": [18, 39, 42, 50], "set_is_colored_output_on": [18, 39, 42, 50], "set_logging_prefix": [18, 39, 42, 50], "set_reportable_log_level": [18, 39, 42, 50], "torchscript": [19, 21, 38, 43, 45, 49, 50, 52, 56, 57, 58, 59, 63, 68, 71, 72, 74, 75, 76, 77, 90, 110, 111, 112, 113, 114, 119], "str": [19, 43, 44, 50, 54, 64, 65, 70, 71, 74, 75, 76, 77, 93, 94, 95, 97, 108], "torch_tensorrt_major_vers": [19, 43, 50], "torch_tensorrt_minor_vers": [19, 43, 50], "torch_tensorrt_patch_vers": [19, 43, 50], "torch_tensorrt_vers": [19, 43, 50], "torchtrt_hidden": [19, 43, 50], "xstr": [19, 43, 50], "nvinfer": [20, 44], "fstream": [20, 44], "iostream": [20, 21, 44, 45, 89], "iter": [20, 44, 49, 52, 53, 64, 71, 74, 76, 77, 96, 97, 108, 109], "memori": [20, 21, 44, 45, 55, 60, 71, 76, 77, 89, 90, 95, 97, 106, 107, 109], "sstream": [20, 44], "vector": [20, 21, 33, 44, 45, 47, 48, 49, 56, 58, 76, 89, 91, 119], "templat": [20, 40, 44, 45, 50, 80, 89], "int8cachecalibr": [20, 29, 40, 44, 50], "make_int8_cache_calibr": [20, 40, 44, 50, 91], "make_int8_calibr": [20, 29, 40, 44, 50, 91], "cuda_runtim": [21, 45], "custom_class": [21, 45], "devic": [21, 33, 35, 38, 45, 49, 50, 52, 58, 64, 70, 71, 72, 74, 75, 76, 77, 90, 91, 92, 95, 99, 103, 106, 107, 109, 112, 119], "graphinput": [21, 38, 45, 49, 50], "devicetyp": [21, 38, 45, 46, 50, 75, 76, 77, 91, 92, 95, 119], "tensorformat": [21, 38, 45, 48, 50, 76, 95], "enginecap": [21, 38, 45, 49, 50, 64, 71, 75, 76, 77, 92, 95], "dump_build_info": [21, 38, 45, 50], "get_build_info": [21, 38, 45, 50], "set_devic": [21, 38, 45, 50, 116], "check_method_operator_support": [21, 41, 45, 50], "compil": [21, 31, 37, 41, 45, 49, 
50, 52, 54, 55, 56, 58, 60, 62, 65, 71, 72, 73, 75, 76, 77, 78, 80, 88, 90, 91, 92, 93, 95, 96, 98, 99, 108, 110, 111, 113, 116, 119], "convert_method_to_trt_engin": [21, 41, 45, 50, 76, 77, 89, 92], "embed_engine_in_new_modul": [21, 41, 45, 50, 77], "current": [23, 54, 56, 58, 60, 62, 63, 64, 65, 66, 67, 71, 72, 75, 76, 77, 80, 93, 95, 99, 106, 107, 108, 109, 116], "report": [23, 44, 75], "Is": [24, 76], "color": [24, 27, 82], "output": [24, 27, 33, 49, 52, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 71, 73, 75, 76, 77, 80, 82, 83, 89, 93, 95, 97, 99, 100, 103, 109, 110, 111, 112, 113, 114, 115, 117], "lvl": [25, 26, 42], "inform": [25, 33, 34, 36, 48, 52, 53, 56, 58, 62, 64, 65, 66, 71, 72, 73, 76, 82, 88, 89, 91, 92, 95, 97, 109, 114], "ad": [25, 52, 53, 54, 56, 62, 65, 66, 95, 99], "abov": [25, 54, 56, 62, 65, 66, 73, 81, 82, 89, 95, 102, 104, 115, 117], "msg": [26, 42], "add": [26, 53, 54, 55, 56, 60, 63, 66, 70, 80, 82, 87, 89, 90, 93, 95], "global": [26, 52, 64, 71, 76, 89], "colored_output_on": [27, 42], "prefix": [27, 28, 42, 82], "help": [27, 52, 53, 60, 64, 65, 89, 94, 97, 108, 109, 112, 116], "when": [27, 44, 45, 46, 52, 53, 55, 56, 57, 58, 59, 60, 64, 65, 66, 71, 75, 76, 77, 80, 82, 84, 88, 89, 91, 93, 95, 97, 99, 100, 109, 112, 114, 116], "termin": [27, 52, 89], "If": [27, 33, 53, 54, 55, 56, 62, 63, 64, 65, 66, 68, 71, 72, 76, 80, 82, 89, 90, 91, 93, 95, 97, 100, 101, 105, 109, 110, 111, 113, 114, 115, 116, 118, 119], "build": [29, 30, 34, 49, 52, 53, 57, 59, 60, 63, 64, 65, 71, 75, 76, 81, 86, 89, 91, 93, 95, 102, 104, 109, 114], "post": [29, 30, 49, 52, 63, 69, 89, 97], "train": [29, 30, 49, 52, 69, 70, 89, 90, 97, 109], "quantiz": [29, 30, 52, 64, 69, 74, 76, 89, 98, 110], "creat": [29, 30, 33, 52, 53, 54, 56, 58, 60, 65, 69, 76, 77, 82, 89, 93, 95, 100, 109, 110, 111, 113], "previous": [29, 33, 89, 97, 100], "therefor": [29, 58, 65, 66, 75, 82, 89, 112, 116], "have": [29, 33, 44, 52, 53, 54, 55, 56, 60, 62, 63, 64, 65, 66, 67, 71, 72, 74, 75, 76, 77, 82, 88, 89, 90, 91, 95, 98, 102, 104, 108, 110, 111, 112, 113, 114], "requir": [29, 49, 52, 53, 54, 55, 63, 64, 65, 66, 67, 71, 76, 77, 80, 89, 91, 93, 94, 95, 98, 108, 109, 110, 111, 113, 114, 116], "dataset": [29, 74, 91, 112], "save": [29, 44, 52, 58, 64, 65, 68, 69, 71, 75, 76, 77, 89, 90, 94, 96, 97, 100, 103, 109, 110, 111, 112, 113, 116, 118], "later": [29, 71, 89, 100, 117, 118], "differ": [29, 55, 56, 59, 64, 65, 66, 71, 76, 80, 88, 93, 95, 97, 99, 106, 109, 112, 116, 118], "scratch": [29, 97, 100], "depend": [29, 34, 53, 59, 64, 65, 67, 68, 71, 89, 90, 109, 111, 113, 116], "howev": [29, 66, 80, 81, 89, 93, 95, 97, 110, 111, 113, 114, 118], "network": [29, 30, 54, 60, 65, 76, 89, 91, 93, 95, 109, 110, 111, 112, 113, 119], "also": [29, 53, 54, 60, 62, 64, 66, 68, 80, 82, 83, 89, 90, 91, 97, 105, 108, 112], "recalibr": 29, "its": [29, 53, 56, 58, 60, 66, 75, 76, 82, 95, 108, 110, 111, 113, 116, 118], "structur": [29, 46, 49, 56, 59, 60, 64, 71, 76, 80, 82, 86, 88, 95, 110, 111, 113], "chang": [29, 55, 56, 59, 62, 64, 65, 75, 76, 77, 80, 91, 93, 97, 99, 100, 110, 111, 113, 116, 118], "respons": [29, 54, 58, 82, 116], "ensur": [29, 54, 55, 56, 62, 64, 66, 67, 71, 75, 106, 107], "By": [29, 30, 51, 56, 64, 66, 71, 80, 88, 97, 114], "entropi": [29, 30, 91], "v2": [29, 30, 82], "perform": [29, 30, 54, 62, 63, 71, 75, 76, 91, 95, 105, 109, 110, 111, 112, 113, 115, 116, 117, 118], "recommend": [29, 30, 65, 66, 76, 82, 89, 95, 110, 111, 113, 114], "feed": [29, 30, 89], "forward": [29, 30, 32, 33, 56, 58, 60, 64, 68, 71, 75, 
76, 77, 88, 89, 90, 91, 92, 93, 95, 101, 108, 114, 115], "minmax": [29, 30, 91], "recomend": [29, 30], "nlp": [29, 30, 91], "task": [29, 30, 65, 91, 112], "call": [29, 30, 32, 49, 54, 55, 58, 60, 65, 71, 72, 75, 76, 77, 82, 88, 89, 92, 93, 95, 97, 99, 101, 104, 112, 114, 116, 118], "e": [29, 30, 52, 55, 60, 65, 66, 67, 68, 72, 76, 88, 89, 91, 95, 97, 100, 110, 111, 113], "g": [29, 30, 52, 55, 65, 66, 67, 72, 76, 82, 91, 95, 100, 110, 111, 113], "iint8minmaxcalibr": [29, 30, 91], "calibration_cache_fil": [29, 30, 91], "move": [30, 44, 55, 58, 77, 89, 91, 93, 106, 107], "calibration_dataload": [30, 91], "contain": [30, 31, 52, 53, 54, 55, 56, 60, 65, 66, 72, 75, 76, 82, 83, 88, 89, 91, 95, 97, 110, 111, 113, 116], "jit": [31, 32, 33, 37, 45, 47, 49, 52, 53, 55, 56, 57, 58, 59, 60, 61, 64, 68, 69, 71, 75, 76, 77, 88, 89, 90, 92, 95, 100, 110, 111, 113, 117, 118], "modul": [31, 32, 33, 37, 45, 49, 52, 56, 57, 58, 59, 60, 64, 65, 66, 67, 68, 69, 71, 72, 74, 75, 76, 77, 81, 82, 83, 90, 91, 92, 93, 94, 95, 98, 100, 101, 108, 110, 112, 114, 115, 117, 119], "method_nam": [31, 37, 45, 52, 76, 77, 89], "see": [31, 55, 56, 58, 62, 64, 65, 66, 76, 77, 82, 88, 89, 90, 93, 95, 97, 100, 101], "fulli": [31, 52, 55, 64, 71, 75, 76, 77, 89, 91, 95, 119], "take": [31, 32, 33, 37, 53, 54, 57, 58, 59, 60, 62, 65, 71, 72, 75, 76, 77, 80, 82, 89, 91, 92, 93, 95, 101, 112, 114], "method": [31, 32, 33, 37, 48, 52, 55, 60, 66, 71, 76, 77, 82, 88, 89, 92, 97, 112], "pure": [31, 71, 76], "Will": 31, "out": [31, 44, 53, 55, 56, 57, 59, 60, 64, 66, 71, 76, 77, 82, 89, 95, 99, 108, 109, 110, 111, 113, 114], "unsupport": [31, 49, 54, 64, 76, 95, 118], "script": [31, 55, 56, 68, 76, 77, 88, 89, 90, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 116, 118], "nvidia": [32, 37, 42, 43, 44, 45, 52, 61, 64, 65, 66, 67, 71, 76, 77, 89, 101, 104, 110, 111, 113, 118, 119], "configur": [32, 37, 48, 62, 64, 66, 71, 75, 76, 77, 86, 89, 91, 95, 109, 110, 111, 113, 114], "equival": [32, 57, 59, 60, 71, 76, 77, 88, 89, 91, 93, 95, 102, 104], "specif": [32, 49, 54, 55, 57, 59, 62, 64, 71, 76, 77, 82, 93, 109, 118], "traget": 32, "input_binding_nam": [33, 45, 75, 77], "output_binding_nam": [33, 45, 75, 77], "emb": [33, 52, 63, 77, 83], "pre": [33, 55, 74, 77, 91, 97, 109, 116], "built": [33, 52, 58, 59, 64, 66, 71, 75, 76, 77, 97, 100], "serial": [33, 37, 52, 57, 59, 66, 71, 75, 76, 77, 89, 95, 97, 110, 111, 113, 118], "regist": [33, 54, 58, 60, 65, 75, 77, 93, 95], "execut": [33, 49, 52, 55, 57, 58, 59, 63, 64, 65, 66, 69, 71, 72, 75, 76, 77, 78, 88, 89, 91, 93, 95, 110, 111, 113], "must": [33, 48, 49, 52, 54, 55, 56, 60, 62, 65, 66, 71, 72, 76, 77, 82, 83, 89, 97, 114, 116, 118], "follow": [33, 52, 54, 56, 58, 62, 63, 64, 65, 66, 77, 80, 82, 83, 87, 88, 89, 91, 93, 95, 97, 98, 102, 106, 107, 110, 111, 112, 113, 114, 115, 116], "format": [33, 45, 48, 49, 52, 70, 71, 76, 77, 82, 83, 90, 95, 97, 108, 110, 111, 112, 113, 115, 117], "symbol": [33, 65, 66, 77, 82, 116], "index": [33, 61, 62, 66, 67, 69, 70, 77, 80, 86, 91, 95], "0": [33, 43, 44, 45, 49, 52, 54, 56, 59, 60, 62, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 78, 79, 81, 82, 89, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 114, 115, 119], "2": [33, 43, 54, 56, 60, 63, 64, 65, 66, 67, 69, 70, 71, 74, 75, 76, 77, 80, 82, 83, 86, 88, 89, 91, 93, 95, 96, 97, 99, 100, 101, 102, 104, 106, 107, 108, 109, 114, 117], "y": [33, 56, 77, 83, 93, 95, 101], "compilesepc": 33, "order": [33, 49, 
54, 56, 60, 62, 65, 66, 71, 72, 75, 76, 77, 89, 90, 93, 97, 115], "pass": [33, 53, 54, 56, 57, 58, 59, 60, 63, 64, 65, 66, 69, 73, 74, 75, 76, 77, 88, 89, 91, 93, 95, 97, 100], "origin": [33, 65, 72, 76, 95, 97, 99, 118], "pytorch": [33, 48, 49, 52, 54, 55, 56, 57, 58, 59, 60, 63, 64, 66, 67, 68, 71, 74, 75, 76, 77, 88, 89, 90, 91, 93, 97, 99, 100, 108, 110, 111, 113, 114, 115, 116, 117, 118], "assum": [33, 75, 92, 95, 98, 110], "convent": 33, "below": [33, 56, 60, 62, 63, 64, 65, 66, 67, 82, 89, 90, 97, 103, 110, 111, 113], "librari": [34, 42, 43, 44, 45, 52, 54, 57, 58, 59, 60, 76, 89, 95, 98, 110], "version": [34, 36, 59, 62, 64, 65, 67, 71, 75, 76, 80, 83, 95, 110, 111, 112, 113, 117], "gpu_id": [35, 45, 46, 52, 75, 76, 77, 91, 92, 95, 119], "id": [35, 45, 52, 76, 80, 81, 85, 119], "cudasetdevic": 35, "dump": [36, 52, 95], "base": [36, 50, 58, 63, 64, 66, 71, 72, 76, 82, 88, 90, 91, 96, 100, 104, 112, 118], "stdout": [36, 75], "equivil": 37, "document": [42, 43, 44, 45, 50, 59, 80, 82, 83, 87, 88, 89, 91, 92, 110, 111, 113, 114, 116], "copyright": [42, 43, 44, 45, 83, 89], "c": [42, 43, 44, 45, 52, 59, 64, 67, 70, 71, 72, 75, 76, 83, 90, 95, 99, 110, 111, 113, 116, 119], "corpor": [42, 43, 44, 45], "right": [42, 43, 44, 45, 55, 59, 60, 82, 110, 111, 113], "reserv": [42, 43, 44, 45, 106, 107], "licens": [42, 43, 44, 45, 89], "under": [42, 43, 44, 45, 59, 65, 82, 93, 102, 118], "bsd": [42, 43, 44, 45], "style": [42, 43, 44, 45, 64, 68, 80, 82, 83], "found": [42, 43, 44, 45, 63, 66, 75, 82, 89, 91, 93, 95, 97, 116], "root": [42, 43, 44, 45, 66, 80, 91, 108], "sourc": [42, 43, 44, 45, 54, 59, 64, 65, 67, 71, 72, 73, 74, 75, 76, 77, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "tree": [42, 43, 44, 45, 80, 91, 108, 116], "pragma": [42, 43, 44, 45, 91], "onc": [42, 43, 44, 45, 53, 55, 56, 58, 64, 65, 66, 67, 76, 91, 95, 107, 109, 110, 111, 113, 116], "namespac": [42, 43, 44, 45, 51, 55, 69, 76, 91, 95], "ar": [42, 46, 49, 52, 53, 54, 55, 56, 58, 59, 60, 62, 63, 64, 65, 66, 71, 74, 75, 76, 77, 80, 82, 83, 84, 88, 89, 91, 92, 93, 95, 96, 97, 99, 100, 102, 106, 107, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], "ones": [42, 56, 57, 59, 66, 82, 89, 93, 95, 118], "necessari": [42, 62, 64, 66, 75, 93, 100, 116], "user": [42, 48, 54, 56, 57, 58, 59, 62, 63, 64, 66, 67, 71, 82, 83, 89, 90, 91, 93, 97, 100, 109, 110, 111, 113, 114, 115, 116, 118], "dont": 42, "know": [42, 60, 80, 82, 93, 95], "we": [42, 44, 53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 72, 75, 80, 82, 88, 89, 91, 93, 95, 97, 98, 99, 100, 101, 102, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118], "want": [42, 56, 65, 66, 67, 68, 72, 88, 89, 91, 92, 93, 95, 100, 101, 110, 111, 113], "use_cmake_generated_export_head": 43, "torch_tensorrt_export": 43, "els": [43, 44, 48, 77, 82, 83, 94, 96, 97, 108], "__gnuc__": 43, "__attribute__": 43, "__visibility__": 43, "hidden": [43, 80], "endif": [43, 44, 45], "doe": [43, 44, 55, 56, 60, 62, 65, 66, 76, 82, 91, 95, 102, 104], "gaurd": 43, "someth": [43, 55, 82, 110, 111, 113], "6": [43, 55, 56, 58, 66, 70, 82, 86, 88, 89, 94, 95], "setup": [43, 67, 91, 110, 111, 113], "alias": 43, "eas": 43, "ts": [43, 52, 56, 68, 69, 76, 88, 89, 90, 92, 114, 117], "torchtrt": [43, 56, 94, 95, 108, 110, 111, 113], "ifndef": [44, 45], "doxygen_should_skip_thi": [44, 45], "get_batch_impl": 44, "element_typ": 44, "super": [44, 88, 93, 95, 101, 108, 114, 115], "batchtyp": 44, "dataloader_": 44, "cache_file_path_": 44, "use_cache_": 44, "auto": [44, 56, 60, 
64, 68, 71, 82, 83, 89, 91, 106, 107, 109, 119], "batched_data_": 44, "push_back": [44, 56], "it_": 44, "begin": [44, 65, 66, 82, 101, 105], "hack": 44, "explict": 44, "work": [44, 55, 59, 60, 64, 65, 68, 71, 74, 75, 76, 82, 83, 91, 95, 100, 101, 105, 109, 110, 111, 113, 114], "here": [44, 53, 54, 56, 58, 63, 64, 65, 66, 68, 80, 82, 83, 88, 89, 91, 93, 95, 98, 105, 106, 107, 108, 110, 111, 113, 114, 116, 117], "explic": 44, "just": [44, 45, 55, 56, 64, 65, 69, 73, 75, 82, 84, 88, 89, 90, 92, 95, 97, 99, 112, 116], "still": [44, 56, 65, 66, 91, 93, 101, 118], "static_cast": 44, "option": [44, 48, 52, 56, 57, 59, 62, 63, 64, 65, 71, 75, 76, 77, 82, 86, 91, 93, 95, 96, 97, 101, 103, 115, 116, 117, 119], "batch_siz": [44, 91, 108], "end": [44, 52, 60, 62, 70, 71, 76, 77, 82, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "statu": [44, 83], "reset": [44, 96, 97, 101, 104, 116], "incas": 44, "go": [44, 55, 56, 65, 68, 88, 89, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 118], "again": [44, 58, 60, 82, 95, 99], "stringstream": 44, "ss": 44, "cache_": 44, "clear": 44, "ifstream": 44, "io": [44, 67, 110, 111, 113], "binari": [44, 91], "noskipw": 44, "good": [44, 60, 65, 82, 97], "copi": [44, 60, 65, 67, 70, 74, 83, 109], "istream_iter": 44, "back_insert": 44, "nullptr": [44, 45, 49], "ofstream": [44, 89], "cache_fil": [44, 74, 91], "reinterpret_cast": 44, "cache_size_": 44, "arrayref": [45, 48, 49], "friend": 45, "ostream": 45, "os": [45, 67, 97], "dtype": [45, 48, 49, 52, 63, 64, 65, 70, 71, 72, 75, 76, 77, 90, 95, 96, 102, 104, 105, 109, 110, 111, 112, 113, 114, 115], "device_typ": [45, 46, 76, 91, 92, 119], "int64_t": [45, 46, 48, 49, 91, 119], "core": [45, 52, 55, 56, 59, 64, 71, 76, 89, 93, 118, 119], "agx": 45, "platform": [45, 52, 59, 64, 66, 67, 71, 94, 119], "xavier": [45, 119], "dla_cor": [45, 46, 52, 76, 91, 92, 119], "allow_gpu_fallback": [45, 46, 71, 76, 77, 91, 92, 119], "customclasshold": [45, 48], "min_shap": [45, 48, 63, 65, 71, 76, 77, 90, 102, 105, 112, 114], "opt_shap": [45, 48, 63, 71, 76, 77, 90, 102, 105, 112, 114], "max_shap": [45, 48, 63, 65, 71, 76, 77, 90, 102, 105, 112, 114], "shape": [45, 47, 48, 49, 52, 56, 60, 63, 65, 69, 70, 71, 72, 75, 76, 77, 78, 90, 93, 95, 98, 105, 108, 109, 110, 111, 113, 116, 119], "doubl": [45, 48, 49, 52, 63, 71, 76, 77, 82, 116], "tensor_domain": [45, 48, 76], "input_is_dynam": 45, "ivalu": [45, 47, 49, 53, 58, 60, 89], "input_signatur": [45, 47, 49, 77, 90], "nest": [45, 49, 50, 82, 83], "full": [45, 49, 52, 60, 64, 71, 73, 76, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 116, 119], "spec": [45, 48, 49, 52, 73, 76, 77, 92, 97], "flatten": [45, 47, 70, 88, 89, 108], "fixed_s": [45, 49], "reflect": [45, 76], "builderconfig": 45, "graph_input": [45, 49], "enabled_precis": [45, 49, 63, 64, 71, 75, 76, 77, 89, 90, 91, 92, 95, 96, 97, 99, 100, 101, 102, 103, 104, 106, 107, 108, 109, 110, 111, 113, 115, 119], "disable_tf32": [45, 49, 64, 71, 75, 76, 77, 91, 95, 106, 107], "sparse_weight": [45, 49, 64, 65, 71, 75, 76, 77, 95], "refit": [45, 49, 64, 69, 71, 76, 77, 92, 95, 97, 98, 99, 110], "truncate_long_and_doubl": [45, 49, 63, 64, 77, 103], "allow_shape_tensor": [45, 49, 77], "uint64_t": [45, 49], "num_avg_timing_it": [45, 49, 64, 71, 75, 76, 77, 92, 95], "workspace_s": [45, 49, 52, 64, 71, 75, 76, 77, 95, 100, 102, 104], "dla_sram_s": [45, 49, 52, 64, 71, 75, 76, 77, 95], "1048576": [45, 49, 64, 71, 75, 76, 77, 
95], "dla_local_dram_s": [45, 49, 52, 64, 71, 75, 76, 77, 95], "1073741824": [45, 49, 64, 71, 75, 76, 77, 95], "dla_global_dram_s": [45, 49, 52, 64, 71, 75, 76, 77, 95], "536870912": [45, 49, 64, 71, 75, 76, 77, 95], "require_full_compil": [45, 49, 64, 71, 75, 76, 77, 95], "min_block_s": [45, 49, 56, 63, 64, 71, 75, 76, 77, 93, 94, 95, 96, 97, 100, 101, 102, 104, 108], "3": [45, 49, 52, 55, 56, 58, 63, 64, 65, 67, 68, 70, 71, 74, 76, 77, 82, 83, 86, 88, 89, 91, 92, 94, 95, 96, 97, 99, 100, 102, 105, 106, 107, 108, 109, 112, 114, 117, 119], "torch_executed_op": [45, 49, 56, 63, 64, 71, 75, 76, 77, 95, 100, 101, 102, 104], "torch_executed_modul": [45, 49, 56, 71, 76, 77], "member": [46, 47, 48, 49], "hold": [46, 47, 48, 53, 60, 76, 91], "relat": [46, 82, 101, 104], "let": [46, 52, 55, 60, 65, 71, 76, 77, 80, 82, 110, 111, 112, 113, 118], "layer": [46, 49, 52, 53, 55, 60, 62, 64, 65, 71, 75, 76, 77, 89, 91, 93, 95, 106, 107, 108, 110, 111, 112, 113, 114, 115, 118, 119], "thei": [46, 52, 53, 54, 55, 58, 60, 64, 65, 71, 74, 75, 76, 80, 82, 90, 93, 97], "complex": [47, 49, 64, 66, 88, 90, 99, 107], "either": [47, 48, 52, 60, 62, 71, 76, 77, 80, 82, 88, 89, 90, 93, 94, 95, 97, 117], "one": [47, 54, 55, 60, 64, 65, 67, 71, 75, 76, 82, 88, 89, 90, 93, 95, 101, 104, 106, 107, 110, 111, 113], "rang": [48, 49, 52, 65, 76, 95, 96, 97, 102, 109, 112, 114], "optim": [48, 52, 63, 64, 65, 69, 71, 72, 74, 76, 88, 89, 90, 100, 102, 103, 104, 109, 112, 114, 118], "profil": [48, 72, 75, 115], "singl": [48, 52, 55, 56, 65, 76, 82, 88, 89, 91, 109, 116], "repres": [48, 49, 54, 60, 65, 68, 82], "signifi": [48, 55], "static": [48, 49, 53, 60, 63, 64, 71, 76, 77, 80, 89, 108, 114], "three": [48, 57, 59, 65, 72, 76, 82, 83, 110, 111, 112, 113], "min": [48, 52, 60, 70, 76, 97, 102, 114], "optimin": 48, "max": [48, 52, 60, 70, 76, 80, 97, 102, 108, 114], "allow": [48, 49, 52, 53, 54, 55, 56, 62, 64, 65, 66, 71, 76, 77, 80, 93, 95, 97, 100, 102, 104, 109, 116], "argument": [48, 52, 54, 55, 58, 60, 62, 64, 65, 71, 75, 76, 77, 82, 83, 89, 90, 93, 94, 95, 114], "expect": [48, 54, 55, 60, 76, 89, 90, 112], "tradit": [48, 71, 76, 77, 91], "convect": 48, "produc": [48, 53, 54, 58, 60, 63, 76, 82, 89, 112], "low": [48, 65, 93, 99], "high": [48, 55, 56, 80, 93, 95, 118], "weight": [48, 49, 52, 53, 64, 65, 69, 70, 71, 75, 76, 77, 82, 89, 97, 98, 99, 103, 110, 112], "first": [48, 53, 54, 55, 65, 68, 82, 83, 89, 90, 91, 93, 95, 97, 99, 101, 110, 111, 113, 114, 117, 118], "calcul": [48, 53, 56, 89, 95, 109], "detect": [48, 58, 76], "float32": [48, 49, 52, 63, 64, 65, 71, 76, 77, 95, 99, 103, 106, 107, 109, 114, 115], "dynam": [48, 49, 63, 65, 69, 71, 72, 76, 77, 78, 93, 97, 98, 101, 103, 104, 107, 109, 110, 116], "opt": [48, 66, 75, 76, 105], "minimum": [48, 49, 52, 56, 63, 64, 71, 76, 77, 95, 109], "maximum": [48, 49, 52, 64, 65, 71, 72, 76, 77, 102, 104, 109, 110, 111, 113], "accept": [48, 52, 54, 58, 60, 66, 76, 89, 90, 101, 117], "exampl": [48, 56, 58, 59, 60, 65, 66, 71, 73, 75, 76, 77, 78, 80, 81, 83, 86, 88, 89, 90, 91, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117], "s": [48, 49, 53, 56, 58, 60, 63, 65, 66, 67, 69, 71, 72, 75, 76, 80, 82, 83, 88, 89, 91, 93, 95, 97, 109, 110, 111, 112, 113, 114, 116, 117], "cannot": [48, 55, 56, 65, 66, 71, 75, 76, 77, 81, 88, 94, 95], "through": [48, 53, 54, 55, 56, 58, 64, 65, 71, 73, 74, 82, 89, 90, 95, 99, 100, 112, 118], "altern": [48, 56, 62, 63, 76, 90, 93, 105, 112, 117], "refer": [48, 54, 57, 59, 65, 81, 86, 89, 91, 95, 
108, 110, 111, 113, 114, 117], "given": [48, 49, 52, 54, 55, 65, 71, 72, 74, 76, 77, 88, 89, 90, 92, 93, 114], "kernel": [48, 49, 52, 60, 64, 65, 69, 71, 76, 77, 93, 98, 110, 115, 116], "ani": [48, 52, 53, 54, 60, 62, 64, 65, 70, 71, 74, 75, 76, 77, 80, 82, 89, 90, 91, 93, 95, 102, 114], "event": [48, 64, 96, 97], "place": [48, 55, 62, 65, 82, 83, 84, 91, 95, 108], "variabl": [48, 65, 75, 76], "dimens": [48, 55, 65, 72, 76, 102, 112, 114, 115], "domain": [48, 76, 83, 91], "convien": 49, "fix": [49, 65, 82, 95, 116, 119], "describ": [49, 56, 60, 76, 88, 92, 110, 111, 113], "entri": [49, 60, 97], "okai": 49, "ha": [49, 53, 54, 55, 56, 57, 59, 60, 62, 64, 65, 66, 67, 71, 72, 76, 82, 83, 88, 89, 91, 93, 94, 97, 100, 108, 112, 114, 118], "flaten": 49, "precis": [49, 52, 63, 64, 65, 69, 71, 76, 89, 90, 91, 102, 104, 106, 107, 109, 119], "dure": [49, 52, 54, 56, 60, 63, 64, 71, 74, 76, 91, 93, 106, 107, 109, 110, 111, 112, 113, 114, 116], "prevent": [49, 52, 54, 56], "tf32": [49, 52, 64, 71], "comput": [49, 64, 65, 66, 67, 71, 75, 82, 91, 94, 98, 110, 112], "inner": [49, 83, 112], "product": [49, 67, 76], "round": [49, 71, 76, 77, 95], "10": [49, 66, 67, 71, 72, 76, 77, 86, 88, 89, 91, 108, 109, 110, 111, 112, 113, 114, 115], "bit": [49, 60, 65, 66, 71, 76, 77, 89], "mantissa": [49, 71, 76, 77], "befor": [49, 54, 55, 56, 59, 60, 65, 71, 76, 77, 89, 110, 111, 113, 114], "multipli": [49, 71, 76, 77], "accumul": [49, 64, 71, 76, 77, 106, 107], "sum": [49, 65, 70, 71, 76, 77, 95, 108], "23": [49, 55, 71, 76, 77, 83], "behavior": [49, 56, 65, 71, 76, 77, 93, 106, 107, 114, 116, 117], "sparsiti": [49, 52, 65, 71, 76, 77], "conv": [49, 52, 89, 95], "fc": [49, 52, 55], "truncat": [49, 52, 63, 64, 71, 76, 77], "long": [49, 52, 53, 63, 76, 82, 83], "float": [49, 52, 63, 64, 70, 76, 88, 89, 90, 91, 92, 95, 96, 97, 100, 101, 104, 105, 115], "ishap": 49, "restrict": [49, 64, 71, 76, 77, 114], "cuda": [49, 58, 63, 65, 67, 68, 71, 72, 75, 76, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 102, 103, 105, 106, 107, 108, 109, 110, 111, 113, 114, 115, 116, 117], "safeti": [49, 52, 76], "averag": [49, 52, 64, 71, 76, 77, 95], "time": [49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 64, 65, 66, 68, 69, 71, 72, 75, 76, 77, 80, 82, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "workspac": [49, 52, 64, 65, 66, 71, 72, 76, 77, 95, 101, 102, 104], "fast": [49, 52, 64, 68, 71, 76, 77], "softwar": [49, 52, 64, 71, 76, 77, 82], "manag": [49, 52, 53, 55, 57, 59, 60, 64, 66, 67, 71, 73, 75, 76, 77, 89, 105, 116], "ram": [49, 52, 64, 71, 76, 77], "commun": [49, 52, 64, 71, 76, 77, 89], "within": [49, 52, 57, 59, 64, 69, 71, 75, 76, 77, 80, 82, 98, 106, 107, 110, 112], "host": [49, 52, 64, 66, 71, 76, 77, 95, 109, 110, 111, 113], "share": [49, 52, 64, 66, 71, 75, 76, 77, 97], "across": [49, 52, 55, 56, 64, 71, 76, 77, 80], "metadata": [49, 52, 54, 58, 60, 64, 71, 76, 77, 80, 100, 114, 115], "quantizatiom": 49, "instead": [49, 52, 53, 54, 55, 66, 71, 75, 76, 89, 93, 100, 108, 116], "potenti": [49, 71, 76, 85], "subgraph": [49, 52, 53, 54, 55, 60, 62, 89, 95, 97, 118], "aten": [49, 54, 55, 56, 60, 61, 64, 69, 70, 71, 76, 77, 89, 93, 101, 118], "thrown": [49, 71, 76, 77], "empti": [49, 71, 72, 76, 77, 83, 88, 95], "torch_tensorrtnamespac": 50, "loggingenum": 50, "levelnamespac": 50, "ptqtemplat": 50, "int8cachecalibratortempl": 50, "int8calibratornamespac": 50, "torchscriptstruct": 50, "compilespecenum": 50, "enginecapabilitystruct": 50, "deviceclass": 50, "devicetypestruct": 50, "graphinputsstruct": 50, 
"inputclass": 50, "datatypeclass": 50, "cppdirectori": 50, "includedirectori": 50, "torch_tensorrtfil": 50, "hfile": 50, "relationship": 50, "inherit": [50, 65, 71, 91], "subdirectori": 51, "definit": [51, 54, 60, 82], "cli": [52, 90], "It": [52, 54, 55, 56, 57, 59, 60, 65, 66, 69, 76, 80, 82, 94, 95, 109, 112, 116, 118], "serv": [52, 58, 65, 69, 71, 76], "easi": [52, 53, 55, 89, 91], "wai": [52, 64, 65, 66, 88, 89, 91, 93, 95, 97, 100, 112, 116, 117], "command": [52, 64, 66, 82, 83, 88, 89, 110, 111, 113], "line": [52, 66, 83, 89, 99], "quickli": [52, 89, 91, 110, 111, 113], "part": [52, 56, 59, 65, 75, 80, 81, 82, 95, 97], "deploy": [52, 75, 89, 90, 91, 110, 111, 112, 113, 116, 119], "pipelin": [52, 89, 99, 103, 119], "basic": [52, 56, 65, 83, 110, 111, 113], "featur": [52, 56, 65, 66, 89, 91, 92, 103, 108, 109, 112, 118], "though": [52, 59, 60, 88, 89, 118], "alreadi": [52, 53, 54, 55, 89, 91, 93, 95, 98, 110, 111, 113, 114], "two": [52, 55, 60, 62, 64, 65, 66, 76, 82, 83, 87, 88, 90, 91, 93, 97, 110, 111, 113, 114], "embed": [52, 54, 58, 70, 77, 82, 119], "plan": [52, 59, 63, 64, 71], "after": [52, 53, 55, 56, 62, 65, 71, 75, 76, 88, 89, 90, 101, 104, 110, 111, 113, 116], "link": [52, 53, 62, 69, 80, 81, 86, 89, 95, 116], "against": [52, 89, 93], "libtorchtrt": [52, 66, 89], "python": [52, 56, 59, 62, 64, 65, 67, 71, 72, 75, 76, 77, 82, 83, 89, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 116, 119], "import": [52, 55, 56, 63, 64, 65, 66, 67, 68, 75, 80, 82, 88, 89, 90, 92, 93, 95, 96, 97, 99, 110, 111, 113, 114, 116, 117], "packag": [52, 55, 64, 67, 89], "aspect": 52, "ident": [52, 62, 64, 71, 76, 100], "standard": [52, 58, 66, 69, 71, 75, 76, 77, 82, 92, 93, 95, 99, 112, 116], "load": [52, 56, 58, 64, 65, 68, 71, 74, 75, 76, 77, 89, 90, 91, 92, 94, 95, 96, 97, 99, 100, 109, 110, 111, 112, 113, 116, 118], "like": [52, 53, 55, 58, 60, 65, 66, 68, 76, 81, 82, 88, 89, 90, 91, 93, 95, 97, 99, 100, 109, 110, 111, 113, 116], "would": [52, 54, 60, 64, 65, 66, 67, 75, 89, 90, 92, 93, 95, 110, 111, 113, 116], "input_file_path": [52, 119], "output_file_path": [52, 119], "input_spec": [52, 65, 72], "displai": [52, 62, 64, 73, 80, 116], "menu": [52, 80, 82], "verbios": 52, "v": [52, 67, 83, 108, 110, 111, 113], "verbos": [52, 64, 65, 71, 72, 83, 102, 104], "about": [52, 53, 58, 60, 66, 75, 80, 89, 110, 111, 113, 114], "process": [52, 56, 64, 76, 81, 82, 88, 91, 92, 100, 101, 105, 110, 111, 112, 113, 116], "onto": [52, 58], "consol": 52, "w": [52, 66, 76], "disabl": [52, 64, 66, 71, 75, 80, 81, 93, 97, 109, 116], "i": [52, 55, 60, 66, 68, 70, 82, 83, 88, 89, 91, 95, 96, 97, 106, 108], "debugg": [52, 71, 76, 77], "fallback": [52, 57, 59, 60, 100, 119], "model": [52, 56, 58, 63, 68, 71, 72, 73, 74, 76, 78, 88, 89, 90, 91, 92, 96, 97, 99, 114, 116, 118], "throw": [52, 55, 76, 89], "spars": [52, 54, 64, 70, 71], "p": [52, 70, 89, 110, 111, 113, 119], "repeat": [52, 70], "f32": [52, 71, 75, 76, 95], "half": [52, 64, 76, 82, 89, 90, 91, 92, 95, 101, 102, 106, 107, 109, 115, 119], "float16": [52, 76, 95, 99, 103, 115], "f16": [52, 76, 89, 110, 111, 113, 119], "i8": [52, 76], "d": [52, 67, 76, 82, 83, 89, 119], "multi": [52, 75], "dlacor": 52, "avail": [52, 54, 60, 62, 64, 65, 66, 67, 71, 75, 76, 80, 95, 109, 112, 118, 119], "dla_standalon": [52, 76], "file_path": [52, 76, 94, 117], "teo": 52, "op_nam": 52, "op": [52, 53, 54, 55, 56, 57, 59, 60, 62, 63, 64, 75, 76, 89, 93, 101, 116, 118], "partial": [52, 82], "tem": 52, "module_nam": 52, "mod": [52, 56, 
65, 71, 86, 89, 91, 115], "mb": [52, 78], "num_op": 52, "block": [52, 53, 55, 56, 64, 71, 86, 118], "treat": 52, "num": 52, "avg": 52, "num_it": 52, "sram": 52, "local": [52, 55, 66, 67, 80, 89], "dram": 52, "atol": 52, "absolut": [52, 66], "toler": 52, "threshold": 52, "numer": [52, 65, 83], "deviat": 52, "1e": [52, 99, 100], "rtol": 52, "rel": [52, 56], "5": [52, 56, 58, 59, 64, 65, 66, 67, 71, 75, 76, 82, 83, 86, 88, 89, 93, 95, 99, 101, 109, 110, 111, 113], "skip": 52, "complianc": 52, "64bit": [52, 94], "32bit": 52, "custom": [52, 62, 63, 65, 66, 69, 98, 106, 107, 110], "dll": 52, "n": [52, 60, 62, 76, 89, 91, 93, 95, 96], "min_n": 52, "min_c": 52, "min_h": 52, "min_w": 52, "opt_n": 52, "opt_c": 52, "opt_h": 52, "opt_w": 52, "max_n": 52, "max_c": 52, "max_h": 52, "max_w": 52, "32": [52, 76, 88, 89, 90, 91, 106, 107, 108, 119], "flag": [52, 56, 57, 59, 64, 66, 71, 74, 76, 90, 105, 106, 107, 116, 117], "forc": [52, 63, 65, 71, 76, 77, 80], "posit": [52, 54, 65, 76, 80], "test": [52, 56, 59, 65, 66, 67, 71, 76, 82, 83, 91, 108, 110, 111, 112, 113], "ssd_trace": 52, "pt": [52, 65, 89, 106, 107, 110, 111, 113], "ssd_trt": 52, "300": [52, 92], "512": [52, 71, 76, 77, 108, 112], "1024": [52, 71, 76, 77, 106, 112], "simplifi": [53, 95], "form": [53, 75, 76, 82, 90, 110, 111, 113], "up": [53, 55, 56, 57, 58, 59, 62, 65, 66, 71, 76, 82, 88, 93, 95, 97, 100, 101, 104, 109, 112], "context": [53, 57, 58, 59, 64, 73, 75, 93, 105, 116], "inetworkdefinit": [53, 54], "record": [53, 88, 96, 97, 105, 116], "togeth": [53, 60, 89], "start": [53, 56, 65, 70, 74, 76, 83, 89, 92, 95, 96, 97, 112], "look": [53, 54, 55, 68, 71, 76, 88, 91, 92, 93, 97, 110, 111, 113, 114], "assembl": [53, 62, 89], "resourc": [53, 91, 95], "coupl": [53, 59, 65, 116], "state": [53, 54, 60, 62, 75, 89, 93, 99], "been": [53, 60, 64, 66, 67, 74, 83, 89, 94, 97, 100, 118], "evaluated_value_map": [53, 60], "stage": [53, 65], "arg": [53, 54, 62, 65, 71, 74, 75, 76, 86, 89, 93, 94, 95, 97, 108, 112], "itensor": [53, 54, 60, 65, 89, 93, 95], "value_tensor_map": [53, 60], "typic": [53, 60, 76, 110, 111, 113], "abl": [53, 55, 60, 62, 65, 91, 92, 95, 100], "system": [53, 60, 62, 64, 69, 71, 75, 76, 77, 93, 94, 95, 97, 100, 118], "registri": [53, 54, 89, 95], "enter": [53, 76], "recurs": 53, "resolv": [53, 55, 57, 59, 101, 104], "until": [53, 56, 59, 60, 66, 71, 76, 118], "final": [53, 56, 57, 59, 66, 93, 95, 101, 104, 112], "some": [53, 54, 55, 56, 57, 58, 59, 60, 62, 63, 64, 65, 66, 71, 76, 81, 82, 89, 91, 93, 95, 97, 114, 118], "These": [53, 54, 56, 58, 62, 64, 66, 71, 74, 75, 76, 80, 82, 91, 93, 110, 111, 113, 118], "those": [53, 54, 62, 64, 82], "do": [53, 54, 55, 56, 60, 63, 65, 81, 83, 88, 89, 90, 91, 93, 95, 106, 107, 119], "theori": [53, 82], "kind": [53, 65], "common": [53, 55, 65, 72, 82, 93, 97], "prim": [53, 55, 56, 58, 70, 88, 89], "constant": [53, 54, 55, 56, 89, 95], "emit": 53, "listconstruct": [53, 56, 58, 89], "make": [53, 54, 65, 66, 67, 71, 76, 82, 84, 89, 90, 91, 95, 97, 110, 111, 112, 113, 119], "associ": [53, 60, 89, 97, 116], "where": [53, 54, 55, 60, 62, 64, 65, 71, 75, 76, 77, 83, 89, 91, 93, 100], "result": [53, 55, 56, 66, 68, 71, 73, 75, 76, 77, 80, 88, 90, 94, 95, 99, 100, 109, 110, 111, 113, 115, 118], "done": [53, 56, 59, 95, 100, 110, 111, 113, 117], "mai": [53, 54, 56, 58, 59, 65, 66, 71, 75, 76, 77, 82, 83, 88, 89, 90, 91, 93, 95, 100, 101, 104, 109, 110, 111, 113, 116], "For": [53, 56, 62, 63, 64, 65, 66, 68, 72, 76, 80, 82, 83, 88, 89, 91, 92, 93, 95, 99, 101, 108, 110, 111, 112, 113, 116, 117], "more": 
[53, 64, 65, 66, 67, 69, 71, 76, 80, 83, 88, 89, 90, 91, 92, 95, 97, 99, 102, 104, 110, 111, 113, 116], "writing_convert": [53, 89], "locat": [54, 62, 66, 91, 93, 95], "py": [54, 55, 59, 62, 65, 66, 67, 78, 80, 82, 87, 88, 89, 91, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113, 114], "convers": [54, 55, 56, 58, 63, 64, 65, 71, 76, 77, 89, 93, 95, 112, 114], "decror": 54, "dynamo_tensorrt_convert": [54, 93, 95], "signatur": [54, 77], "leaky_relu": [54, 70], "def": [54, 62, 65, 82, 88, 90, 93, 95, 96, 97, 101, 108, 109, 110, 111, 113, 114, 115], "leaky_relu_convert": 54, "ctx": [54, 60, 89, 93, 95, 109], "conversionctx": [54, 60, 89, 93], "tupl": [54, 58, 63, 65, 71, 72, 75, 76, 77, 90, 93, 95, 97, 100, 114, 115], "kwarg": [54, 65, 71, 74, 75, 76, 93, 95, 112], "dict": [54, 71, 75, 76, 77, 93, 95, 97], "union": [54, 60, 64, 71, 75, 76, 77, 89, 93], "sequenc": [54, 62, 65, 71, 72, 75, 76, 77, 82, 93, 95, 109, 112], "decor": [54, 62, 65, 93], "kei": [54, 82, 88, 97, 110, 111, 113, 114], "node": [54, 55, 56, 57, 59, 60, 62, 64, 65, 71, 72, 89, 93, 95, 108, 112, 114], "capability_valid": [54, 93], "lambda": [54, 60, 82, 89, 93, 110, 111, 113], "fx": [54, 62, 63, 71, 75, 76, 89, 90, 93, 95, 100, 117], "determin": [54, 55, 64, 65, 76, 93, 109, 114, 116], "properli": [54, 66], "handl": [54, 55, 56, 58, 64, 65, 75, 76, 95], "partition": [54, 71, 76, 95], "sure": [54, 66, 67, 89, 90, 110, 111, 113, 119], "prioriti": [54, 93], "develop": [54, 65, 66, 67, 69, 82, 83, 89, 93, 95], "bodi": [54, 82, 83], "nativ": [54, 59, 61, 89, 93, 95, 100], "numpi": [54, 76, 95, 96, 97, 99, 100, 109, 110, 111, 113], "frozen": 54, "attribut": [54, 55, 56, 58, 65, 76, 82, 89], "previou": [54, 80, 101], "correspond": [54, 60, 65, 66, 75, 76, 93, 97, 99, 108, 116], "edg": [54, 82], "well": [54, 63, 66, 69, 73, 75, 82, 89, 91, 93, 97, 105, 117], "being": [54, 65, 66, 71, 89, 93, 95, 100], "truth": 54, "http": [54, 61, 64, 66, 67, 80, 82, 88, 89, 91, 93, 95, 99, 101, 104, 108, 110, 111, 112, 113, 114, 116], "github": [54, 61, 64, 66, 67, 80, 89, 91, 101, 104, 108, 110, 111, 113, 116], "com": [54, 61, 64, 66, 67, 89, 91, 99, 101, 104, 108, 110, 111, 113, 116], "blob": [54, 61, 66, 80, 91, 97], "main": [54, 55, 56, 57, 58, 59, 60, 63, 65, 66, 80, 82, 84, 89, 93, 95, 106, 108], "src": [54, 58, 61, 70], "native_funct": [54, 61], "yaml": [54, 61], "sinc": [54, 55, 64, 65, 67, 75, 82, 88, 89, 91, 93, 96, 97, 100], "mani": [54, 56, 64, 65, 80, 82, 83, 93, 97, 100, 118], "composit": [54, 89], "raw": [54, 80, 93], "impl": [54, 93], "subpackag": 54, "chain": [54, 60], "primarili": [54, 59, 66, 89, 93], "manipul": [54, 62, 76], "net": [54, 60, 82, 83, 89, 95, 110, 111, 113], "addit": [54, 55, 64, 65, 75, 76, 89, 93, 95, 97, 100, 112, 114], "call_modul": 54, "call_funct": [54, 62, 65], "eg": [54, 110, 111, 113, 115], "aten_": 54, "_leaky_relu": 54, "opoverloadpacket": 54, "while": [54, 56, 66, 75, 91, 93, 99, 109, 110, 111, 112, 113, 116, 118], "opoverload": 54, "particular": [54, 64, 97], "collect": [54, 56, 64, 71, 76, 77, 89, 90, 108], "trtinterpret": [54, 65, 72], "along": [54, 76], "match": [54, 55, 93, 100], "special": [54, 56], "account": [54, 110, 111, 113], "illustr": [54, 65, 102, 106, 107, 112], "scale_grad_by_freq": [54, 70], "embedding_param_valid": 54, "establish": 54, "subset": [54, 64, 71, 76, 91, 112], "converter_util": [54, 95], "enforce_tensor_typ": 54, "dictionari": [54, 76, 77, 92, 101], "between": [54, 55, 56, 60, 66, 76, 82, 83, 91, 97, 99, 106, 109], "possibl": [54, 
66, 82, 93, 95, 97, 110, 111, 112, 113], "prefer": [54, 64, 66, 89], "keyword": [54, 62, 71, 75, 76, 77, 93, 101, 104], "both": [54, 56, 64, 66, 69, 71, 72, 75, 76, 80, 82, 88, 91, 93, 95, 97, 110, 111, 113], "enforc": [54, 89], "situat": 54, "partit": [54, 55, 63, 64, 71, 76, 93, 118], "greater": [54, 71, 73, 76], "than": [54, 55, 64, 66, 71, 76, 81, 82, 93, 96, 97, 99, 109, 112, 116], "3d": [54, 65], "autocast": 54, "therebi": [54, 58, 95, 112], "limit": [54, 55, 73, 81, 91, 94, 97, 98, 109, 110, 118], "author": [54, 83], "conv_nod": 54, "7": [54, 56, 58, 59, 75, 76, 86, 89, 95, 101, 102, 104, 108, 114], "ignor": [54, 64, 71, 75, 76, 95], "misc": [54, 95], "trttensor": 54, "np": [54, 93, 95, 96, 97, 99, 100, 109, 110, 111, 113], "ndarrai": [54, 95], "aten_ops_convolut": 54, "conversioncontext": [54, 93, 95], "side": [54, 55, 80, 89, 93], "effect": [54, 55, 64, 65, 71, 80, 89, 91, 93, 95, 112], "term": [54, 76, 82, 83, 91, 93, 95, 112], "getitem": 54, "categor": 54, "modif": [54, 62, 76], "op_evalu": 54, "capbility_valid": 54, "opcod": 54, "decompos": 54, "suboper": 54, "separ": [54, 56, 57, 59, 66], "Such": 54, "via": [54, 64, 65, 67, 69, 71, 75, 76, 77, 80, 86, 90, 91, 101, 102, 104, 106, 107, 112, 114, 116, 117, 118], "register_torch_trt_decomposit": 54, "addmm_replac": 54, "replac": [54, 56, 62, 66, 67, 74, 95, 108, 118], "input_": 54, "mat1": 54, "mat2": [54, 70], "beta": [54, 65, 70, 77], "alpha": [54, 65, 70, 83], "mul": [54, 56, 70, 93], "matmul": [54, 55, 64, 70, 71, 89, 106, 107, 114], "modifi": [54, 56, 62, 65, 83, 99, 114], "edit": [54, 66, 80], "torch_enabled_decomposit": 54, "torch_disabled_decomposit": 54, "disjoint": 54, "preced": [54, 82], "over": [54, 57, 59, 65, 82, 108, 109, 110, 111, 113, 118], "much": [54, 60, 80, 82, 91], "significantli": [54, 55, 80, 97], "easier": [54, 57, 59, 60, 65, 71, 75, 76, 89, 91, 95, 99], "tri": 54, "made": [55, 57, 59, 76, 82], "represent": [55, 60, 65, 88, 112, 118], "instanc": [55, 62, 64, 66, 71, 74, 75, 88, 89, 93, 112, 116], "idea": [55, 82, 93], "reduc": [55, 56, 57, 59, 65, 71, 76, 91, 95, 97, 112, 116], "actual": [55, 58, 60, 65, 88, 89, 95], "aim": [55, 118], "closer": 55, "scope": [55, 95, 101, 104], "csrc": [55, 61], "common_subexpression_elimin": 55, "subexpress": 55, "dead_code_elimin": 55, "exception_elimin": 55, "wa": [55, 58, 62, 64, 65, 71, 75, 76, 82, 89, 93, 94, 118], "1013": 55, "ne": [55, 70], "1012": 55, "24": [55, 67, 110, 111, 113], "lib": [55, 66, 67, 89], "python3": [55, 66, 89], "site": [55, 66, 82, 89], "nn": [55, 61, 65, 71, 72, 75, 76, 77, 88, 89, 90, 93, 95, 101, 108, 114, 115, 118], "batchnorm": 55, "248": 55, "11": [55, 66, 82, 86, 89, 110, 111, 113], "block0": 55, "raiseexcept": 55, "249": 55, "12": [55, 56, 67, 82, 86, 88, 89, 102, 110, 111, 113, 114], "block1": 55, "guard_elimin": 55, "whose": [55, 65, 102], "freeze_modul": 55, "propag": 55, "fuse_addmm_branch": 55, "variant": [55, 116], "caught": 55, "ret": 55, "622": 55, "self": [55, 58, 60, 70, 75, 76, 88, 89, 90, 93, 95, 97, 101, 108, 112, 114, 115, 119], "bia": [55, 70, 89, 108], "x9": 55, "3677": 55, "output0": [55, 110, 111, 113, 115], "add_": [55, 70, 89, 93], "fuse_linear": 55, "back": [55, 56, 58, 59, 75, 76, 82, 88, 89, 95, 118], "fuse_flatten_linear": 55, "implicitli": [55, 76], "connect": [55, 71, 76, 77, 82, 99, 110, 111, 113, 119], "higher": [55, 64, 71, 76, 80, 82, 88, 109], "1d": 55, "lower_graph": 55, "access": [55, 60, 65, 80, 89, 92, 118], "rather": 55, "getattr": [55, 58, 88, 89], "trainabl": 55, "remain": [55, 76, 91, 118], 
"lower_tupl": 55, "lowersimpletupl": 55, "tupleconstruct": [55, 58], "tupleunpack": 55, "leav": [55, 62, 64, 71], "statement": [55, 82, 93], "loweralltupl": 55, "_all_": 55, "rais": [55, 65, 76, 94], "onnx": 55, "module_fallback": 55, "consist": [55, 65, 82, 95, 116, 118], "pair": [55, 60, 66, 82, 91, 112], "delimit": 55, "around": [55, 58, 60, 64, 66, 71, 75, 82, 85, 88, 95], "second": [55, 65, 82, 90, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "mark": [55, 56, 80, 97], "notatemoduleforfallback": 55, "marknodesforfallback": 55, "tell": [55, 56, 57, 58, 59, 60, 82, 118], "them": [55, 56, 58, 63, 64, 65, 66, 71, 75, 80, 89, 95, 97, 107, 112, 114, 118], "peephole_optimz": 55, "intent": [55, 82], "catch": [55, 76, 89], "small": [55, 95, 96, 110, 111, 113], "might": [55, 66, 80, 100, 114], "interest": [55, 82], "now": [55, 56, 59, 60, 65, 66, 76, 82, 89, 92, 93, 95, 97, 100, 109, 115, 116], "expand": [55, 70], "simpli": [55, 101, 112], "remove_contigu": 55, "remove_dropout": 55, "infer": [55, 64, 65, 71, 76, 77, 89, 91, 94, 100, 101, 109, 112, 114, 116, 117, 118], "remove_to": 55, "unpack_addmm": 55, "reus": [55, 65, 71, 91, 97], "dedic": [55, 83], "unpack_log_softmax": 55, "softmax": [55, 65, 70, 108], "loop_unrol": 55, "suffici": [55, 66, 76], "short": [55, 64, 71, 82, 83, 100], "tile_to_repeat": 55, "instruct": [56, 57, 59, 65, 66, 89, 110, 111, 113], "criteria": [56, 57, 59, 64], "lack": [56, 57, 59, 65, 95, 109], "explicitli": [56, 57, 59, 66, 77, 90, 91, 92, 106, 107, 115], "On": 56, "segment": [56, 63, 95, 102, 104, 112], "verifi": [56, 71, 93, 95, 100], "Then": [56, 91, 92, 100, 110, 111, 113], "roughli": [56, 110, 111, 113], "analysi": 56, "everi": [56, 72, 75, 76, 89, 116], "complet": [56, 63, 71, 76, 88, 89], "mean": [56, 60, 64, 65, 70, 71, 72, 101, 109, 110, 111, 113, 118], "trace": [56, 65, 71, 75, 77, 88, 89, 110, 111, 113, 114, 117, 118], "tensorlist": [56, 60], "figur": [56, 83, 85], "our": [56, 59, 63, 88, 89, 110, 111, 113], "stitch": [56, 89], "altogeth": [56, 80], "brief": 56, "descript": [56, 83, 94, 108], "partitioninfo": 56, "api": [56, 59, 60, 62, 63, 64, 65, 75, 76, 77, 81, 89, 90, 91, 92, 95, 101, 102, 105, 109, 110, 111, 112, 113, 114, 116, 117], "maintain": [56, 58, 60, 76, 99, 118], "code": [56, 59, 62, 64, 65, 66, 81, 83, 88, 89, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 114], "mymodel": [56, 63, 68, 90, 95, 114, 117], "ts_model": [56, 89], "trt_model": [56, 92, 95, 102, 106, 107, 108, 109, 110, 111, 113, 117], "off": [56, 58, 105], "consecut": [56, 63], "satisfi": [56, 62, 65], "forced_fallback_op": 56, "randn": [56, 63, 68, 71, 76, 77, 89, 92, 93, 97, 102, 105, 114, 115, 117], "224": [56, 63, 68, 71, 72, 76, 77, 89, 94, 97, 99, 100, 102, 105, 110, 111, 112, 113, 114, 117], "trt_ts_modul": [56, 90], "input_s": 56, "inputrang": 56, "cfg": [56, 89], "relu": [56, 70, 88, 89, 101, 108], "trt_mod": [56, 68, 89, 91, 119], "consid": [56, 77, 89, 95, 115], "segmentmodelwithdependencyawar": 56, "test_segment": 56, "20": [56, 67, 86, 100, 102, 104], "x_lgamma": 56, "lgamma": 56, "y_lgamma": 56, "div": [56, 70], "div_lgamma": 56, "27": [56, 89], "cat": [56, 66, 67, 70, 108, 109], "greedi": [56, 106, 107, 109], "strategi": [56, 76], "travers": [56, 57, 59, 64], "gather": 56, "same": [56, 58, 62, 64, 65, 66, 71, 76, 80, 82, 88, 89, 92, 94, 95, 97, 100, 102, 104, 110, 111, 113, 114, 116, 117], "encount": [56, 64, 66, 93, 101, 104], "4": [56, 58, 63, 64, 65, 66, 70, 76, 78, 80, 82, 83, 86, 89, 95, 101, 
103, 104, 105, 108, 114], "suboptim": 56, "arithmet": 56, "split": [56, 65, 70], "own": [56, 60, 64, 66, 71, 82, 89, 97, 108, 110, 111, 113], "could": [56, 64, 65, 95, 102, 104, 116], "rewrit": [56, 62], "portion": [56, 82, 95, 103], "without": [56, 60, 68, 71, 80, 82, 89, 91, 95, 96, 97, 100, 116], "reorder": 56, "seri": 56, "cleanli": 56, "approach": [56, 97], "achiev": [56, 112], "hit": 56, "larger": [56, 71, 76, 80, 109, 112], "boundari": [56, 74, 76], "guarante": [56, 75], "trigger": [56, 64, 65, 76, 89, 97, 99, 100, 118], "appear": [56, 82], "adjac": [56, 71, 76, 82], "As": [56, 65, 66, 76, 89, 93, 95, 97, 100, 118], "clean": [56, 62, 82, 101, 104], "step": [56, 65, 67, 70, 76, 91, 95, 100, 112], "consolid": [56, 88], "further": [56, 64, 65, 116, 118], "merg": 56, "identifi": 56, "do_not_merg": 56, "combin": [56, 64, 65], "condit": [56, 82, 118], "loop": [56, 64, 65, 106, 107], "ir": [57, 59, 60, 63, 64, 68, 71, 76, 88, 89, 90, 98, 101, 102, 104, 105, 110, 114], "larg": [57, 59, 80, 82, 89, 91, 100, 109, 112], "opset": [57, 59, 93], "compon": [57, 59, 66, 67, 74, 88, 116, 118], "evalu": [57, 58, 59, 108], "deploi": [57, 59, 69, 71, 89, 91, 98, 110, 111, 113], "instanti": [57, 58, 59, 60, 89, 103], "wrap": [57, 58, 59, 65, 82, 85, 89, 92, 101, 104], "extend": [57, 59, 60, 70, 89, 97, 112], "providi": [57, 59], "stand": [58, 82], "interpret": [58, 65, 82], "execute_engin": [58, 75, 89], "stack": [58, 70, 91, 108, 118], "machin": [58, 66, 91, 94, 110, 111, 113], "pop": 58, "push": 58, "element": [58, 65, 82, 83, 86], "realiz": 58, "abstract": [58, 60, 83, 93], "__torch__": [58, 88, 89], "portabl": [58, 66, 77], "serializ": [58, 64, 88, 118], "instnanti": 58, "whatev": [58, 65, 95], "self_1": [58, 89], "torchvis": [58, 91, 92, 94, 97, 99, 100, 102, 105, 108, 110, 111, 113], "resnet": [58, 69, 78, 94, 98, 99, 110, 111, 112, 113], "___torch_mangle_4847": 58, "resnet_trt": 58, "input_0": [58, 89], "__torch___torchvision_models_resnet____torch_mangle_4847_resnet_trt_engin": 58, "listunpack": [58, 89], "multipl": [58, 66, 71, 75, 76, 82, 83, 91, 109, 110, 111, 113, 116], "repack": 58, "ssd": 58, "ssd300_trt": 58, "__torch___pytorch_detection_ssd_src_model_ssd300_trt_engin": 58, "holder": [58, 84], "torchbind": 58, "pickler": 58, "seril": 58, "zip": [58, 66, 99, 100, 110], "depickl": 58, "encod": [58, 112], "sm": 58, "correct": [58, 66, 80, 98, 99, 100, 108, 110, 111, 113], "bazel": [59, 66, 67], "linux": [59, 64, 67, 71, 89, 94], "x86_64": [59, 66], "aarch64": 59, "gcc": [59, 89], "untest": 59, "try": [59, 76, 82, 83, 89, 92, 95, 97, 110, 111, 113, 118], "older": 59, "repositori": [59, 66, 80, 87, 110, 111, 113], "notebook": [59, 69, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "doc": [59, 61, 66, 67, 80, 81, 82, 87, 93, 95, 114], "docsrc": 59, "third_parti": [59, 66], "toolchain": [59, 66, 67], "unstabl": 59, "subject": [59, 62, 118], "matur": 59, "most": [59, 65, 66, 72, 95, 100, 110, 111, 113, 116, 118], "hood": [59, 102, 118], "major": [59, 65, 76], "top": [59, 80, 84], "coordin": [59, 76], "ingest": 59, "flow": [60, 65, 82, 88, 112], "ilay": 60, "analogu": 60, "goal": [60, 64, 97], "registernodeconversionpattern": [60, 89], "helper": [60, 93], "pattern": [60, 76, 89, 109], "schema": [60, 89, 93, 95], "caus": [60, 64, 80, 101, 102, 104, 109, 116], "acthardtanh": 60, "torchtrt_unus": 60, "hardtanh": [60, 70], "scalar": [60, 70], "min_val": [60, 70], "max_val": [60, 70], "unwraptodoubl": 60, "new_lay": 60, "addactiv": 60, "activationtyp": [60, 65], 
"kclip": 60, "torchtrt_check": 60, "unabl": [60, 89, 95], "setalpha": 60, "setbeta": 60, "setnam": [60, 89], "util": [60, 62, 74, 77, 89, 91, 101, 104, 106, 107, 108, 109, 110, 111, 112, 113, 118], "node_info": [60, 89], "c_str": [60, 89], "out_tensor": [60, 89], "associatevalueandtensor": [60, 89], "getoutput": [60, 89], "log_debug": 60, "getdimens": [60, 89], "accord": [60, 64, 77, 94], "unwrap": 60, "tool": [60, 64, 65, 66, 89, 93, 97, 112], "don": [60, 65, 80, 82, 83, 91, 93, 108, 110, 111, 113, 114], "annot": [60, 89], "your": [60, 63, 64, 66, 67, 68, 75, 80, 82, 83, 87, 88, 89, 90, 92, 97, 114, 116], "Its": [60, 82], "track": [60, 91], "sort": [60, 70, 92], "live": [60, 82], "directli": [60, 62, 63, 66, 69, 74, 76, 91, 93, 95, 101, 117], "associatevalueandivalu": 60, "inspect": [60, 88, 89], "dataflow": [60, 89], "mechan": [60, 64, 65, 95, 100, 112], "safe": [60, 64, 71, 75, 76, 77], "unsur": 60, "deep": [60, 64, 69, 80, 91, 95, 119], "straight": 60, "chanc": 60, "none": [60, 64, 65, 70, 71, 72, 74, 75, 76, 77, 80, 82, 93, 95, 97, 101, 108, 109], "wrapper": [60, 65, 117], "similar": [60, 63, 64, 65, 66, 89, 92, 95, 106, 107], "tocustomclass": 60, "tensorcontain": 60, "istensor": 60, "iscustomclass": 60, "lot": [60, 63], "singular": 60, "becaus": [60, 65, 66, 72, 88, 89, 93, 95, 96, 97, 109, 115], "alloc": 60, "freed": 60, "destructor": 60, "destroi": [60, 83], "realli": 60, "think": [60, 82], "becom": [60, 66, 99], "benefit": [60, 89, 97, 109], "deal": [60, 97], "quit": [60, 66, 89, 112], "effici": 60, "batch_norm": [60, 70], "fusion": [60, 62, 65], "deeplearn": [61, 65, 67], "sdk": [61, 67, 110, 111, 113, 118], "matrix": 61, "html": [61, 66, 67, 82, 88, 91, 93, 95, 114], "c_api": 61, "python_api": 61, "org": [61, 66, 80, 82, 88, 89, 91, 93, 95, 114, 116], "stabl": [61, 67, 69, 77, 78, 80, 98, 110, 114], "master": [61, 66, 91, 116], "overview": [61, 69, 101, 105], "md": 61, "appli": [62, 63, 91, 100], "desir": [62, 71, 83, 91, 97], "coalesc": 62, "insert": [62, 64, 71, 89, 91, 93, 97, 100], "graphmodul": [62, 63, 71, 72, 76, 89, 90, 95, 100, 117, 118], "caller": 62, "invok": [62, 64, 65, 88, 89, 116], "lint": 62, "recompil": [62, 71, 76, 93, 97, 100, 104, 114, 118], "repair": 62, "disallow": 62, "repair_input_as_output": 62, "gm": [62, 71], "sample_input": [62, 65, 101], "scenario": [62, 64, 99, 109], "clone": [62, 66, 70, 95], "modified_graph": 62, "extract": [62, 89, 112], "placehold": [62, 93], "isinst": [62, 65, 95, 108], "issubclass": 62, "direct": [62, 86, 100, 116], "len": [62, 70, 95], "direct_output": 62, "inserting_aft": 62, "cloned_placehold": 62, "replace_input_with": 62, "date": [62, 83, 118], "eliminate_dead_cod": 62, "logger": [62, 73], "f": [62, 64, 65, 67, 76, 82, 88, 93, 94, 95, 108, 109], "__init__": [62, 75, 76, 82, 88, 93, 95, 97, 101, 108, 114, 115], "pass_manag": 62, "passmanag": 62, "backend": [62, 68, 69, 77, 78, 81, 92, 96, 97, 98, 101, 108, 110, 111, 113, 114], "offer": [62, 64], "registr": [62, 65], "conveni": [62, 91, 104, 112, 116, 118], "control": [62, 65, 88, 100, 109, 116], "_aten_lowering_pass": 62, "my_custom_pass": 62, "front": [62, 71], "passlist": 62, "arbitrari": [62, 75], "remov": [62, 63, 71, 80, 96, 97, 108], "dump_lowering_pass": 62, "apply_lowering_pass": 62, "graph_modul": [62, 71], "_remove_lowering_pass": 62, "evolv": 62, "introduc": [63, 65, 112], "exportedprogram": [63, 68, 71, 76, 100, 106, 107, 109, 114, 118], "dynamo": [63, 64, 66, 68, 74, 75, 76, 78, 89, 93, 94, 95, 96, 97, 98, 100, 101, 102, 104, 105, 108, 109, 110, 111, 113, 114, 
115], "frontend": [63, 71, 74, 90, 95, 98, 102, 104, 108, 110, 111, 113, 114], "simpl": [63, 64, 65, 82, 83, 88, 110, 111, 112, 113, 114], "usag": [63, 65, 69, 74, 78, 82, 89, 98, 109, 110, 114, 117], "eval": [63, 68, 89, 90, 93, 94, 96, 97, 99, 100, 101, 102, 104, 105, 106, 107, 108, 109, 110, 111, 113, 114, 115, 117], "exp_program": [63, 97, 100, 108, 114], "trt_gm": [63, 68, 97, 100, 114, 115, 117], "interact": [63, 82, 99, 101, 102, 103, 104, 105], "ideal": 63, "discuss": [63, 64, 110, 111, 113], "section": [63, 65, 80, 82, 83, 84, 86, 89, 91, 110, 111, 113, 117], "frequent": 63, "builder": [63, 64, 65, 71], "respect": [63, 64, 66, 71, 76, 106, 107, 115], "releas": [63, 64, 67, 82], "insid": [63, 82, 95], "decomposit": [63, 64, 71, 76, 95], "downstream": [63, 112], "constraint": [63, 109], "guid": [64, 81], "present": [64, 100], "learn": [64, 66, 69, 89, 91, 95, 110, 111, 113, 119], "acceler": [64, 72, 76, 116, 118, 119], "workflow": [64, 65, 68, 69, 71, 72, 76, 89, 92, 97, 98, 99, 102, 103, 104, 106, 107, 110, 112], "wide": [64, 76, 86], "varieti": [64, 110, 111, 113], "primari": [64, 93, 97, 117], "simplic": 64, "optimized_model": [64, 68, 96, 101, 102, 104], "depth": [64, 80, 112], "challeng": [64, 99, 110, 111, 113], "addition": [64, 95], "fit": [64, 82], "compilationset": [64, 71, 75, 93, 95, 101], "_enum": [64, 71], "callabl": [64, 71, 76], "pass_through_build_failur": [64, 71, 75, 76, 95], "max_aux_stream": [64, 71, 75, 76, 95], "version_compat": [64, 71, 75, 76, 95], "optimization_level": [64, 71, 75, 76, 95, 101], "use_python_runtim": [64, 71, 75, 76, 95, 96, 97, 99, 100, 101], "truncate_doubl": [64, 71, 75, 76, 95, 96, 106, 107, 109], "use_fast_partition": [64, 71, 75, 76, 95], "enable_experimental_decomposit": [64, 71, 75, 76, 95], "_devic": [64, 71], "assume_dynamic_shape_support": [64, 71, 75, 76], "engine_cap": [64, 71, 75, 76, 95], "dryrun": [64, 71, 75, 76, 95], "hardware_compat": [64, 71, 75, 76, 95], "timing_cache_path": [64, 71, 75, 76, 97], "tmp": [64, 71, 75, 76, 89, 96], "torch_tensorrt_engine_cach": [64, 71, 75, 76], "timing_cach": [64, 65, 71, 75, 76], "bin": [64, 66, 67, 71, 75, 76], "lazy_engine_init": [64, 71, 75, 76], "cache_built_engin": [64, 71, 75, 96, 97], "reuse_cached_engin": [64, 71, 75, 96, 97, 100], "use_explicit_typ": [64, 71, 75, 106, 107, 109, 115], "use_fp32_acc": [64, 71, 75, 106, 107], "refit_identical_engine_weight": [64, 71, 75], "strip_engine_weight": [64, 71, 75], "immutable_weight": [64, 71, 75, 76, 96, 97, 99, 100], "enable_weight_stream": [64, 71, 75, 109], "enable_cross_compile_for_window": [64, 71, 75], "dpython": [64, 71, 76, 77], "per": [64, 71, 95, 116], "regardless": [64, 71, 83, 102, 104], "fail": [64, 71, 76, 89, 99, 100, 108, 119], "auxiliari": [64, 71], "stream": [64, 69, 71, 76, 95, 98, 110], "impli": [64, 71], "longer": [64, 66, 71, 76, 80, 94, 116], "search": [64, 69, 71, 76, 80], "strictli": [64, 71], "runtim": [64, 66, 68, 69, 71, 76, 89, 93, 98, 99, 101, 104, 105, 109, 110, 118], "presenc": [64, 71], "preferenti": [64, 71], "choos": [64, 65, 71, 88], "float64": [64, 71, 76, 77], "toggl": [64, 71, 76], "mode": [64, 65, 71, 75, 76, 90, 91, 93, 105, 108], "detail": [64, 65, 67, 71, 88, 89, 95, 97, 110, 111, 113, 116], "natur": [64, 71, 82], "architectur": [64, 66, 69, 71, 76, 94, 97, 112], "amper": [64, 71, 76], "newer": [64, 66, 71, 76], "storag": [64, 71, 91], "use_strong_typ": [64, 71], "strong": [64, 71, 82], "mix": [64, 69, 71], "happen": [64, 65, 71, 88, 99, 102, 114], "strip": [64, 71], "non": [64, 66, 71, 76, 83, 
85, 116], "refitt": [64, 71, 76, 97], "were": [64, 71, 95, 100, 116], "cross": [64, 71, 82, 98, 110], "window": [64, 71, 82], "sub": [64, 70, 82, 88, 101], "slate": 64, "futur": [64, 65, 71, 76, 77, 116], "occur": [64, 109], "first_output": 64, "subsequ": [64, 97], "second_output": 64, "session": [64, 68, 82, 97, 105], "point": [64, 66, 76, 80, 81, 82, 89, 108, 110, 111, 113], "cover": [64, 93], "benchmark": [64, 70], "automat": [64, 67, 76, 82, 89, 100, 114, 118], "vari": [64, 72, 109, 114], "distribut": [64, 67, 89, 91, 109, 116], "inf": 64, "dynamo_convers": 64, "contribut": 64, "demonstr": [64, 82, 83, 84, 91, 93, 95, 97, 99, 108, 110, 111, 112, 113], "break": [64, 65, 71, 75, 76, 82, 95, 107], "successfulli": [64, 94, 99, 100], "_dynamo": [64, 96, 97, 101, 102, 104, 114], "explain": [64, 65, 69], "veri": [64, 65, 83, 84, 91, 92, 106, 107, 110, 111, 113], "explan": [64, 65], "graph_break_count": 64, "furthermor": 64, "durat": [64, 82], "latter": [64, 75], "logic": [64, 65, 93], "guard": 64, "compos": [65, 88, 91, 93, 108, 110, 111, 113], "variou": [65, 119], "etc": [65, 80, 82, 95, 119], "environ": [65, 68, 71, 110, 111, 113], "research": 65, "few": [65, 66, 76, 93], "nightli": 65, "lower_exampl": 65, "welcom": [65, 89], "finish": 65, "converison": 65, "pleas": [65, 67, 76, 82, 89, 98, 108, 110, 111, 113, 114], "max_batch_s": [65, 72, 110, 111, 113], "2048": [65, 72], "max_workspace_s": [65, 72], "33554432": [65, 72], "explicit_batch_dimens": [65, 72], "lower_precis": [65, 72], "lowerprecis": [65, 72], "verbose_log": [65, 72], "timing_cache_prefix": [65, 72], "save_timing_cach": [65, 72], "cuda_graph_batch_s": [65, 72], "dynamic_batch": [65, 72], "turn": [65, 72, 105], "trtmodul": [65, 72], "otherwis": [65, 66, 72, 97, 116], "implicit": [65, 70, 72, 82], "config": [65, 66, 72, 110, 111, 113], "updat": [65, 66, 67, 71, 72, 76, 95, 100], "dim": [65, 70, 72, 95, 97, 108, 109, 110, 111, 113, 114], "fx2trt_exampl": 65, "acc_trac": 65, "come": [65, 66, 81, 95, 99, 110, 111, 113], "my_pytorch_model": 65, "build_model": 65, "prepar": [65, 110, 111, 113], "acc_mod": 65, "earli": [65, 100], "deprec": [65, 70], "continu": [65, 82, 116], "backward": [65, 75, 95, 118], "vision": [65, 98, 110, 111, 113], "activ": [65, 75, 77, 82, 89, 91, 93, 112, 116, 119], "except": [65, 71, 76], "permut": [65, 70], "transpos": [65, 70, 114], "ll": [65, 97], "inputtensorspec": [65, 72, 76], "experiment": [65, 76, 77], "dataclass": [65, 101], "re": [65, 76, 82, 97, 99, 105, 116], "manual": [65, 76, 81, 82, 100, 109], "sampl": [65, 71, 82, 90, 91, 99, 100, 101, 102, 103, 104, 105, 106, 107, 110, 111, 113], "rand": [65, 89, 94, 97, 99, 100, 101, 110, 111, 113], "from_tensor": [65, 76], "slightli": [65, 66, 95], "promis": 65, "optimize_target_shap": 65, "input_tensor_spec": 65, "shape_rang": [65, 72], "100": [65, 72, 95, 97, 108, 109], "accordingli": [65, 80, 114, 116], "trtinterpreterresult": [65, 72], "namedtupl": 65, "input_nam": [65, 72], "output_nam": [65, 72], "serialized_cach": [65, 72], "bytearrai": [65, 75, 77], "afford": 65, "temporari": [65, 97], "best": [65, 71, 76, 82, 99, 109, 115], "perforamnc": 65, "examin": 65, "suitabl": [65, 93], "force_fp32_output": 65, "strict_type_constraint": 65, "usual": [65, 66, 80], "unless": 65, "certain": [65, 66, 101, 106, 107, 109, 116], "algorithm_selector": 65, "profiling_verbos": 65, "trt_interpreter_result": 65, "64": [65, 76, 90, 107, 108, 114], "25": [65, 72, 89], "runtimeerror": [65, 108], "xxx": 65, "One": [65, 82, 83, 89, 112, 116], "reload_trt_mod": 65, 
"reload_model_output": 65, "far": [65, 82], "give": [65, 80, 82], "convtert": 65, "scheme": [65, 71, 76], "action": [65, 82], "tensort": [65, 118], "thing": [65, 66, 82], "compar": [65, 71, 76, 90, 100], "vanilla": 65, "mainli": 65, "builtin": 65, "purpos": [65, 110, 111, 112, 113], "acc_op": 65, "leverag": [65, 91], "power": [65, 82, 89, 109, 112], "goe": [65, 82], "whole": 65, "sigmoid": [65, 70], "tensorrt_convert": 65, "acc_ops_sigmoid": 65, "rest": [65, 82, 83], "input_v": [65, 93], "receiv": 65, "region": 65, "add_activ": 65, "get_output": [65, 95], "wherev": 65, "rememb": [65, 66, 110, 111, 113], "mapper": 65, "todo": [65, 67, 80], "logist": 65, "down": [65, 66, 80, 107], "acc_norm": 65, "foo": [65, 82, 83], "register_acc_op": 65, "register_acc_op_map": 65, "this_arg_is_opt": 65, "op_and_target": 65, "arg_replacement_tupl": 65, "rule": [65, 66, 77], "third": [65, 83], "boolean": [65, 76, 93], "matter": [65, 95], "register_custom_acc_mapper_fn": 65, "design": [65, 74, 93, 99, 106, 109, 112, 119], "redund": 65, "throught": 65, "custom_mapp": 65, "_": [65, 82, 95, 108, 109, 115], "foo_kwarg": 65, "inserting_befor": 65, "foo_nod": 65, "meta": [65, 67, 86, 107, 109], "children": 65, "unit": [65, 76], "test_acc_trac": 65, "acc_op_convert": 65, "essenti": 65, "plugin": [65, 95], "yet": [65, 112], "folder": 65, "center": 66, "pypi": 66, "m": [66, 67, 83, 108], "pip": [66, 67, 98, 110, 111, 113], "upload": [66, 110, 111, 113], "x86": [66, 116], "extra": [66, 75, 89, 95, 99], "url": [66, 80, 110, 111, 113], "download": [66, 67, 86, 91, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 113], "whl": [66, 67], "cu118": 66, "cu124": 66, "tarbal": [66, 89, 91], "easiest": [66, 95], "bazelisk": [66, 67], "bazelbuild": [66, 67], "export": [66, 67, 69, 71, 76, 97, 98, 100, 102, 106, 107, 108, 109, 110, 111, 113, 115, 117, 118], "bazel_vers": 66, "path_to_torchtrt_root": 66, "bazelvers": 66, "mkdir": 66, "cd": [66, 110, 111, 113], "curl": [66, 82], "fssl": 66, "o": [66, 82, 110, 111, 113], "dist": 66, "unzip": 66, "bash": [66, 110, 111, 113], "sh": 66, "cp": [66, 67, 95], "usr": [66, 67], "driver": 66, "branch": [66, 67], "4e5b0f6e860910eb510fa70a76ee3eb9825e7a4d": 66, "l46": 66, "pull": [66, 97, 110, 111, 113], "latest": [66, 67, 80], "l53c1": 66, "fact": 66, "reproduc": 66, "l71": 66, "http_archiv": 66, "build_fil": 66, "archiv": [66, 67], "sha256": 66, "strip_prefix": 66, "OR": 66, "TO": [66, 89], "gnu": 66, "tar": [66, 67, 82, 91], "gz": [66, 82, 83, 91], "ld_library_path": 66, "comment": [66, 82], "uncom": 66, "l114c1": 66, "l124c3": 66, "uv": 66, "astral": 66, "project": [66, 81, 86], "simpler": [66, 91], "wheel": [66, 67], "dep": 66, "lighter": 66, "executor": 66, "avoid": [66, 93, 95, 100, 114], "implic": 66, "python_onli": 66, "legaci": [66, 74], "mainten": 66, "torchdynamo": [66, 114, 118], "technolog": [66, 118], "exclud": [66, 95], "speed": [66, 97, 100], "no_torchscript": 66, "dbg": 66, "pre_cxx11_abi": 66, "complic": 66, "incompat": 66, "popular": [66, 81, 98, 106, 107, 110, 112], "ngc": [66, 67, 110, 111, 113], "tabl": [66, 86], "bdist_wheel": 66, "preinstal": 66, "forum": 66, "correctli": [66, 95], "declar": 66, "intend": [66, 101, 102, 103, 104, 105], "microsoft": 66, "2022": [66, 69], "open": [66, 110, 111, 112, 113], "app": 66, "x64": 66, "prompt": [66, 99, 103, 106, 107], "admin": 66, "privileg": 66, "launcher": 66, "chocolatei": 66, "navig": [66, 80], "ninja": 66, "setuptool": 66, "r": [66, 67, 82, 98, 110], "txt": [66, 67, 98, 110], 
"distutils_use_sdk": 66, "cuda_win": 66, "libtorch_win": 66, "tensorrt_win": 66, "similarli": [66, 97, 105, 116], "ci_workspac": 66, "win": 66, "tmpl": [66, 67], "torchtrtc": [66, 69, 119], "websit": 66, "finder": 66, "dcmake_module_path": 66, "doesn": [66, 82, 88, 89], "dtorch_dir": 66, "dtensorrt_root": 66, "choic": [66, 74], "b": [66, 70, 76, 83, 109, 110, 111, 113], "dcmake_build_typ": 66, "72048": 66, "jp_workspac": [66, 67], "new_local_repositori": 66, "sudo": [66, 67], "home": 66, "unlik": [66, 92], "libtorch_pre_cxx11_abi": 66, "shift": [66, 70, 82], "jetpack": 66, "jetpack_x": 66, "jetpack_5": 66, "drop": [66, 80, 108], "nvida": 67, "ofjetpack": 67, "With": [67, 80, 82, 89, 91, 97, 110, 111, 113], "incorpor": [67, 83], "cudnn": 67, "9": [67, 86, 89, 94, 95, 110, 111, 113], "dlfw": 67, "09": 67, "jetson": [67, 112], "framework": 67, "instal": [67, 69, 86, 89, 98, 110, 111, 113, 116], "kit": 67, "flash": 67, "board": 67, "apt": 67, "show": [67, 80, 82, 97, 103, 109, 112], "dev": 67, "everth": 67, "nvcc": 67, "cmd": 67, "toolkit": [67, 74], "libcusparselt": 67, "lib64": 67, "wget": [67, 110, 111, 113], "cusparselt": 67, "redist": 67, "libcusparse_lt": 67, "sbsa": 67, "xz": 67, "xf": 67, "v1": [67, 99, 103], "arm64": 67, "mv": 67, "chmod": 67, "pypa": 67, "en": [67, 80], "bootstrap": 67, "jp": 67, "v61": 67, "0a0": 67, "872d972e41": 67, "nv24": 67, "08": [67, 110, 111, 113], "17622132": 67, "cp310": 67, "linux_aarch64": 67, "test_requir": 67, "jetpack6": 67, "lanl": 67, "cuda_vers": 67, "grep": 67, "cut": [67, 82, 100], "sed": [67, 83, 85], "torch_install_path": 67, "dirnam": 67, "__file__": 67, "site_package_path": 67, "cuda_hom": 67, "envsubst": 67, "cxx11": [67, 116], "abi": [67, 116], "anywher": 68, "ahead": [68, 69, 89, 99], "ep": [68, 70, 94, 100, 115, 117], "output_format": [68, 76, 117], "input_tensor": [68, 95, 108, 109], "fill": 68, "aot": [69, 89, 98, 99, 100, 110, 118], "integr": [69, 99, 101], "seamlessli": [69, 76], "ecosystem": [69, 118], "hybrid": [69, 71, 76, 77, 118], "advanc": [69, 78, 83, 91, 98, 110], "bert": [69, 78, 98, 110], "triton": [69, 95], "cudagraph": [69, 98, 110], "overload": [69, 98, 110], "mutabl": [69, 98, 110], "diffus": [69, 78, 98, 110], "gpt2": [69, 98, 110], "llama2": [69, 98, 110], "page": [69, 84, 86, 110, 111, 113], "introductori": 69, "blog": [69, 116], "gtc": 69, "2020": [69, 89], "talk": 69, "fall": [69, 76, 95], "2021": 69, "dai": 69, "confer": 69, "_convolut": [70, 89], "stride": [70, 76, 95, 108], "pad": [70, 76, 95, 108], "dilat": 70, "output_pad": 70, "group": [70, 82, 83], "determinist": 70, "cudnn_en": 70, "allow_tf32": 70, "ab": 70, "aco": 70, "acosh": 70, "adaptive_avg_pool1d": 70, "output_s": 70, "adaptive_avg_pool2d": 70, "adaptive_avg_pool3d": 70, "adaptive_max_pool1d": 70, "adaptive_max_pool2d": 70, "adaptive_max_pool3d": 70, "argmax": [70, 109], "keepdim": 70, "argmin": 70, "asin": 70, "asinh": 70, "atan": 70, "atanh": 70, "avg_pool1d": 70, "kernel_s": [70, 95, 108], "ceil_mod": 70, "count_include_pad": 70, "avg_pool2d": 70, "divisor_overrid": 70, "avg_pool3d": 70, "gamma": 70, "var": 70, "momentum": 70, "bitwise_not": 70, "bmm": 70, "ceil": 70, "clamp": 70, "clamp_max": 70, "clamp_min": 70, "constant_pad_nd": 70, "co": [70, 83, 112], "cosh": 70, "cumsum": 70, "tensor_mod": 70, "rounding_mod": 70, "div_": 70, "elu": 70, "scale": [70, 91, 112], "input_scal": 70, "indic": [70, 71, 80, 82, 93, 100, 102, 114, 115], "padding_idx": 70, "eq": [70, 82], "erf": [70, 93], "exp": 70, "expand_a": 70, "fake_quantize_per_channel_affin": 
70, "zero_point": 70, "axi": [70, 76], "quant_min": 70, "quant_max": 70, "fake_quantize_per_tensor_affin": 70, "using_int": [70, 89], "start_dim": [70, 89], "end_dim": [70, 89], "floor": 70, "floor_divid": 70, "ge": 70, "gru_cel": 70, "hx": 70, "w_ih": 70, "w_hh": 70, "b_ih": 70, "b_hh": 70, "gt": 70, "hardtanh_": 70, "instance_norm": 70, "running_mean": 70, "running_var": 70, "use_input_stat": 70, "layer_norm": 70, "normalized_shap": 70, "le": 70, "negative_slop": 70, "01": [70, 83, 89, 108], "leaky_relu_": 70, "lstm_cell": 70, "lt": 70, "masked_fil": 70, "mask": [70, 95], "max_pool1d": 70, "max_pool2d": [70, 88, 89], "max_pool3d": 70, "mul_": [70, 93], "narrow": 70, "neg": [70, 99], "norm": 70, "scalaropt_dim": 70, "pixel_shuffl": 70, "upscale_factor": 70, "pow": 70, "tensor_scalar": 70, "expon": 70, "tensor_tensor": 70, "prelu": 70, "prod": [70, 95], "dim_int": 70, "reciproc": 70, "reflection_pad1d": 70, "reflection_pad2d": 70, "relu_": 70, "repeat_interleav": 70, "self_int": 70, "replication_pad1d": 70, "replication_pad2d": 70, "replication_pad3d": 70, "reshap": [70, 95], "roll": 70, "rsub": 70, "scatter": 70, "sigmoid_": 70, "sin": [70, 82], "sinh": 70, "slice": 70, "split_siz": 70, "split_with_s": 70, "sqrt": 70, "squar": 70, "squeez": [70, 112], "sub_": 70, "dim_intlist": 70, "tan": 70, "tanh": [70, 93], "tanh_": [70, 93], "non_block": [70, 108], "memory_format": [70, 76], "prim_devic": 70, "topk": 70, "k": [70, 91, 108], "largest": 70, "dim0": [70, 97], "dim1": 70, "unbind": 70, "unsqueez": [70, 110, 111, 113], "upsample_bilinear2d": 70, "align_corn": 70, "scales_h": 70, "scales_w": 70, "vec": 70, "scale_factor": 70, "upsample_linear1d": 70, "upsample_nearest1d": 70, "upsample_nearest2d": 70, "upsample_nearest3d": 70, "scales_d": 70, "upsample_trilinear3d": 70, "view": [70, 80], "__and__": 70, "__derive_index": 70, "idx": 70, "__getitem__": 70, "__is__": 70, "t1": 70, "t2": 70, "obj": 70, "__isnot__": 70, "__not__": 70, "__or__": 70, "__range_length": 70, "lo": 70, "hi": [70, 82, 83], "__round_to_zero_floordiv": 70, "__xor__": 70, "append": [70, 93, 96, 97, 108, 109], "el": 70, "arang": [70, 95], "pin_memori": 70, "start_step": 70, "copy_": 70, "float_int": 70, "int_float": 70, "floordiv": 70, "is_floating_point": 70, "numel": 70, "l": [70, 108], "9223372036854775807": 70, "requires_grad": 70, "tupleindex": 70, "tup": 70, "exported_program": [71, 76, 117], "arg_input": [71, 76, 93, 100], "kwarg_input": [71, 76, 100], "engine_cache_dir": [71, 96, 97], "engine_cache_s": [71, 96, 97], "5368709120": 71, "custom_engine_cach": [71, 97], "baseenginecach": [71, 97], "int32": [71, 76, 77, 95, 96, 104, 112], "channel_last": [71, 76, 77, 112], "244": [71, 76, 77], "alia": [71, 76], "better": [71, 76, 88, 112, 118], "understand": [71, 76, 114], "convolut": [71, 76, 77, 91, 95, 119], "_c": [71, 76, 77, 92], "oppos": [71, 76, 77], "lean": [71, 76], "spend": [71, 76], "integ": [71, 76, 85], "faster": [71, 76, 96, 97, 112], "parition": [71, 76], "increas": [71, 76, 97, 109], "amount": [71, 76, 109], "defer": [71, 76, 118], "lead": [71, 76, 82, 109, 116], "oversubscript": [71, 76], "hard": [71, 100], "disk": [71, 76, 97], "space": [71, 82, 83, 91], "byte": [71, 75, 76, 77, 95, 97, 109, 112], "1gb": [71, 96, 97], "exce": 71, "oldest": 71, "gear": [71, 91], "toward": [71, 91], "cross_compile_flag": 71, "cross_compil": 71, "refit_module_weight": [71, 100], "compiled_modul": [71, 100], "new_weight_modul": [71, 100], "verify_output": [71, 100], "use_weight_map_cach": [71, 100], "in_plac": [71, 100], 
"compmil": 71, "coverag": [71, 95], "min_acc_module_s": 72, "is_aten": 72, "use_experimental_fx_rt": 72, "correctness_atol": 72, "correctness_rtol": 72, "minim": [72, 91, 95], "submodul": [72, 88, 95], "fx2trt": 72, "cpu": [72, 106, 107, 109], "has_batch_dim": 72, "dtyep": 72, "prop": 72, "min_input_shap": 72, "optimized_input_shap": 72, "max_input_shap": 72, "popul": 72, "225": [72, 110, 111, 113], "explicit_precis": 72, "logger_level": 72, "model_trt": 73, "model_torchtrt": 73, "internal_error": 73, "dataloadercalibr": [74, 91], "preprocess": [74, 91, 110, 111, 113], "algo_typ": [74, 91], "calibrationalgo": [74, 91], "cachecalibr": [74, 91], "qualnam": [74, 76], "entropy_calibr": 74, "entropy_calibration_2": [74, 91], "legacy_calibr": 74, "minmax_calibr": 74, "set_multi_device_safe_mod": [75, 116], "_multidevicesafemodecontextmanag": 75, "impact": 75, "suppress": 75, "unsaf": 75, "trt_compiled_modul": 75, "torchtensorrtmodul": [75, 95], "encompass": [75, 77], "simpili": 75, "de": 75, "initi": [75, 76, 82, 100, 101, 102, 104, 105, 106, 107], "scriptmodul": [75, 76, 77, 89, 90, 117, 118], "overridden": [75, 76], "subclass": 75, "although": [75, 82], "recip": [75, 91], "afterward": 75, "former": 75, "care": 75, "hook": 75, "silent": 75, "get_extra_st": 75, "state_dict": [75, 76, 99], "set_extra_st": 75, "picklabl": 75, "pickl": [75, 95, 97], "load_state_dict": [75, 99, 108], "pythontorchtensorrtmodul": 75, "serialized_engin": [75, 77], "_set": [75, 101], "weight_name_map": 75, "trt_modul": 75, "engine_str": 75, "my_modul": 75, "current_devic": 75, "cudagraphs_validate_shap": 75, "versu": 75, "disable_profil": 75, "enable_profil": 75, "iprofil": 75, "spent": 75, "get_layer_info": 75, "request": [76, 89, 110, 111, 113], "decid": 76, "deseri": [76, 77, 89, 95], "retrac": 76, "strict": [76, 116], "valueerror": [76, 94], "mutabletorchtensorrtmodul": [76, 99], "pytorch_model": 76, "regular": 76, "whenev": 76, "refit_gm": 76, "shape_mod": 76, "_shapemod": 76, "interv": 76, "notat": 76, "bound": 76, "torch_tensor": 76, "tracer": 76, "example_tensor": 76, "optimization_profile_field": 76, "classmethod": 76, "disable_memory_format_check": 76, "core_id": 76, "schedul": [76, 110, 111, 113], "use_default": 76, "try_to": 76, "anoth": [76, 82, 83, 88, 90, 100], "typeerror": 76, "unknown": 76, "succe": 76, "float_dtyp": 76, "failur": 76, "bf16": 76, "try_from": [76, 95], "complex128": 76, "16": [76, 86, 88, 89, 90, 102, 105], "brain": 76, "bfloat16": 76, "f64": 76, "f8": 76, "fp8": 76, "float8": 76, "i32": 76, "sign": [76, 110, 111, 113], "i64": 76, "u8": 76, "unsign": 76, "uint8": 76, "trt_dla": 76, "torchtrt_dla": 76, "_from": 76, "torchtrt_dla_ec": 76, "torchtrt_safety_ec": 76, "saefti": 76, "trt_dla_ec": 76, "standalon": [76, 82], "certifi": 76, "tf": 76, "torchtrt_linear": 76, "cdhw32": 76, "thirti": 76, "row": [76, 83], "spatial": 76, "31": [76, 89], "subscript": [76, 82], "chw16": 76, "sixteen": 76, "15": [76, 82, 86], "chw2": 76, "chw32": 76, "chw4": 76, "four": [76, 82, 83], "dhwc": 76, "equivi": 76, "channels_last_3d": 76, "dhwc8": 76, "eight": 76, "dla_hwc4": 76, "imag": [76, 91, 95, 99, 103, 108, 110, 111, 113], "roundup": 76, "elements": 76, "dla_linear": 76, "planar": 76, "hwc": 76, "channels_last": 76, "hwc16": 76, "hwc8": 76, "least": [76, 82, 83], "ishapelay": 77, "check_method_op_support": 77, "seriali": 77, "put_binding_nam": 77, "tensorrtcompilespec": [77, 92], "scriptclass": 77, "0x7fe4a04e4030": 77, "_jit_to_tensorrt": 77, "00": 78, "000": [78, 93, 94, 95, 96, 97, 99, 100, 101, 102, 
103, 104, 105, 106, 107, 108, 109], "total": [78, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "galleri": [78, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "mem": 78, "torch_compile_advanced_usag": [78, 101], "torch_compile_resnet_exampl": [78, 102], "torch_compile_stable_diffus": [78, 103], "torch_compile_transformers_exampl": [78, 104], "v0": [79, 110, 111, 113], "pytorch_sphinx_them": [80, 87], "conf": [80, 87], "html_theme_opt": 80, "canonical_url": 80, "analytics_id": 80, "logo_onli": 80, "display_vers": 80, "prev_next_buttons_loc": 80, "bottom": 80, "style_external_link": 80, "vcs_pageview_mod": 80, "collapse_navig": 80, "sticky_navig": [80, 84], "navigation_depth": 80, "includehidden": 80, "titles_onli": 80, "canon": 80, "rank": 80, "trail": 80, "slash": 80, "googl": 80, "analyt": 80, "isn": [80, 82, 95], "shown": [80, 82, 89, 115], "sidebar": [80, 86], "button": [80, 82], "icon": [80, 82], "extern": [80, 82, 98, 110], "display_github": 80, "display_gitlab": 80, "gitlab": 80, "bitbucket": 80, "bar": [80, 82], "www": [80, 82, 89, 91, 110, 111, 113], "sphinx": [80, 81, 82, 83, 87, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "toctre": 80, "lose": 80, "scroll": [80, 84], "unlimit": 80, "header": [80, 82, 83, 89, 110, 111, 113], "render": 80, "github_url": 80, "bitbucket_url": 80, "gitlab_url": 80, "left": [80, 82], "upon": [80, 101, 104], "rst": [80, 82], "visitor": 80, "revert": 80, "misbuild": 80, "properti": [80, 95], "stick": 80, "screen": 80, "vertic": [80, 82], "too": [80, 82, 83], "sticki": [80, 86], "nav": [80, 86], "At": [81, 93, 100], "django": 81, "payment": 81, "dotpai": 81, "dotpayprovid": 81, "seller_id": 81, "pin": 81, "lock": 81, "lang": 81, "pl": 81, "polish": 81, "gatewai": 81, "transfer": 81, "purchas": 81, "item": [81, 83, 108], "param": 81, "seller": 81, "consult": 81, "ui": 81, "languag": [81, 82, 83, 88, 95, 98, 106, 110, 111, 113], "data_item_1": 81, "emphasi": 82, "hyperlink": 82, "uri": 82, "web": 82, "anonym": 82, "label": [82, 91, 108, 110, 111, 112, 113], "substitut": 82, "charact": 82, "exceedingli": 82, "ugli": 82, "problem": [82, 107], "problemat": 82, "ext": [82, 83], "autodoc": [82, 83], "demo": [82, 91], "test_py_modul": [82, 86], "my": [82, 106], "role": 82, "pep": 82, "287": 82, "rfc": 82, "2822": 82, "superscript": 82, "gui": 82, "taken": 82, "height": 82, "interfer": 82, "press": 82, "keyboard": 82, "mous": 82, "mmb": 82, "menuselect": 82, "seen": [82, 83], "whitespac": 82, "signific": [82, 95], "strang": 82, "hyphen": 82, "word": [82, 112], "adjust": 82, "width": [82, 112], "browser": 82, "sentenc": [82, 109, 112], "suppli": [82, 100], "258": 82, "equat": 82, "x_": 82, "x_0": 82, "x_1": 82, "x_2": 82, "x_3": 82, "x_4": 82, "nabla": 82, "frac": 82, "theta": 82, "phi": 82, "restructuredtext": [82, 83], "parser": [82, 94, 108], "colon": 82, "indent": 82, "literal_block": 82, "spaces_and_linebreak": 82, "preserv": [82, 88, 91], "markup_process": 82, "Or": 82, "great": [82, 89, 95, 97, 118], "why": [82, 116], "didn": 82, "blank": 82, "align": 82, "permit": 82, "awai": 82, "eric": 82, "orchestra": 82, "leader": 82, "bee": 82, "philosoph": 82, "ipso": 82, "facto": 82, "But": [82, 89, 100, 109], "got": [82, 89], "vi": 82, "entiti": 82, "said": 82, "entir": [82, 118], "ancient": 82, "injuri": 82, "sing": 82, "elk": 82, "bracket": 82, "miss": [82, 89], "brontosaurus": 82, "thin": 82, "thicker": 82, "middl": 82, "That": [82, 89], "mine": 82, "belong": 82, 
"me": [82, 83], "ann": 82, "begun": 82, "past": 82, "pars": [82, 89], "someurl": 82, "dev0": 82, "38b1804": 82, "caption": [82, 85], "pane": 82, "shell_command": 82, "echo": 82, "did": 82, "window_nam": 82, "session_nam": 82, "shorthand": 82, "some_funct": 82, "highlight": 82, "THE": 82, "heaven": 82, "hexagram": 82, "six": 82, "unbroken": 82, "primal": 82, "light": [82, 117], "spirit": 82, "weak": 82, "essenc": 82, "energi": 82, "unrestrict": 82, "conceiv": 82, "motion": 82, "regard": [82, 118], "basi": 82, "thu": 82, "persist": 82, "dual": 82, "sens": [82, 89], "univers": 82, "world": 82, "men": 82, "express": 82, "deiti": 82, "human": 82, "denot": [82, 95], "holi": 82, "man": [82, 83], "sage": 82, "ruler": 82, "who": 82, "awaken": 82, "utf": [82, 83], "sphinx_rtd_them": [82, 83], "docstr": [82, 83, 90], "dl": 82, "dt": 82, "tag": [82, 110, 111, 113], "tt": 82, "descnam": 82, "descclassnam": 82, "wrote": 82, "anyth": [82, 83, 116], "programm": 82, "myclass": 82, "dothismethod": 82, "flush": 82, "meth": 82, "capit": 82, "flox": 82, "unreferenc": 82, "nonexist": 82, "extrem": 82, "stuff": 82, "mayb": 82, "bold": 82, "ital": 82, "heck": 82, "put": [82, 112], "13": [82, 86], "backlink": 82, "knowledg": 82, "mind": 82, "ey": 82, "thought": 82, "medium": 82, "peopl": 82, "subsect": 82, "interpol": 82, "indirect": 82, "phrase": 82, "docutil": [82, 83], "sourceforg": [82, 83], "ref": 82, "clickabl": 82, "legend": 82, "revis": [82, 83, 99, 103], "revisit": 82, "enhanc": 82, "structuredtext": 82, "wooden": 82, "nickel": 82, "mad": 82, "scientist": 82, "bigger": 82, "bread": 82, "box": [82, 114, 118], "wash": 82, "behind": 82, "ear": 82, "room": 82, "closet": 82, "bathroom": 82, "trash": 82, "sink": 82, "mother": 82, "g_": 82, "mu": 82, "nu": 82, "pi": 82, "t_": 82, "rho_": 82, "servic": 82, "thing1": 82, "thing2": 82, "thing3": 82, "prose": 82, "provok": 82, "mental": 82, "exert": 82, "reader": 82, "discret": 82, "strongli": [82, 109], "advis": 82, "subtitl": 82, "outsid": 82, "often": 82, "besid": 82, "border": 82, "background": [82, 88], "ok": [82, 89], "transmit": 82, "disconnect": 82, "nonetheless": 82, "semant": 82, "blue": [82, 95], "white": 82, "arab": 83, "roman": 83, "upper": 83, "iii": 83, "iv": 83, "classifi": [83, 88, 89, 108, 112], "paragraph": [83, 86], "z": 83, "commonli": 83, "vm": 83, "david": 83, "goodger": 83, "address": [83, 95, 99], "123": 83, "street": 83, "canada": 83, "a1b": 83, "2c3": 83, "contact": 83, "myself": 83, "organ": 83, "humankind": 83, "2012": 83, "03": 83, "19": [83, 86], "53": 83, "0000": 83, "tue": 83, "jan": 83, "progress": 83, "7302": 83, "wish": 83, "redistribut": 83, "reattribut": 83, "sell": 83, "bui": 83, "rent": 83, "leas": 83, "improv": [83, 116], "quot": 83, "excerpt": 83, "collat": 83, "fold": 83, "stapl": 83, "mutil": 83, "anyon": 83, "heart": 83, "bibliograph": 83, "markup": [83, 86], "literal": 83, "yahoo": 83, "oh": 83, "liter": 83, "heh": 83, "child": 83, "beat": 83, "text": [83, 85, 106, 107, 112], "hehe": 83, "kept": 83, "sai": [83, 112], "cackl": 83, "night": 83, "lone": 83, "guangzhou": 83, "destini": 83, "hope": 83, "dream": 83, "forth": 83, "fifth": 83, "sixth": 83, "lorem": [83, 85], "ipsum": [83, 85], "dolor": [83, 85], "sit": [83, 85], "amet": [83, 85], "consectetur": [83, 85], "adipisc": [83, 85], "elit": [83, 85], "donec": [83, 85], "porttitor": [83, 85], "odio": [83, 85], "posuer": [83, 85], "vita": [83, 85], "ornar": [83, 85], "libero": [83, 85], "matti": 83, "loborti": [83, 85], "justo": [83, 85], "vestibulum": [83, 85], "nibh": 
[83, 85], "aliquet": [83, 85], "feugiat": [83, 85], "sagitti": [83, 85], "nequ": [83, 85], "qui": [83, 85], "eleifend": 83, "dui": [83, 85], "rutrum": [83, 85], "lectu": [83, 85], "suscipit": [83, 85], "letter": [83, 112], "column": 83, "cell": 83, "span": 83, "nam": [83, 85], "mauri": [83, 85], "arcu": [83, 85], "stub": 83, "behav": 84, "area": 84, "interdum": 85, "nec": 85, "finibu": 85, "dictum": 85, "velit": 85, "ut": 85, "eu": 85, "efficitur": 85, "aliquam": 85, "erat": 85, "diam": 85, "gravida": 85, "imperdiet": 85, "tellu": 85, "nisl": 85, "praesent": 85, "eget": 85, "elementum": 85, "rhoncu": 85, "tincidunt": 85, "suspendiss": 85, "volutpat": 85, "scelerisqu": 85, "tristiqu": 85, "aenean": 85, "condimentum": 85, "risu": 85, "accumsan": 85, "laoreet": 85, "maximu": 85, "sapien": 85, "ligula": 85, "fringilla": 85, "commodo": 85, "proin": 85, "et": 85, "pharetra": 85, "etiam": 85, "turpi": 85, "ant": 85, "luctu": 85, "vel": 85, "malesuada": 85, "dignissim": 85, "mi": 85, "nunc": 85, "augu": 85, "sem": 85, "cursu": 85, "nulla": 85, "pellentesqu": 85, "habit": 85, "morbi": 85, "senectu": 85, "netu": 85, "fame": 85, "ac": 85, "egesta": 85, "placerat": 85, "tortor": 85, "iaculi": 85, "venenati": 85, "cra": 85, "puru": 85, "ero": 85, "vehicula": 85, "fusc": 85, "auctor": 85, "phasellu": 85, "est": 85, "viverra": 85, "conval": 85, "faucibu": 85, "vulput": 85, "feli": 85, "sodal": 85, "maecena": 85, "congu": 85, "semper": 85, "enim": 85, "blandit": 85, "sollicitudin": 85, "urna": 85, "orci": 85, "lacu": 85, "quisqu": 85, "facilisi": 85, "hendrerit": 85, "curabitur": 85, "variu": 85, "bibendum": 85, "massa": 85, "magna": 85, "tempu": 85, "metu": 85, "nisi": 85, "pretium": 85, "leo": 85, "euismod": 85, "ultric": 85, "dapibu": 85, "lacinia": 85, "vivamu": 85, "molesti": 85, "hac": 85, "habitass": 85, "platea": 85, "dictumst": 85, "git": 86, "content": [86, 91, 110, 111, 113], "changelog": 86, "math": 86, "14": [86, 96, 104, 110, 111, 113], "17": 86, "18": [86, 89, 99], "submenu": 86, "symlink": 87, "subtre": 87, "_theme": 87, "html_theme": 87, "html_theme_path": 87, "optimiz": 88, "tutori": [88, 91, 93, 95, 97, 99, 100, 111, 113], "beginn": 88, "intro_to_torchscript_tutori": 88, "briefli": 88, "lenet": [88, 89], "lenetfeatextractor": 88, "conv1": [88, 89], "conv2d": [88, 95, 108], "conv2": [88, 89], "lenetclassifi": 88, "fc1": [88, 89], "120": [88, 89], "fc2": [88, 89], "84": [88, 89], "fc3": [88, 89], "feat": [88, 89], "obvious": 88, "pathwai": 88, "input_data": [88, 90], "traced_model": 88, "pick": [88, 115], "script_model": [88, 92], "perspect": 88, "___torch_mangle_10": 88, "129": 88, "___torch_mangle_9": 88, "119": 88, "___torch_mangle_5": 88, "137": 88, "callmethod": 88, "138": 88, "38": 88, "39": 88, "torch_script_modul": [88, 89], "in_tensor": 88, "fly": 88, "lenet_script": [88, 89], "haven": 89, "acquir": 89, "dyanmo": 89, "almost": [89, 118], "trt_lenet_script": 89, "apr": 89, "56": 89, "04": 89, "credit": 89, "stop": 89, "argc": 89, "argv": 89, "cerr": 89, "cout": 89, "even": [89, 99], "cppdoc": 89, "pretti": 89, "fashion": [89, 112], "enable_precis": 89, "And": 89, "convertgraphtotrtengin": 89, "engine_converted_from_jit": 89, "close": [89, 93], "saw": 89, "576": 89, "346": 89, "539": 89, "0464": 89, "0383": 89, "0678": 89, "0932": 89, "1045": 89, "0805": 89, "0435": 89, "0818": 89, "0208": 89, "0358": 89, "cudafloattyp": 89, "0530": 89, "1691": 89, "2802": 89, "1502": 89, "1056": 89, "1549": 89, "input0": [89, 90], "1063": 89, "input1": [89, 90], "input2": 89, "28": 89, "29": 89, 
"33": 89, "35": 89, "36": 89, "37": 89, "compilegraph": [89, 91], "transform": [89, 91, 96, 98, 100, 104, 106, 107, 108, 109, 110, 111, 113, 117], "laid": 89, "translat": [89, 100], "aren": 89, "techniqu": [89, 91, 107, 116], "checkmethodoperatorsupport": 89, "modular": 89, "ship": [89, 116], "exhaust": 89, "109": 89, "addlay": 89, "yourself": 89, "question": [89, 93], "outself": 89, "flatten_convert": 89, "unwraptoint": 89, "in_shap": 89, "tovec": 89, "out_shap": 89, "shuffl": [89, 91, 108], "addshuffl": 89, "setreshapedimens": 89, "todim": 89, "extens": [89, 118], "ctype": 89, "cdll": 89, "contributor": 89, "upstream": 89, "pr": 89, "usecas": 90, "sole": [90, 91, 118], "individu": 90, "accuraci": [91, 112], "loss": [91, 112], "infrastructur": [91, 110, 111, 113], "streamlin": 91, "expos": [91, 95], "cpp_frontend": 91, "loading_data_recip": 91, "cifar10": [91, 108], "cstddef": 91, "ktrain": 91, "ktest": 91, "un": 91, "cs": 91, "toronto": 91, "edu": 91, "kriz": 91, "cifar": 91, "is_train": 91, "trim": 91, "use_subset": 91, "new_siz": 91, "mode_": 91, "images_": 91, "targets_": 91, "calibration_dataset": 91, "data_dir": 91, "320": 91, "4914": [91, 108], "4822": [91, 108], "4465": [91, 108], "2023": [91, 108], "1994": [91, 108], "2010": [91, 108], "dataloaderopt": 91, "worker": 91, "virtual": 91, "input_shap": [91, 119], "compile_spec": [91, 94, 102, 119], "kf16": [91, 119], "ki8": 91, "vgg16": [91, 108], "testing_dataset": [91, 108], "totensor": [91, 108, 110, 111, 113], "testing_dataload": [91, 108], "num_work": [91, 108], "vgg": [91, 108], "test_ptq_dataloader_calibr": 91, "test_ptq_trt_calibr": 91, "krizhevski": 91, "hinton": 91, "2009": 91, "tini": 91, "simonyan": 91, "zisserman": 91, "2014": 91, "recognit": [91, 112], "arxiv": 91, "preprint": 91, "1409": 91, "1556": 91, "_jit_to_backend": 92, "mobilenet_v2": 92, "pretrain": [92, 97, 99, 102, 105, 110, 111, 112, 113], "gelu": 93, "sy": 93, "approxim": 93, "suppos": 93, "my_mod": 93, "ex_input": [93, 95], "baselin": 93, "my_standard_gelu": 93, "supports_dynamic_shap": 93, "supersed": 93, "converterprior": 93, "vers": 93, "prior": [93, 97, 114, 116], "distinct": 93, "prepend": 93, "candid": 93, "primit": 93, "compiler_ir": 93, "boilerpl": 93, "focu": [93, 99], "interoper": 93, "aten_ops_gelu": 93, "sourceir": 93, "cheap": 93, "unqiu": 93, "op_count": 93, "get_op_count": 93, "nonloc": 93, "elementwis": 93, "source_ir": 93, "lhs_val": 93, "rhs_val": 93, "x_7": 93, "x_8": 93, "79788456080000003": 93, "x_9": 93, "044714999999999998": 93, "x_10": 93, "x_11": 93, "x_12": 93, "x_13": 93, "x_14": 93, "x_15": 93, "my_custom_gelu": 93, "allclos": [93, 99, 100], "my_mod_erf": 93, "my_gelu_erf": 93, "notic": 93, "minut": [93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "converter_overload": 93, "jupyt": [93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], "ipynb": [93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "geforcertx": 94, "4080": 94, "3080": 94, "cross_runtime_compilation_for_window": 94, "trt_resnet": 94, "argpars": [94, 108], "argumentpars": [94, 108], "comil": 94, "add_argu": [94, 108], "parse_arg": [94, 108], "manual_se": [94, 96, 97, 99, 100], "resnet18": [94, 97, 99, 100, 102, 105], "amd64": 94, "loaded_model": 94, "load_cross_compiled_exported_program": 94, "trt_output": 94, "cross_compile_for_window": 94, "cost": [95, 97, 100, 116], "perhap": 95, "overhead": [95, 109, 116], "sake": 95, "circular": 95, "red": 95, "green": 95, "twice": 95, "written": 95, 
"openai": 95, "formal": 95, "tl": 95, "custom_op": 95, "circ_pad_kernel": 95, "all_pads_0": 95, "all_pads_2": 95, "all_pads_4": 95, "all_pads_6": 95, "orig_dims_0": 95, "orig_dims_1": 95, "orig_dims_2": 95, "orig_dims_3": 95, "y_shape_1": 95, "y_shape_2": 95, "y_shape_3": 95, "x_len": 95, "y_len": 95, "block_siz": 95, "pid": 95, "program_id": 95, "mask_i": 95, "i3": 95, "i2": 95, "i1": 95, "i0": 95, "j0": 95, "j1": 95, "j2": 95, "j3": 95, "load_idx": 95, "mask_x": 95, "launch": [95, 110, 111, 113], "torchtrt_ex": 95, "triton_circular_pad": 95, "mutates_arg": 95, "out_dim": 95, "tolist": 95, "all_pad": 95, "zero": 95, "orig_dim": 95, "blocksiz": 95, "256": [95, 108, 109, 110, 111, 113], "numblock": 95, "tracabl": 95, "prerequisit": 95, "fake": 95, "real": 95, "faketensor": 95, "register_fak": 95, "autograd": 95, "beyond": 95, "register_autograd": 95, "padded_x": 95, "my_model": 95, "2604": 95, "4232": 95, "3041": 95, "0833": 95, "2461": 95, "1270": 95, "2450": 95, "4079": 95, "2887": 95, "2828": 95, "0373": 95, "0332": 95, "3143": 95, "6344": 95, "5638": 95, "1867": 95, "5068": 95, "4363": 95, "7937": 95, "3488": 95, "1350": 95, "7966": 95, "3517": 95, "1379": 95, "5537": 95, "1088": 95, "8950": 95, "0550": 95, "6163": 95, "0109": 95, "5245": 95, "9632": 95, "5686": 95, "3775": 95, "8162": 95, "4216": 95, "4311": 95, "1649": 95, "2091": 95, "3668": 95, "1006": 95, "1447": 95, "0352": 95, "7689": 95, "8131": 95, "_run_on_gpu_0": 95, "_run_on_acc_1": 95, "dry": 95, "50": [95, 112], "count": 95, "__": 95, "aggreg": 95, "stat": 95, "latenc": [95, 109, 116], "abstractli": 95, "pkl": [95, 99], "cupi": 95, "gap": 95, "prealloc": 95, "circularpaddingplugin": 95, "ipluginv2dynamicext": 95, "field_collect": 95, "pluginfieldcollect": 95, "x_shape": 95, "num_output": 95, "plugin_namespac": 95, "plugin_typ": 95, "plugin_vers": 95, "assert": [95, 99, 100], "get_output_datatyp": 95, "input_typ": 95, "get_output_dimens": 95, "output_index": 95, "dimsexpr": 95, "exprbuild": 95, "iexprbuild": 95, "output_dim": 95, "dimensionoper": 95, "configure_plugin": 95, "inp": 95, "dynamicplugintensordesc": 95, "x_dim": 95, "desc": 95, "supports_format_combin": 95, "po": 95, "in_out": 95, "plugintensordesc": 95, "num_input": 95, "enqueu": 95, "input_desc": 95, "output_desc": 95, "in_dtyp": 95, "a_mem": 95, "unownedmemori": 95, "items": 95, "c_mem": 95, "a_ptr": 95, "memorypoint": 95, "c_ptr": 95, "a_d": 95, "memptr": 95, "c_d": 95, "a_t": 95, "as_tensor": 95, "c_t": 95, "cloned_plugin": 95, "__dict__": 95, "circularpaddingplugincr": 95, "iplugincr": 95, "field_nam": 95, "pluginfield": 95, "pluginfieldtyp": 95, "create_plugin": 95, "pluginfieldcollection_": 95, "deserialize_plugin": 95, "pads_dict": 95, "creator": 95, "trt_plugin_registri": 95, "get_plugin_registri": 95, "register_cr": 95, "untyp": 95, "get_trt_tensor": 95, "set_layer_nam": 95, "recal": 95, "intlist": 95, "circular_padding_convert": 95, "retriev": 95, "elsewher": 95, "plugin_registri": 95, "plugin_cr": 95, "get_plugin_cr": 95, "field_config": 95, "eventu": 95, "freez": 95, "_input": 95, "add_plugin_v2": 95, "circular_padding_plugin": 95, "_run_on_acc_0": 95, "grad_fn": 95, "subbackward0": 95, "custom_kernel_plugin": 95, "engine_caching_exampl": [96, 97], "remove_timing_cach": [96, 97], "bertmodel": [96, 104], "random": [96, 97, 99, 100, 109], "seed": [96, 97, 99, 100], "from_pretrain": [96, 99, 103, 104, 106, 107, 109], "uncas": [96, 104, 112], "return_dict": 96, "randint": [96, 104, 109], "compile_bert": 96, "enable_tim": [96, 97], "1st": [96, 97], 
"measur": [96, 97, 109], "2nd": [96, 97], "3rd": [96, 97], "slower": [96, 97], "messur": [96, 97], "compilation_kwarg": [96, 104], "torch_trt_bert_engine_cach": 96, "30": [96, 97, 99, 100, 102, 104, 115], "synchron": [96, 97, 109], "elapsed_tim": [96, 97], "millisecond": 96, "__name__": [96, 101, 104], "__main__": [96, 101, 104], "engine_caching_bert_exampl": 96, "paid": 97, "upfront": 97, "invalid": 97, "repeatedli": 97, "mitig": 97, "explor": 97, "torch_trt": [97, 99, 100], "_default": 97, "_engine_cach": 97, "flexibl": [97, 118], "histor": 97, "barrier": 97, "reconstruct": 97, "ti": 97, "hash": 97, "magnitud": 97, "torch_compil": [97, 101, 102, 104, 105, 114, 118], "compiled_model": 97, "ms": [97, 109], "dynamo_compil": 97, "example_input": 97, "200": 97, "dynamic_shap": [97, 114], "remot": 97, "systen": 97, "agnost": 97, "implent": 97, "ramenginecach": 97, "held": 97, "engine_cach": 97, "torch_compile_my_cach": 97, "_torch_export_gpt2": [98, 110], "_torch_export_llama2": [98, 110], "sphx_glr_tutorials__rendered_examples_dynamo_cross_runtime_compilation_for_window": [98, 110], "straightforward": 99, "especi": 99, "hug": [99, 106, 107], "face": [99, 106, 107], "difficult": 99, "ever": 99, "walk": [99, 100, 106], "lora": [99, 100], "use_python": 99, "mutable_modul": 99, "model2": [99, 100], "expected_output": [99, 100], "refitted_output": [99, 100], "reload": [99, 118], "checkpoint": [99, 108], "civitai": 99, "12597": 99, "moxin": 99, "diffusionpipelin": [99, 103], "no_grad": [99, 106, 107, 108, 109], "model_id": [99, 103], "runwayml": 99, "hous": 99, "forest": 99, "shuimobysim": 99, "wuchangshuo": 99, "qualiti": 99, "worst": 99, "lowr": 99, "cloudi": 99, "watermark": 99, "pipe": [99, 103], "torch_dtyp": [99, 103], "unet": [99, 103], "negative_prompt": 99, "num_inference_step": 99, "without_lora_mut": 99, "jpg": [99, 110, 111, 113], "procedur": 99, "load_lora_weight": 99, "stablediffusionapi": 99, "load_lora_embed": 99, "weight_nam": 99, "safetensor": 99, "adapter_nam": 99, "lora1": 99, "set_adapt": 99, "adapter_weight": 99, "fuse_lora": 99, "unload_lora_weight": 99, "with_lora_mut": 99, "mutable_torchtrt_module_exampl": 99, "expens": 100, "involv": 100, "occasion": [100, 101, 104], "adapt": 100, "infeas": 100, "focus": 100, "mostli": 100, "recogn": 100, "behalf": 100, "init": [100, 108], "sett": 100, "randomli": 100, "exp_program2": 100, "compiled_trt_ep": 100, "new_trt_gm": 100, "accomplish": 100, "gaurente": 100, "attempt": [100, 108, 114], "rebuild": 100, "heurist": 100, "refit_engine_exampl": 100, "x_out": 101, "y_out": 101, "x_y_out": 101, "invoc": 101, "sample_inputs_half": 101, "model_half": 101, "backend_kwarg": 101, "optimized_model_custom": 101, "exit": [101, 104], "2052": [101, 104], "compile_engine_and_inf": [101, 104], "new_input": [102, 104], "new_output": [102, 104], "new_batch_size_input": 102, "new_batch_size_output": 102, "inputs_bs8": 102, "mark_dynam": [102, 114], "outputs_bs8": 102, "No": [102, 114], "inputs_bs12": 102, "outputs_bs12": 102, "compvi": 103, "majest": 103, "castl": 103, "cloud": 103, "majestic_castl": 103, "png": 103, "enable_cudagraph": [105, 116], "out_trt": 105, "set_cudagraphs_mod": [105, 116], "inputs_2": 105, "inputs_3": 105, "out_trt_2": 105, "out_trt_3": 105, "torch_export_cudagraph": 105, "automodelforcausallm": [106, 107, 109], "autotoken": [106, 107], "export_llm": [106, 107, 109], "max_token": [106, 107, 109], "kv_cach": [106, 107], "token": [106, 107, 112], "pad_token_id": 106, "eos_token_id": [106, 107], "attn_implement": [106, 107, 109], 
"eager": [106, 107, 109], "enjoi": 106, "cute": 106, "dog": 106, "model_input": [106, 107], "return_tensor": [106, 107], "input_id": [106, 107], "regress": [106, 107], "huggingfac": [106, 107, 112], "pyt_gen_token": [106, 107], "gpt2_ep": 106, "max_seq_len": [106, 107, 109], "trt_gen_token": [106, 107], "skip_special_token": [106, 107], "parallel": 106, "paradigm": 106, "torch_export_gpt2": 106, "llama_path": [107, 109], "llama": [107, 109], "7b": [107, 109], "chat": [107, 109], "hf": [107, 109], "llama2_ep": [107, 109], "batch_decod": 107, "clean_up_tokenization_spac": 107, "solv": [107, 110, 111, 113], "smaller": [107, 112], "subproblem": 107, "torch_export_llama2": 107, "modelopt": 108, "mtq": 108, "export_torch_mod": 108, "layer_spec": 108, "num_class": 108, "1000": [108, 109, 110, 111, 113], "init_weight": 108, "in_channel": 108, "pool": [108, 119], "maxpool2d": 108, "batchnorm2d": 108, "sequenti": 108, "avgpool": 108, "adaptiveavgpool2d": 108, "4096": 108, "dropout": 108, "_initialize_weight": 108, "kaiming_normal_": 108, "fan_out": 108, "nonlinear": 108, "constant_": 108, "elif": 108, "normal_": 108, "vgg16_cfg": 108, "128": [108, 109], "ckpt": 108, "model_state_dict": 108, "device_count": 108, "ordereddict": 108, "new_state_dict": 108, "forget": 108, "training_dataset": 108, "randomcrop": 108, "randomhorizontalflip": 108, "training_dataload": 108, "drop_last": 108, "crit": 108, "crossentropyloss": 108, "calibrate_loop": 108, "pred": 108, "5f": 108, "acc": 108, "2f": 108, "quantize_typ": 108, "quant_cfg": 108, "int8_default_cfg": 108, "fp8_default_cfg": 108, "forward_loop": 108, "qdq": 108, "incomplet": 108, "functionaltensor": 108, "functionaltensormod": 108, "_trace": 108, "_export": 108, "float8_e4m3fn": 108, "class_prob": 108, "class_pr": 108, "test_prob": 108, "test_pr": 108, "test_loss": 108, "test_acc": 108, "vgg16_ptq": 108, "overcom": 109, "throughput": 109, "sometim": [109, 114], "outweigh": 109, "slowdown": 109, "hardwar": [109, 119], "experi": 109, "balanc": 109, "timeit": 109, "time_gener": 109, "output_seq_length": 109, "seq_len": [109, 114], "llm": 109, "input_seq": 109, "start_tim": 109, "default_tim": 109, "inputs_copi": 109, "decod": 109, "logit": 109, "next_token_logit": 109, "next_token": 109, "end_tim": 109, "time_mean_m": 109, "isl": 109, "osl": 109, "warm": 109, "solut": 109, "insight": 109, "weight_streaming_ctx": 109, "weight_stream": 109, "mean_lat": 109, "percentag": 109, "weight_budget_pct": 109, "device_budget": 109, "total_device_budget": 109, "permiss": 109, "equal": 109, "proportion": 109, "streamabl": 109, "streamable_budget": 109, "requested_budget": 109, "get_automatic_weight_streaming_budget": 109, "weight_streaming_exampl": 109, "hand": [110, 111, 113], "consider": [110, 111, 113], "concurr": [110, 111, 113], "grpc": [110, 111, 113], "aforement": [110, 111, 113], "familiar": [110, 111, 113], "resnet50": [110, 111, 113], "torchhub": [110, 111, 113], "docker": [110, 111, 113], "login": [110, 111, 113], "xx": [110, 111], "yy": [110, 111, 113], "mm": [110, 111, 113], "publish": [110, 111, 113], "pwd": [110, 111, 113], "scratch_spac": [110, 111, 113], "nvcr": [110, 111, 113], "py3": [110, 111, 113], "hub": [110, 111, 113], "_validate_not_a_forked_repo": [110, 111, 113], "ts_trt_model": [110, 111, 113], "triton_exampl": [110, 111, 113], "model_repositori": [110, 111, 113], "rm": [110, 111, 113], "highli": [110, 111, 112, 113], "suggest": [110, 111, 113], "simplest": [110, 111, 113], "pbtxt": [110, 111, 113], "data_typ": [110, 111, 113], "type_fp32": 
[110, 111, 113], "exact": [110, 111, 113], "encourag": [110, 111, 113], "proce": [110, 111, 113], "8000": [110, 111, 113], "8001": [110, 111, 113], "8002": [110, 111, 113], "tritonserv": [110, 111, 113], "spin": [110, 111, 113], "proceed": [110, 111, 113], "flesh": [110, 111, 113], "img1": [110, 111, 113], "hakaimagazin": [110, 111, 113], "wp": [110, 111, 113], "gulf": [110, 111, 113], "bird": [110, 111, 113], "attrdict": [110, 111, 113], "pyindex": [110, 111, 113], "tritoncli": [110, 111, 113], "jump": [110, 111, 113], "firstli": [110, 111, 113], "resiz": [110, 111, 113], "pil": [110, 111, 113], "httpclient": [110, 111, 113], "triton_to_np_dtyp": [110, 111, 113], "rn50_preprocess": [110, 111, 113], "img_path": [110, 111, 113], "img": [110, 111, 113], "centercrop": [110, 111, 113], "485": [110, 111, 113], "456": [110, 111, 113], "406": [110, 111, 113], "229": [110, 111, 113], "transformed_img": [110, 111, 113], "inferenceservercli": [110, 111, 113], "localhost": [110, 111, 113], "secondli": [110, 111, 113], "obtain": [110, 111, 112, 113, 117], "inferinput": [110, 111, 113], "set_data_from_numpi": [110, 111, 113], "binary_data": [110, 111, 113], "inferrequestedoutput": [110, 111, 113], "class_count": [110, 111, 113], "lastli": [110, 111, 113], "send": [110, 111, 113], "model_nam": [110, 111, 113], "inference_output": [110, 111, 113], "as_numpi": [110, 111, 113], "468750": [110, 111, 113], "90": [110, 111, 113], "523438": [110, 111, 113], "92": [110, 111, 113], "664062": [110, 111, 113], "429688": [110, 111, 113], "136": [110, 111, 113], "234375": [110, 111, 113], "confidence_scor": [110, 111, 113], "classification_index": [110, 111, 113], "_rendered_examples_python": 110, "_rendered_examples_jupyt": 110, "acoust": 112, "speech": 112, "quartznet": 112, "contextnet": 112, "subword": 112, "piec": 112, "excit": 112, "se": 112, "audio": 112, "transcrib": 112, "speedup": 112, "feedforward": 112, "cnn": 112, "uniformli": 112, "resolut": 112, "compound": 112, "coeffici": 112, "b0": 112, "corpu": 112, "english": 112, "supervis": 112, "walkthrough": 112, "overal": 112, "adopt": 112, "mobilenetv2": 112, "classif": 112, "imagenet": 112, "imagenett": 112, "qat": 112, "simul": 112, "eagerli": 114, "swap": 114, "exactli": 114, "_tracer": 114, "queri": 114, "attn_weight": 114, "compiler_dynamic_shap": 114, "inputs_bs2": 114, "mymodul": 115, "linear1": 115, "linear2": 115, "linear3": 115, "40": 115, "__myl_mulsum_myl0_0": 115, "layertyp": 115, "kgen": 115, "__mye116_dconst": 115, "__myln_k_arg__bb1_2": 115, "tacticnam": 115, "__myl_mulsum_0xfa6c1858aea1b13b03f90165d7149ec6": 115, "streamid": 115, "__myl_addresmulsum_myl0_1": 115, "__mye131_dconst": 115, "addmm_constant_0": 115, "addmm_add_broadcast_to_same_shape_lhs_broadcast_constantfloat": 115, "__myln_k_arg__bb1_3": 115, "__myl_addresmulsum_0xb3915d7ebfe48be45b6d49083479e12f": 115, "__myl_addresmulsumadd_myl0_2": 115, "__mye146_dconst": 115, "addmm_2_constant_0": 115, "addmm_2_add_broadcast_to_same_shape_lhs_broadcast_constantfloat": 115, "addmm_1_constant_0": 115, "addmm_1_add_broadcast_to_same_shape_lhs_broadcast_constantfloat": 115, "__myl_addresmulsumadd_0xcdd0085ad25f5f45ac5fafb72acbffd6": 115, "__myl_mulsumaddcas_myl0_0": 115, "__mye112_dconst": 115, "__myl_mulsumaddcas_0xacf8f5dd9be2f3e7bb09cdddeac6c936": 115, "__myl_resmulsumaddcas_myl0_1": 115, "__mye127_dconst": 115, "addmm_1_add_broadcast_to_same_shape_lhs_broadcast_constanthalf": 115, "__myl_resmulsumaddcas_0x5a3b318b5a1c97b7d5110c0291481337": 115, "__myl_resmulsumadd_myl0_2": 115, 
"__mye142_dconst": 115, "__myl_resmulsumadd_0x3fad91127c640fd6db771aa9cde67db0": 115, "libtorchtrt_runtim": 116, "dl_open": 116, "ld_preload": 116, "load_librari": 116, "wl": 116, "ltorchtrt": 116, "torchtrt_runtime_exampl": 116, "libtorchtrt_plugin": 116, "neglig": 116, "thread": 116, "alert": 116, "switch": 116, "mismatch": 116, "crash": 116, "sacrif": 116, "incur": 116, "intens": 116, "trt_ep": 117, "stai": 117, "trt_t": 117, "ergonom": 118, "deleg": 118, "believ": 118, "amen": 118, "artifact": 118, "pack": 118, "year": 118, "superset": 118, "codebas": 118, "immedi": 118, "traceabl": 118, "scriptabl": 118, "neural": 119, "deconvolut": 119, "scripted_model": 119}, "objects": {"": [[5, 0, 1, "c.STR", "STR"], [9, 0, 1, "c.TORCHTRT_API", "TORCHTRT_API"], [11, 0, 1, "c.TORCHTRT_HIDDEN", "TORCHTRT_HIDDEN"], [7, 0, 1, "c.TORCH_TENSORRT_MAJOR_VERSION", "TORCH_TENSORRT_MAJOR_VERSION"], [8, 0, 1, "c.TORCH_TENSORRT_MINOR_VERSION", "TORCH_TENSORRT_MINOR_VERSION"], [6, 0, 1, "c.TORCH_TENSORRT_PATCH_VERSION", "TORCH_TENSORRT_PATCH_VERSION"], [12, 0, 1, "c.TORCH_TENSORRT_VERSION", "TORCH_TENSORRT_VERSION"], [10, 0, 1, "c.XSTR", "XSTR"], [0, 1, 1, "_CPPv4N14torch_tensorrt8DataTypeE", "torch_tensorrt::DataType"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeE5Value", "torch_tensorrt::DataType::DataType"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeEN3c1010ScalarTypeE", "torch_tensorrt::DataType::DataType"], [0, 2, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeEv", "torch_tensorrt::DataType::DataType"], [0, 3, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeE5Value", "torch_tensorrt::DataType::DataType::t"], [0, 3, 1, "_CPPv4N14torch_tensorrt8DataType8DataTypeEN3c1010ScalarTypeE", "torch_tensorrt::DataType::DataType::t"], [0, 4, 1, "_CPPv4N14torch_tensorrt8DataType5ValueE", "torch_tensorrt::DataType::Value"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kBoolE", "torch_tensorrt::DataType::Value::kBool"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kCharE", "torch_tensorrt::DataType::Value::kChar"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value7kDoubleE", "torch_tensorrt::DataType::Value::kDouble"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value6kFloatE", "torch_tensorrt::DataType::Value::kFloat"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kHalfE", "torch_tensorrt::DataType::Value::kHalf"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value4kIntE", "torch_tensorrt::DataType::Value::kInt"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kLongE", "torch_tensorrt::DataType::Value::kLong"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value8kUnknownE", "torch_tensorrt::DataType::Value::kUnknown"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kBoolE", "torch_tensorrt::DataType::kBool"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kCharE", "torch_tensorrt::DataType::kChar"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value7kDoubleE", "torch_tensorrt::DataType::kDouble"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value6kFloatE", "torch_tensorrt::DataType::kFloat"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kHalfE", "torch_tensorrt::DataType::kHalf"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value4kIntE", "torch_tensorrt::DataType::kInt"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value5kLongE", "torch_tensorrt::DataType::kLong"], [0, 5, 1, "_CPPv4N14torch_tensorrt8DataType5Value8kUnknownE", "torch_tensorrt::DataType::kUnknown"], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypecv5ValueEv", "torch_tensorrt::DataType::operator Value"], [0, 2, 1, 
"_CPPv4N14torch_tensorrt8DataTypecvbEv", "torch_tensorrt::DataType::operator bool"], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeneE8DataType", "torch_tensorrt::DataType::operator!="], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeneEN8DataType5ValueE", "torch_tensorrt::DataType::operator!="], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeneE8DataType", "torch_tensorrt::DataType::operator!=::other"], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeneEN8DataType5ValueE", "torch_tensorrt::DataType::operator!=::other"], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqE8DataType", "torch_tensorrt::DataType::operator=="], [0, 2, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqEN8DataType5ValueE", "torch_tensorrt::DataType::operator=="], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqE8DataType", "torch_tensorrt::DataType::operator==::other"], [0, 3, 1, "_CPPv4NK14torch_tensorrt8DataTypeeqEN8DataType5ValueE", "torch_tensorrt::DataType::operator==::other"], [46, 1, 1, "_CPPv4N14torch_tensorrt6DeviceE", "torch_tensorrt::Device"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device6DeviceEv", "torch_tensorrt::Device::Device"], [1, 1, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypeE", "torch_tensorrt::Device::DeviceType"], [46, 1, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypeE", "torch_tensorrt::Device::DeviceType"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEv", "torch_tensorrt::Device::DeviceType::DeviceType"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEv", "torch_tensorrt::Device::DeviceType::DeviceType"], [1, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [1, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [46, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeE5Value", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [46, 3, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType10DeviceTypeEN3c1010DeviceTypeE", "torch_tensorrt::Device::DeviceType::DeviceType::t"], [1, 4, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5ValueE", "torch_tensorrt::Device::DeviceType::Value"], [46, 4, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5ValueE", "torch_tensorrt::Device::DeviceType::Value"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kDLAE", "torch_tensorrt::Device::DeviceType::Value::kDLA"], [46, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kDLAE", "torch_tensorrt::Device::DeviceType::Value::kDLA"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kGPUE", "torch_tensorrt::Device::DeviceType::Value::kGPU"], [46, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kGPUE", "torch_tensorrt::Device::DeviceType::Value::kGPU"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kDLAE", "torch_tensorrt::Device::DeviceType::kDLA"], [1, 5, 1, "_CPPv4N14torch_tensorrt6Device10DeviceType5Value4kGPUE", "torch_tensorrt::Device::DeviceType::kGPU"], [1, 2, 1, 
"_CPPv4NK14torch_tensorrt6Device10DeviceTypecv5ValueEv", "torch_tensorrt::Device::DeviceType::operator Value"], [46, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypecv5ValueEv", "torch_tensorrt::Device::DeviceType::operator Value"], [1, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypecvbEv", "torch_tensorrt::Device::DeviceType::operator bool"], [46, 2, 1, "_CPPv4N14torch_tensorrt6Device10DeviceTypecvbEv", "torch_tensorrt::Device::DeviceType::operator bool"], [1, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!="], [46, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!="], [1, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!=::other"], [46, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeneE10DeviceType", "torch_tensorrt::Device::DeviceType::operator!=::other"], [1, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator=="], [46, 2, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator=="], [1, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator==::other"], [46, 3, 1, "_CPPv4NK14torch_tensorrt6Device10DeviceTypeeqE10DeviceType", "torch_tensorrt::Device::DeviceType::operator==::other"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device18allow_gpu_fallbackE", "torch_tensorrt::Device::allow_gpu_fallback"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device11device_typeE", "torch_tensorrt::Device::device_type"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device8dla_coreE", "torch_tensorrt::Device::dla_core"], [46, 6, 1, "_CPPv4N14torch_tensorrt6Device6gpu_idE", "torch_tensorrt::Device::gpu_id"], [17, 4, 1, "_CPPv4N14torch_tensorrt16EngineCapabilityE", "torch_tensorrt::EngineCapability"], [17, 5, 1, "_CPPv4N14torch_tensorrt16EngineCapability15kDLA_STANDALONEE", "torch_tensorrt::EngineCapability::kDLA_STANDALONE"], [17, 5, 1, "_CPPv4N14torch_tensorrt16EngineCapability7kSAFETYE", "torch_tensorrt::EngineCapability::kSAFETY"], [17, 5, 1, "_CPPv4N14torch_tensorrt16EngineCapability9kSTANDARDE", "torch_tensorrt::EngineCapability::kSTANDARD"], [47, 1, 1, "_CPPv4N14torch_tensorrt11GraphInputsE", "torch_tensorrt::GraphInputs"], [47, 6, 1, "_CPPv4N14torch_tensorrt11GraphInputs15input_signatureE", "torch_tensorrt::GraphInputs::input_signature"], [47, 6, 1, "_CPPv4N14torch_tensorrt11GraphInputs6inputsE", "torch_tensorrt::GraphInputs::inputs"], [48, 1, 1, "_CPPv4N14torch_tensorrt5InputE", "torch_tensorrt::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN2at6TensorE", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 
1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input"], [48, 2, 1, "_CPPv4N14torch_tensorrt5Input5InputEv", "torch_tensorrt::Input::Input"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::dtype"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", 
"torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::format"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", 
"torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::max_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::min_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::opt_shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, 
"_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataType12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::shape"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN2at6TensorE", "torch_tensorrt::Input::Input::tensor"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputEN3c108ArrayRefI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEE8DataTypeNSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 3, 1, "_CPPv4N14torch_tensorrt5Input5InputENSt6vectorI7int64_tEENSt6vectorIdEE12TensorFormat", "torch_tensorrt::Input::Input::tensor_domain"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input5dtypeE", "torch_tensorrt::Input::dtype"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input6formatE", "torch_tensorrt::Input::format"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input9max_shapeE", "torch_tensorrt::Input::max_shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input9min_shapeE", "torch_tensorrt::Input::min_shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input9opt_shapeE", "torch_tensorrt::Input::opt_shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input5shapeE", "torch_tensorrt::Input::shape"], [48, 6, 1, "_CPPv4N14torch_tensorrt5Input13tensor_domainE", "torch_tensorrt::Input::tensor_domain"], [2, 1, 1, "_CPPv4N14torch_tensorrt12TensorFormatE", "torch_tensorrt::TensorFormat"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatE5Value", "torch_tensorrt::TensorFormat::TensorFormat"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatEN2at12MemoryFormatE", "torch_tensorrt::TensorFormat::TensorFormat"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatEv", "torch_tensorrt::TensorFormat::TensorFormat"], [2, 3, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatE5Value", 
"torch_tensorrt::TensorFormat::TensorFormat::t"], [2, 3, 1, "_CPPv4N14torch_tensorrt12TensorFormat12TensorFormatEN2at12MemoryFormatE", "torch_tensorrt::TensorFormat::TensorFormat::t"], [2, 4, 1, "_CPPv4N14torch_tensorrt12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::Value"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value13kChannelsLastE", "torch_tensorrt::TensorFormat::Value::kChannelsLast"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value11kContiguousE", "torch_tensorrt::TensorFormat::Value::kContiguous"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value8kUnknownE", "torch_tensorrt::TensorFormat::Value::kUnknown"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value13kChannelsLastE", "torch_tensorrt::TensorFormat::kChannelsLast"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value11kContiguousE", "torch_tensorrt::TensorFormat::kContiguous"], [2, 5, 1, "_CPPv4N14torch_tensorrt12TensorFormat5Value8kUnknownE", "torch_tensorrt::TensorFormat::kUnknown"], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormatcv5ValueEv", "torch_tensorrt::TensorFormat::operator Value"], [2, 2, 1, "_CPPv4N14torch_tensorrt12TensorFormatcvbEv", "torch_tensorrt::TensorFormat::operator bool"], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneE12TensorFormat", "torch_tensorrt::TensorFormat::operator!="], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator!="], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneE12TensorFormat", "torch_tensorrt::TensorFormat::operator!=::other"], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormatneEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator!=::other"], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqE12TensorFormat", "torch_tensorrt::TensorFormat::operator=="], [2, 2, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator=="], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqE12TensorFormat", "torch_tensorrt::TensorFormat::operator==::other"], [2, 3, 1, "_CPPv4NK14torch_tensorrt12TensorFormateqEN12TensorFormat5ValueE", "torch_tensorrt::TensorFormat::operator==::other"], [36, 2, 1, "_CPPv4N14torch_tensorrt15dump_build_infoEv", "torch_tensorrt::dump_build_info"], [34, 2, 1, "_CPPv4N14torch_tensorrt14get_build_infoEv", "torch_tensorrt::get_build_info"], [16, 4, 1, "_CPPv4N14torch_tensorrt7logging5LevelE", "torch_tensorrt::logging::Level"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kDEBUGE", "torch_tensorrt::logging::Level::kDEBUG"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kERRORE", "torch_tensorrt::logging::Level::kERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kGRAPHE", "torch_tensorrt::logging::Level::kGRAPH"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level5kINFOE", "torch_tensorrt::logging::Level::kINFO"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level15kINTERNAL_ERRORE", "torch_tensorrt::logging::Level::kINTERNAL_ERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level8kWARNINGE", "torch_tensorrt::logging::Level::kWARNING"], [24, 2, 1, "_CPPv4N14torch_tensorrt7logging24get_is_colored_output_onEv", "torch_tensorrt::logging::get_is_colored_output_on"], [22, 2, 1, "_CPPv4N14torch_tensorrt7logging18get_logging_prefixEv", "torch_tensorrt::logging::get_logging_prefix"], [23, 2, 1, "_CPPv4N14torch_tensorrt7logging24get_reportable_log_levelEv", "torch_tensorrt::logging::get_reportable_log_level"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kDEBUGE", 
"torch_tensorrt::logging::kDEBUG"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kERRORE", "torch_tensorrt::logging::kERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level6kGRAPHE", "torch_tensorrt::logging::kGRAPH"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level5kINFOE", "torch_tensorrt::logging::kINFO"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level15kINTERNAL_ERRORE", "torch_tensorrt::logging::kINTERNAL_ERROR"], [16, 5, 1, "_CPPv4N14torch_tensorrt7logging5Level8kWARNINGE", "torch_tensorrt::logging::kWARNING"], [26, 2, 1, "_CPPv4N14torch_tensorrt7logging3logE5LevelNSt6stringE", "torch_tensorrt::logging::log"], [26, 3, 1, "_CPPv4N14torch_tensorrt7logging3logE5LevelNSt6stringE", "torch_tensorrt::logging::log::lvl"], [26, 3, 1, "_CPPv4N14torch_tensorrt7logging3logE5LevelNSt6stringE", "torch_tensorrt::logging::log::msg"], [27, 2, 1, "_CPPv4N14torch_tensorrt7logging24set_is_colored_output_onEb", "torch_tensorrt::logging::set_is_colored_output_on"], [27, 3, 1, "_CPPv4N14torch_tensorrt7logging24set_is_colored_output_onEb", "torch_tensorrt::logging::set_is_colored_output_on::colored_output_on"], [28, 2, 1, "_CPPv4N14torch_tensorrt7logging18set_logging_prefixENSt6stringE", "torch_tensorrt::logging::set_logging_prefix"], [28, 3, 1, "_CPPv4N14torch_tensorrt7logging18set_logging_prefixENSt6stringE", "torch_tensorrt::logging::set_logging_prefix::prefix"], [25, 2, 1, "_CPPv4N14torch_tensorrt7logging24set_reportable_log_levelE5Level", "torch_tensorrt::logging::set_reportable_log_level"], [25, 3, 1, "_CPPv4N14torch_tensorrt7logging24set_reportable_log_levelE5Level", "torch_tensorrt::logging::set_reportable_log_level::lvl"], [3, 1, 1, "_CPPv4I0EN14torch_tensorrt3ptq19Int8CacheCalibratorE", "torch_tensorrt::ptq::Int8CacheCalibrator"], [3, 7, 1, "_CPPv4I0EN14torch_tensorrt3ptq19Int8CacheCalibratorE", "torch_tensorrt::ptq::Int8CacheCalibrator::Algorithm"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator19Int8CacheCalibratorERKNSt6stringE", "torch_tensorrt::ptq::Int8CacheCalibrator::Int8CacheCalibrator"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator19Int8CacheCalibratorERKNSt6stringE", "torch_tensorrt::ptq::Int8CacheCalibrator::Int8CacheCalibrator::cache_file_path"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch::bindings"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch::names"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatch::nbBindings"], [3, 2, 1, "_CPPv4NK14torch_tensorrt3ptq19Int8CacheCalibrator12getBatchSizeEv", "torch_tensorrt::ptq::Int8CacheCalibrator::getBatchSize"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibratorcvPN8nvinfer115IInt8CalibratorEEv", "torch_tensorrt::ptq::Int8CacheCalibrator::operator nvinfer1::IInt8Calibrator*"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::readCalibrationCache"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::readCalibrationCache::length"], [3, 2, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator21writeCalibrationCacheEPKv6size_t", 
"torch_tensorrt::ptq::Int8CacheCalibrator::writeCalibrationCache"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::writeCalibrationCache::cache"], [3, 3, 1, "_CPPv4N14torch_tensorrt3ptq19Int8CacheCalibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8CacheCalibrator::writeCalibrationCache::length"], [4, 1, 1, "_CPPv4I00EN14torch_tensorrt3ptq14Int8CalibratorE", "torch_tensorrt::ptq::Int8Calibrator"], [4, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq14Int8CalibratorE", "torch_tensorrt::ptq::Int8Calibrator::Algorithm"], [4, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq14Int8CalibratorE", "torch_tensorrt::ptq::Int8Calibrator::DataLoaderUniquePtr"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator::cache_file_path"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator::dataloader"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator14Int8CalibratorE19DataLoaderUniquePtrRKNSt6stringEb", "torch_tensorrt::ptq::Int8Calibrator::Int8Calibrator::use_cache"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch::bindings"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch::names"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator8getBatchEA_PvA_PKci", "torch_tensorrt::ptq::Int8Calibrator::getBatch::nbBindings"], [4, 2, 1, "_CPPv4NK14torch_tensorrt3ptq14Int8Calibrator12getBatchSizeEv", "torch_tensorrt::ptq::Int8Calibrator::getBatchSize"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8CalibratorcvPN8nvinfer115IInt8CalibratorEEv", "torch_tensorrt::ptq::Int8Calibrator::operator nvinfer1::IInt8Calibrator*"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8Calibrator::readCalibrationCache"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator20readCalibrationCacheER6size_t", "torch_tensorrt::ptq::Int8Calibrator::readCalibrationCache::length"], [4, 2, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8Calibrator::writeCalibrationCache"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8Calibrator::writeCalibrationCache::cache"], [4, 3, 1, "_CPPv4N14torch_tensorrt3ptq14Int8Calibrator21writeCalibrationCacheEPKv6size_t", "torch_tensorrt::ptq::Int8Calibrator::writeCalibrationCache::length"], [29, 2, 1, "_CPPv4I0EN14torch_tensorrt3ptq26make_int8_cache_calibratorE19Int8CacheCalibratorI9AlgorithmERKNSt6stringE", "torch_tensorrt::ptq::make_int8_cache_calibrator"], [29, 7, 1, "_CPPv4I0EN14torch_tensorrt3ptq26make_int8_cache_calibratorE19Int8CacheCalibratorI9AlgorithmERKNSt6stringE", "torch_tensorrt::ptq::make_int8_cache_calibrator::Algorithm"], [29, 3, 1, "_CPPv4I0EN14torch_tensorrt3ptq26make_int8_cache_calibratorE19Int8CacheCalibratorI9AlgorithmERKNSt6stringE", 
"torch_tensorrt::ptq::make_int8_cache_calibrator::cache_file_path"], [30, 2, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator"], [30, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::Algorithm"], [30, 7, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::DataLoader"], [30, 3, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::cache_file_path"], [30, 3, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::dataloader"], [30, 3, 1, "_CPPv4I00EN14torch_tensorrt3ptq20make_int8_calibratorE14Int8CalibratorI9Algorithm10DataLoaderE10DataLoaderRKNSt6stringEb", "torch_tensorrt::ptq::make_int8_calibrator::use_cache"], [35, 2, 1, "_CPPv4N14torch_tensorrt10set_deviceEKi", "torch_tensorrt::set_device"], [35, 3, 1, "_CPPv4N14torch_tensorrt10set_deviceEKi", "torch_tensorrt::set_device::gpu_id"], [49, 1, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpecE", "torch_tensorrt::torchscript::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecEN5torch3jit6IValueE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorI5InputEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorIN3c108ArrayRefI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 2, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorINSt6vectorI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorIN3c108ArrayRefI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::fixed_sizes"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorINSt6vectorI7int64_tEEEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::fixed_sizes"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecEN5torch3jit6IValueE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::input_signature"], [49, 3, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec11CompileSpecENSt6vectorI5InputEE", "torch_tensorrt::torchscript::CompileSpec::CompileSpec::inputs"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec19allow_shape_tensorsE", "torch_tensorrt::torchscript::CompileSpec::allow_shape_tensors"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec10capabilityE", "torch_tensorrt::torchscript::CompileSpec::capability"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec5debugE", "torch_tensorrt::torchscript::CompileSpec::debug"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec6deviceE", "torch_tensorrt::torchscript::CompileSpec::device"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec12disable_tf32E", "torch_tensorrt::torchscript::CompileSpec::disable_tf32"], [49, 6, 1, 
"_CPPv4N14torch_tensorrt11torchscript11CompileSpec20dla_global_dram_sizeE", "torch_tensorrt::torchscript::CompileSpec::dla_global_dram_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec19dla_local_dram_sizeE", "torch_tensorrt::torchscript::CompileSpec::dla_local_dram_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec13dla_sram_sizeE", "torch_tensorrt::torchscript::CompileSpec::dla_sram_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec18enabled_precisionsE", "torch_tensorrt::torchscript::CompileSpec::enabled_precisions"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec12graph_inputsE", "torch_tensorrt::torchscript::CompileSpec::graph_inputs"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14min_block_sizeE", "torch_tensorrt::torchscript::CompileSpec::min_block_size"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec20num_avg_timing_itersE", "torch_tensorrt::torchscript::CompileSpec::num_avg_timing_iters"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14ptq_calibratorE", "torch_tensorrt::torchscript::CompileSpec::ptq_calibrator"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec5refitE", "torch_tensorrt::torchscript::CompileSpec::refit"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec24require_full_compilationE", "torch_tensorrt::torchscript::CompileSpec::require_full_compilation"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14sparse_weightsE", "torch_tensorrt::torchscript::CompileSpec::sparse_weights"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec22torch_executed_modulesE", "torch_tensorrt::torchscript::CompileSpec::torch_executed_modules"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec18torch_executed_opsE", "torch_tensorrt::torchscript::CompileSpec::torch_executed_ops"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec24truncate_long_and_doubleE", "torch_tensorrt::torchscript::CompileSpec::truncate_long_and_double"], [49, 6, 1, "_CPPv4N14torch_tensorrt11torchscript11CompileSpec14workspace_sizeE", "torch_tensorrt::torchscript::CompileSpec::workspace_size"], [31, 2, 1, "_CPPv4N14torch_tensorrt11torchscript29check_method_operator_supportERKN5torch3jit6ModuleENSt6stringE", "torch_tensorrt::torchscript::check_method_operator_support"], [31, 3, 1, "_CPPv4N14torch_tensorrt11torchscript29check_method_operator_supportERKN5torch3jit6ModuleENSt6stringE", "torch_tensorrt::torchscript::check_method_operator_support::method_name"], [31, 3, 1, "_CPPv4N14torch_tensorrt11torchscript29check_method_operator_supportERKN5torch3jit6ModuleENSt6stringE", "torch_tensorrt::torchscript::check_method_operator_support::module"], [32, 2, 1, "_CPPv4N14torch_tensorrt11torchscript7compileERKN5torch3jit6ModuleE11CompileSpec", "torch_tensorrt::torchscript::compile"], [32, 3, 1, "_CPPv4N14torch_tensorrt11torchscript7compileERKN5torch3jit6ModuleE11CompileSpec", "torch_tensorrt::torchscript::compile::info"], [32, 3, 1, "_CPPv4N14torch_tensorrt11torchscript7compileERKN5torch3jit6ModuleE11CompileSpec", "torch_tensorrt::torchscript::compile::module"], [37, 2, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine"], [37, 3, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", 
"torch_tensorrt::torchscript::convert_method_to_trt_engine::info"], [37, 3, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine::method_name"], [37, 3, 1, "_CPPv4N14torch_tensorrt11torchscript28convert_method_to_trt_engineERKN5torch3jit6ModuleENSt6stringE11CompileSpec", "torch_tensorrt::torchscript::convert_method_to_trt_engine::module"], [33, 2, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::device"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::engine"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::input_binding_names"], [33, 3, 1, "_CPPv4N14torch_tensorrt11torchscript26embed_engine_in_new_moduleERKNSt6stringE6DeviceRKNSt6vectorINSt6stringEEERKNSt6vectorINSt6stringEEE", "torch_tensorrt::torchscript::embed_engine_in_new_module::output_binding_names"], [76, 8, 0, "-", "torch_tensorrt"]], "torch_tensorrt": [[76, 9, 1, "", "Device"], [76, 9, 1, "", "DeviceType"], [76, 9, 1, "", "EngineCapability"], [76, 9, 1, "", "Input"], [76, 9, 1, "", "MutableTorchTensorRTModule"], [76, 12, 1, "", "compile"], [76, 12, 1, "", "convert_method_to_trt_engine"], [76, 9, 1, "", "dtype"], [117, 8, 0, "-", "dynamo"], [72, 8, 0, "-", "fx"], [76, 12, 1, "", "load"], [73, 8, 0, "-", "logging"], [76, 9, 1, "", "memory_format"], [75, 8, 0, "-", "runtime"], [76, 12, 1, "", "save"], [77, 8, 0, "-", "ts"]], "torch_tensorrt.Device": [[76, 10, 1, "", "__init__"], [76, 11, 1, "", "device_type"], [76, 11, 1, "", "dla_core"], [76, 11, 1, "", "gpu_id"]], "torch_tensorrt.DeviceType": [[76, 11, 1, "", "DLA"], [76, 11, 1, "", "GPU"], [76, 11, 1, "", "UNKNOWN"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"]], "torch_tensorrt.EngineCapability": [[76, 11, 1, "", "DLA_STANDALONE"], [76, 11, 1, "", "SAFETY"], [76, 11, 1, "", "STANDARD"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"]], "torch_tensorrt.Input": [[76, 10, 1, "", "__init__"], [76, 11, 1, "", "dtype"], [76, 10, 1, "", "example_tensor"], [76, 11, 1, "", "format"], [76, 10, 1, "", "from_tensor"], [76, 10, 1, "", "from_tensors"]], "torch_tensorrt.MutableTorchTensorRTModule": [[76, 10, 1, "", "__init__"], [76, 10, 1, "", "compile"], [76, 10, 1, "", "refit_gm"]], "torch_tensorrt.dtype": [[76, 11, 1, "", "b"], [76, 11, 1, "", "bf16"], [76, 11, 1, "", "f16"], [76, 11, 1, "", "f32"], [76, 11, 1, "", "f64"], [76, 11, 1, "", "f8"], [76, 11, 1, "", "i32"], [76, 11, 1, "", "i64"], [76, 11, 1, "", "i8"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"], [76, 11, 1, "", "u8"], [76, 11, 1, "", "unknown"]], "torch_tensorrt.dynamo": [[71, 9, 1, "", "CompilationSettings"], [71, 12, 1, "", "compile"], [71, 12, 1, "", "export"], [71, 12, 1, "", "refit_module_weights"], [71, 12, 1, "", "trace"]], 
"torch_tensorrt.fx": [[72, 9, 1, "", "InputTensorSpec"], [72, 9, 1, "", "TRTInterpreter"], [72, 9, 1, "", "TRTInterpreterResult"], [72, 9, 1, "", "TRTModule"], [72, 12, 1, "", "compile"]], "torch_tensorrt.logging": [[73, 9, 1, "", "debug"], [73, 9, 1, "", "errors"], [73, 9, 1, "", "graphs"], [73, 9, 1, "", "info"], [73, 9, 1, "", "internal_errors"], [73, 9, 1, "", "warnings"]], "torch_tensorrt.memory_format": [[76, 11, 1, "", "cdhw32"], [76, 11, 1, "", "chw16"], [76, 11, 1, "", "chw2"], [76, 11, 1, "", "chw32"], [76, 11, 1, "", "chw4"], [76, 11, 1, "", "dhwc"], [76, 11, 1, "", "dhwc8"], [76, 11, 1, "", "dla_hwc4"], [76, 11, 1, "", "dla_linear"], [76, 11, 1, "", "hwc"], [76, 11, 1, "", "hwc16"], [76, 11, 1, "", "hwc8"], [76, 11, 1, "", "linear"], [76, 10, 1, "", "to"], [76, 10, 1, "", "try_from"], [76, 10, 1, "", "try_to"]], "torch_tensorrt.runtime": [[75, 9, 1, "", "PythonTorchTensorRTModule"], [75, 9, 1, "", "TorchTensorRTModule"], [75, 12, 1, "", "set_multi_device_safe_mode"]], "torch_tensorrt.runtime.PythonTorchTensorRTModule": [[75, 10, 1, "", "__init__"], [75, 10, 1, "", "cudagraphs_validate_shapes"], [75, 10, 1, "", "disable_profiling"], [75, 10, 1, "", "enable_profiling"], [75, 10, 1, "", "forward"], [75, 10, 1, "", "get_layer_info"]], "torch_tensorrt.runtime.TorchTensorRTModule": [[75, 10, 1, "", "__init__"], [75, 10, 1, "", "forward"], [75, 10, 1, "", "get_extra_state"], [75, 10, 1, "", "set_extra_state"]], "torch_tensorrt.ts": [[77, 12, 1, "", "TensorRTCompileSpec"], [77, 12, 1, "", "check_method_op_support"], [77, 12, 1, "", "compile"], [77, 12, 1, "", "convert_method_to_trt_engine"], [77, 12, 1, "", "embed_engine_in_new_module"], [74, 8, 0, "-", "ptq"]], "torch_tensorrt.ts.ptq": [[74, 9, 1, "", "CacheCalibrator"], [74, 9, 1, "", "CalibrationAlgo"], [74, 9, 1, "", "DataLoaderCalibrator"]], "torch_tensorrt.ts.ptq.CalibrationAlgo": [[74, 11, 1, "", "ENTROPY_CALIBRATION"], [74, 11, 1, "", "ENTROPY_CALIBRATION_2"], [74, 11, 1, "", "LEGACY_CALIBRATION"], [74, 11, 1, "", "MINMAX_CALIBRATION"]]}, "objtypes": {"0": "c:macro", "1": "cpp:class", "2": "cpp:function", "3": "cpp:functionParam", "4": "cpp:enum", "5": "cpp:enumerator", "6": "cpp:member", "7": "cpp:templateParam", "8": "py:module", "9": "py:class", "10": "py:method", "11": "py:attribute", "12": "py:function"}, "objnames": {"0": ["c", "macro", "C macro"], "1": ["cpp", "class", "C++ class"], "2": ["cpp", "function", "C++ function"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "enum", "C++ enum"], "5": ["cpp", "enumerator", "C++ enumerator"], "6": ["cpp", "member", "C++ member"], "7": ["cpp", "templateParam", "C++ template parameter"], "8": ["py", "module", "Python module"], "9": ["py", "class", "Python class"], "10": ["py", "method", "Python method"], "11": ["py", "attribute", "Python attribute"], "12": ["py", "function", "Python function"]}, "titleterms": {"class": [0, 1, 2, 3, 4, 20, 21, 38, 40, 41, 50, 71, 72, 74, 75, 76], "datatyp": 0, "document": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 17, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 46, 47, 48, 49, 61, 69, 85, 86], "devic": [1, 46, 116], "devicetyp": 1, "nest": [1, 46], "relationship": [1, 3, 4, 46, 48], "tensorformat": 2, "templat": [3, 4, 29, 30], "int8cachecalibr": 3, "inherit": [3, 4, 48], "base": [3, 4, 48, 80], "type": [3, 4, 46, 48, 54], "int8calibr": 4, "defin": [5, 6, 7, 8, 9, 10, 11, 12, 19, 50, 108], "str": 5, "torch_tensorrt_patch_vers": 6, "torch_tensorrt_major_vers": 7, "torch_tensorrt_minor_vers": 8, 
"torchtrt_api": 9, "xstr": 10, "torchtrt_hidden": 11, "torch_tensorrt_vers": 12, "directori": [13, 14, 15, 51], "cpp": [13, 18, 19, 20, 21, 56], "subdirectori": [13, 14], "includ": [14, 18, 19, 20, 21], "torch_tensorrt": [15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 45, 67, 71, 72, 73, 74, 75, 76, 77, 102, 104, 105, 118], "file": [15, 18, 19, 20, 21, 42, 43, 44, 45, 50, 51], "enum": [16, 17, 18, 21, 38, 39, 50, 74, 76], "level": [16, 80, 82, 83], "enginecap": 17, "log": [18, 22, 23, 24, 25, 26, 27, 28, 39, 42, 73], "h": [18, 19, 20, 21, 42, 43, 44, 45, 56], "content": [18, 19, 20, 21, 38, 39, 40, 41, 80, 81, 82, 83, 84, 85], "definit": [18, 19, 20, 21, 83, 94, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "By": [18, 19], "namespac": [18, 19, 20, 21, 38, 39, 40, 41, 50], "function": [18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 50, 61, 71, 72, 75, 76, 77, 108], "macro": [19, 43], "ptq": [20, 29, 30, 40, 44, 74, 91, 108], "get_logging_prefix": 22, "get_reportable_log_level": 23, "get_is_colored_output_on": 24, "set_reportable_log_level": 25, "set_is_colored_output_on": 27, "set_logging_prefix": 28, "make_int8_cache_calibr": 29, "make_int8_calibr": 30, "torchscript": [31, 32, 33, 37, 41, 60, 66, 69, 88, 89, 92, 117, 118], "check_method_operator_support": 31, "compil": [32, 57, 59, 63, 64, 66, 68, 69, 89, 94, 97, 100, 101, 102, 103, 104, 105, 106, 107, 109, 112, 114, 115, 117, 118], "embed_engine_in_new_modul": 33, "get_build_info": 34, "set_devic": 35, "dump_build_info": 36, "convert_method_to_trt_engin": 37, "program": [42, 43, 44, 45, 63, 100, 116], "list": [42, 43, 44, 45, 83], "struct": [46, 47, 48, 49, 50], "graphinput": 47, "input": [48, 102, 104], "compilespec": 49, "torch": [50, 61, 63, 64, 65, 66, 68, 69, 89, 90, 92, 93, 95, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118], "tensorrt": [50, 58, 61, 63, 64, 65, 66, 69, 89, 90, 92, 93, 95, 99, 100, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118], "c": [50, 61, 66, 68, 69, 89, 91, 112], "api": [50, 51, 61, 66, 69], "hierarchi": 50, "full": [50, 51], "torchtrtc": [52, 89], "convers": [53, 57, 59, 60], "phase": [53, 55, 56, 57, 58, 59], "node": 53, "evalu": [53, 54, 70], "convert": [53, 54, 60, 65, 70, 89, 93], "write": [54, 60, 62, 93, 95], "dynamo": [54, 62, 69, 71, 106, 107, 117, 118], "implement": [54, 93], "registr": 54, "capabl": 54, "valid": 54, "contract": [54, 60], "exampl": [54, 62, 82, 84, 94], "convolut": 54, "oper": [54, 64, 70, 89, 95], "decomposit": 54, "addmm": [54, 55], "lower": [55, 57, 59, 62], "pass": [55, 62], "us": [55, 61, 89, 90, 92, 93, 95, 101, 102, 103, 104, 105, 106, 107, 108, 112, 114], "eliminatecommonsubexpress": 55, "elimin": 55, "dead": 55, "code": [55, 69, 82], "except": 55, "Or": 55, "pattern": 55, "redund": 55, "guard": 55, "freez": 55, "modul": [55, 88, 89, 99, 118], "fuse": 55, "branch": 55, "linear": 55, "flatten": 55, "graph": [55, 58, 118], "tupl": 55, "fallback": [55, 56], "peephol": 55, "optim": [55, 68, 110, 111, 113], "remov": 55, "contigu": 55, "dropout": 55, "To": 55, "unpack": 55, "logsoftmax": 55, "unrol": 55, "loop": [55, 108], "replac": [55, 82], "tile": 55, "repeat": 55, "partit": [56, 57, 59], "partitoninfo": 56, "segmentedblock": 56, "shape_analysi": 56, "automat": [56, 109], "depend": [56, 66, 98, 110], "awar": [56, 112], "runtim": [57, 58, 59, 75, 94, 116], "background": [58, 60], "engin": [58, 65, 95, 96, 97], "executor": 
58, "op": [58, 65, 95], "construct": 58, "result": 58, "serial": [58, 64, 68], "deseri": 58, "abi": [58, 66], "version": [58, 66], "format": [58, 118], "system": [59, 66], "overview": [59, 67], "what": 60, "guarante": 60, "respons": 60, "context": [60, 80, 109], "arg": [60, 81], "weight": [60, 100, 108, 109], "other": 60, "advic": 60, "link": [61, 82], "develop": 61, "avail": 61, "layer": 61, "expect": 61, "dimens": 61, "python": [61, 66, 68, 69, 88, 90, 91], "sometim": 61, "easier": 61, "read": 61, "pytorch": [61, 65, 69, 92, 95, 106, 107, 112], "native_op": 61, "ir": [61, 117, 118], "aten": 62, "basic": 62, "requir": 62, "regist": [62, 89], "export": [63, 68, 105, 114], "customiz": [63, 64], "set": [63, 64, 99, 101, 105, 110, 111, 113], "under": [63, 89, 114], "hood": [63, 89, 114], "trace": 63, "backend": [64, 102, 103, 104, 106, 107], "kei": 64, "featur": 64, "custom": [64, 89, 93, 95, 97, 101, 114], "usag": [64, 100, 101], "after": 64, "model": [64, 65, 69, 94, 95, 98, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 115, 117], "perform": 64, "coverag": 64, "feasibl": 64, "dynam": [64, 102, 112, 114], "shape": [64, 102, 112, 114], "support": [64, 70], "recompil": [64, 102], "condit": 64, "fx": [65, 69, 72, 112, 118], "frontend": [65, 66, 69, 92, 112, 118], "user": [65, 69], "guid": [65, 69], "acc": 65, "tracer": 65, "fx2trt": 65, "how": [65, 80, 91], "add": 65, "miss": 65, "instal": [66, 87], "precompil": 66, "binari": 66, "specif": 66, "cuda": [66, 101, 104], "nightli": 66, "build": [66, 67, 80, 110, 111, 113], "onli": 66, "from": [66, 92], "sourc": 66, "linux": 66, "packag": [66, 116], "addit": 66, "option": [66, 68, 80, 81, 83, 102, 104, 109, 118], "distribut": 66, "No": 66, "librari": [66, 116], "standalon": 66, "releas": 66, "debug": 66, "pre": [66, 108], "cxx11": 66, "choos": 66, "right": 66, "window": [66, 94], "step": [66, 68, 110, 111, 113], "advanc": [66, 100, 101], "setup": 66, "troubleshoot": 66, "altern": 66, "cmake": 66, "nativ": 66, "aarch64": 66, "jetson": 66, "prerequisit": [66, 67], "environ": 66, "cli": [66, 69], "jetpack": 67, "6": [67, 84], "1": [67, 68, 84, 110, 111, 113], "quick": 68, "start": [68, 69], "2": [68, 84, 85, 110, 111, 113], "deploi": [68, 108, 112, 116], "deploy": 68, "In": [69, 100], "framework": 69, "infer": [69, 102, 103, 104, 105, 108, 110, 111, 113], "nvidia": 69, "gpu": 69, "get": 69, "tutori": [69, 110], "zoo": [69, 98, 110], "contributor": 69, "indic": 69, "legaci": [69, 112, 118], "further": 69, "inform": 69, "current": 70, "through": 70, "ts": [74, 77, 118], "submodul": 76, "comput": 78, "time": [78, 118], "changelog": 79, "configur": 80, "project": 80, "wide": 80, "html": 80, "theme": [80, 86], "toc": 80, "page": 80, "tabl": [80, 81, 82, 83, 84, 85], "mod": 81, "test_py_modul": 81, "gener": [81, 106, 107], "index": 81, "paramet": 81, "data": 81, "paragraph": [82, 85], "markup": 82, "inlin": 82, "math": 82, "meta": 82, "block": 82, "liter": 82, "line": 82, "quot": 82, "doctest": 82, "emphas": 82, "number": [82, 83], "sidebar": 82, "ch": 82, "ien": 82, "The": [82, 89], "creativ": 82, "A": 82, "refer": 82, "footnot": 82, "citat": [82, 91], "glossari": 82, "target": 82, "direct": 82, "center": 82, "text": 82, "imag": [82, 83], "figur": 82, "admonit": 82, "And": 82, "wai": 82, "topic": 82, "rubric": 82, "titl": 82, "compound": 82, "download": [82, 87], "enumer": 83, "field": 83, "bullet": 83, "second": 83, "But": 83, "deeper": 83, "down": 83, "rabbit": 83, "hole": 83, "hlist": 83, "grid": 83, "giant": 83, "can": 83, 
"have": 83, "caption": [83, 86], "like": 83, "thi": [83, 86], "one": 83, "long": [84, 86], "sticki": 84, "nav": 84, "menu": [84, 86], "3": [84, 110, 111, 113], "4": 84, "5": 84, "7": 84, "8": 84, "9": 84, "10": 84, "11": 84, "12": 84, "13": 84, "14": 84, "15": 84, "16": 84, "17": 84, "18": 84, "19": 84, "20": 84, "submenu": 84, "subsubmenu": 84, "structur": 85, "element": 85, "section": 85, "subsect": 85, "subsubsect": 85, "demo": 86, "an": 86, "incred": 86, "via": 87, "git": 87, "creat": [88, 91], "work": [88, 89], "save": [88, 99, 117], "disk": 88, "quickstart": 89, "unsupport": 89, "post": 91, "train": [91, 108, 112], "quantiz": [91, 108, 112], "your": [91, 110, 111, 113], "own": 91, "applic": 91, "directli": 92, "overload": 93, "metadata": 93, "our": [93, 95], "cross": 94, "import": [94, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109], "kernel": 95, "within": 95, "test": 95, "wrap": 95, "insert": 95, "cach": [96, 97, 100], "bert": [96, 104, 112], "jit": [97, 114], "aot": [97, 114], "mutabl": 99, "initi": 99, "make": [99, 100], "modif": 99, "stabl": [99, 103], "diffus": [99, 103], "huggingfac": 99, "refit": 100, "new": 100, "standard": 100, "workflow": 100, "refitt": 100, "pretrain": 100, "map": 100, "place": 100, "default": [101, 105], "cleanup": [101, 104], "driver": [101, 104], "error": [101, 104], "note": [101, 104], "resnet": 102, "argument": [102, 104], "avoid": 102, "specifi": 102, "befor": 102, "trt": 102, "cudagraph": [105, 116], "integr": 105, "gpt2": 106, "output": [106, 107], "decod": [106, 107], "sentenc": [106, 107], "llama2": 107, "load": [108, 117], "dataset": 108, "loss": 108, "calibr": 108, "tune": 108, "fp8": 108, "stream": 109, "run": 109, "budget": 109, "size": 109, "manag": 109, "serv": [110, 111, 112, 113], "triton": [110, 111, 113], "up": [110, 111, 113], "server": [110, 111, 113], "client": [110, 111, 113], "queri": [110, 111, 113], "notebook": 112, "citrinet": 112, "efficientnet": 112, "mask": 112, "languag": 112, "mlm": 112, "hug": 112, "face": 112, "transform": 112, "acceler": 112, "resnet50": 112, "lenet": 112, "deep": 112, "learn": 112, "object": 112, "detect": 112, "ssd": 112, "int8": 112, "constraint": 114, "mix": 115, "precis": 115, "libtorchtrt": 116, "so": 116, "plugin": 116, "multi": 116, "safe": 116, "mode": 116, "exportedprogram": 117, "b": 117, "explain": 118, "just": 118, "accept": 118, "return": 118, "ahead": 118, "dla": 119}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file diff --git a/docs/sg_execution_times.html b/docs/sg_execution_times.html index a79ed53199..0bcebc06da 100644 --- a/docs/sg_execution_times.html +++ b/docs/sg_execution_times.html @@ -10,7 +10,7 @@ - Computation times — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Computation times — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/changelog.html b/docs/src/pytorch-sphinx-theme/docs/changelog.html index 0202154362..2ae378e0ab 100644 --- a/docs/src/pytorch-sphinx-theme/docs/changelog.html +++ b/docs/src/pytorch-sphinx-theme/docs/changelog.html @@ -10,7 +10,7 @@ - Changelog — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Changelog — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/configuring.html b/docs/src/pytorch-sphinx-theme/docs/configuring.html index 40c79eb8e8..9414c465ba 100644 --- a/docs/src/pytorch-sphinx-theme/docs/configuring.html +++ b/docs/src/pytorch-sphinx-theme/docs/configuring.html @@ -10,7 +10,7 @@ - Configuration — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Configuration — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/demo/api.html b/docs/src/pytorch-sphinx-theme/docs/demo/api.html index 9b78631981..ea3d1e3737 100644 --- a/docs/src/pytorch-sphinx-theme/docs/demo/api.html +++ b/docs/src/pytorch-sphinx-theme/docs/demo/api.html @@ -10,7 +10,7 @@ - 5. :mod:`test_py_module` — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + 5. :mod:`test_py_module` — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/demo/demo.html b/docs/src/pytorch-sphinx-theme/docs/demo/demo.html index fcdfd7efad..a5ed7e490f 100644 --- a/docs/src/pytorch-sphinx-theme/docs/demo/demo.html +++ b/docs/src/pytorch-sphinx-theme/docs/demo/demo.html @@ -12,7 +12,7 @@ - 3. Paragraph Level Markup — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + 3. Paragraph Level Markup — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -664,7 +664,7 @@

    3.4.4.

    3.4.5. Code Blocks¶

    # parsed-literal test
    -curl -O http://someurl/release-v2.6.0.dev0+a945aeb.tar-gz
    +curl -O http://someurl/release-v2.6.0.dev0+38b1804.tar-gz

    Code Blocks can have captions.¶
    {
    diff --git a/docs/src/pytorch-sphinx-theme/docs/demo/lists_tables.html b/docs/src/pytorch-sphinx-theme/docs/demo/lists_tables.html
    index c098019c56..24bb761789 100644
    --- a/docs/src/pytorch-sphinx-theme/docs/demo/lists_tables.html
    +++ b/docs/src/pytorch-sphinx-theme/docs/demo/lists_tables.html
    @@ -10,7 +10,7 @@
     
       
       
    -  4. Lists & Tables — Torch-TensorRT v2.6.0.dev0+a945aeb documentation
    +  4. Lists & Tables — Torch-TensorRT v2.6.0.dev0+38b1804 documentation
       
     
       
    @@ -273,7 +273,7 @@
                   
                   
                     
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/demo/long.html b/docs/src/pytorch-sphinx-theme/docs/demo/long.html index dde528db37..cddb2a73d5 100644 --- a/docs/src/pytorch-sphinx-theme/docs/demo/long.html +++ b/docs/src/pytorch-sphinx-theme/docs/demo/long.html @@ -10,7 +10,7 @@ - 1. Long Sticky Nav — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + 1. Long Sticky Nav — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/demo/structure.html b/docs/src/pytorch-sphinx-theme/docs/demo/structure.html index eaba1477d7..a186b82d6d 100644 --- a/docs/src/pytorch-sphinx-theme/docs/demo/structure.html +++ b/docs/src/pytorch-sphinx-theme/docs/demo/structure.html @@ -10,7 +10,7 @@ - 1. Structural Elements — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + 1. Structural Elements — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/index.html b/docs/src/pytorch-sphinx-theme/docs/index.html index 34967e25e5..16c7fc6b4f 100644 --- a/docs/src/pytorch-sphinx-theme/docs/index.html +++ b/docs/src/pytorch-sphinx-theme/docs/index.html @@ -10,7 +10,7 @@ - <no title> — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + <no title> — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/src/pytorch-sphinx-theme/docs/installing.html b/docs/src/pytorch-sphinx-theme/docs/installing.html index 850745efbd..de94040bdf 100644 --- a/docs/src/pytorch-sphinx-theme/docs/installing.html +++ b/docs/src/pytorch-sphinx-theme/docs/installing.html @@ -10,7 +10,7 @@ - Installation — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Installation — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/ts/creating_torchscript_module_in_python.html b/docs/ts/creating_torchscript_module_in_python.html index 77938e826c..a31391dedb 100644 --- a/docs/ts/creating_torchscript_module_in_python.html +++ b/docs/ts/creating_torchscript_module_in_python.html @@ -10,7 +10,7 @@ - Creating a TorchScript Module — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Creating a TorchScript Module — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/ts/getting_started_with_cpp_api.html b/docs/ts/getting_started_with_cpp_api.html index 24f97339c2..bcb56048d1 100644 --- a/docs/ts/getting_started_with_cpp_api.html +++ b/docs/ts/getting_started_with_cpp_api.html @@ -10,7 +10,7 @@ - Using Torch-TensorRT in C++ — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Using Torch-TensorRT in C++ — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/ts/getting_started_with_python_api.html b/docs/ts/getting_started_with_python_api.html index d9d7cd765c..355853a55b 100644 --- a/docs/ts/getting_started_with_python_api.html +++ b/docs/ts/getting_started_with_python_api.html @@ -10,7 +10,7 @@ - Using Torch-TensorRT in Python — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Using Torch-TensorRT in Python — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/ts/ptq.html b/docs/ts/ptq.html index b4133e1795..4a36c12ace 100644 --- a/docs/ts/ptq.html +++ b/docs/ts/ptq.html @@ -10,7 +10,7 @@ - Post Training Quantization (PTQ) — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Post Training Quantization (PTQ) — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/ts/torchscript_frontend_from_pytorch.html b/docs/ts/torchscript_frontend_from_pytorch.html index 8db994ee5f..2c0831dae9 100644 --- a/docs/ts/torchscript_frontend_from_pytorch.html +++ b/docs/ts/torchscript_frontend_from_pytorch.html @@ -10,7 +10,7 @@ - Using Torch-TensorRT TorchScript Frontend Directly From PyTorch — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Using Torch-TensorRT TorchScript Frontend Directly From PyTorch — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/converter_overloading.html b/docs/tutorials/_rendered_examples/dynamo/converter_overloading.html index 7dc7565fcf..49c2961f11 100644 --- a/docs/tutorials/_rendered_examples/dynamo/converter_overloading.html +++ b/docs/tutorials/_rendered_examples/dynamo/converter_overloading.html @@ -10,7 +10,7 @@ - Overloading Torch-TensorRT Converters with Custom Converters — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Overloading Torch-TensorRT Converters with Custom Converters — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows.html b/docs/tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows.html index 850dcc13e2..b306bb3fab 100644 --- a/docs/tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows.html +++ b/docs/tutorials/_rendered_examples/dynamo/cross_runtime_compilation_for_windows.html @@ -10,7 +10,7 @@ - Cross runtime compilation for windows example — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Cross runtime compilation for windows example — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html b/docs/tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html index 1004b98547..a8e2b7ce3b 100644 --- a/docs/tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html +++ b/docs/tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html @@ -10,7 +10,7 @@ - Using Custom Kernels within TensorRT Engines with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Using Custom Kernels within TensorRT Engines with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.html b/docs/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.html index 0e28fc7f13..2e6dfaa71f 100644 --- a/docs/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/engine_caching_bert_example.html @@ -10,7 +10,7 @@ - Engine Caching (BERT) — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Engine Caching (BERT) — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -519,7 +519,7 @@ "truncate_double": True, "debug": False, "min_block_size": 1, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "engine_cache_dir": "/tmp/torch_trt_bert_engine_cache", diff --git a/docs/tutorials/_rendered_examples/dynamo/engine_caching_example.html b/docs/tutorials/_rendered_examples/dynamo/engine_caching_example.html index e507e9c70b..627df3a642 100644 --- a/docs/tutorials/_rendered_examples/dynamo/engine_caching_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/engine_caching_example.html @@ -10,7 +10,7 @@ - Engine Caching — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Engine Caching — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -529,7 +529,7 @@

Engine Caching for JIT Compilation
cache_built_engines=True),
-the engine must be refittable (make_refittable=True). See Refitting Torch-TensorRT Programs with New Weights for more details.

    +the engine must be refittable (immutable_weights=False). See Refitting Torch-TensorRT Programs with New Weights for more details.
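As a point of reference, here is a minimal sketch of a compile call that satisfies this requirement, assuming the torch.compile path with the TensorRT backend that importing torch_tensorrt registers; the model and input shape are arbitrary placeholders, and the option names follow the tutorial code below.

import torch
import torch_tensorrt  # importing this registers the "tensorrt" backend for torch.compile
import torchvision.models as models

# Placeholder model and input; any eval-mode CUDA module works here.
model = models.resnet18(pretrained=True).eval().cuda()
inputs = [torch.randn((1, 3, 224, 224)).cuda()]

compiled_model = torch.compile(
    model,
    backend="tensorrt",
    options={
        "immutable_weights": False,    # keep the engine refittable
        "cache_built_engines": True,   # insert newly built engines into the cache
        "reuse_cached_engines": True,  # pull cached engines and refit their weights
    },
)
compiled_model(*inputs)  # first call builds the engine (or refits a cached one)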

    def torch_compile(iterations=3):
         times = []
         start = torch.cuda.Event(enable_timing=True)
    @@ -561,7 +561,7 @@ 

Engine Caching for JIT Compilation
"enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, }, @@ -620,7 +620,7 @@

Engine Caching for AOT Compilation
enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, + immutable_weights=False, cache_built_engines=cache_built_engines, reuse_cached_engines=reuse_cached_engines, engine_cache_size=1 << 30, # 1GB @@ -730,7 +730,7 @@

Custom Engine Cache
"enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "custom_engine_cache": engine_cache, diff --git a/docs/tutorials/_rendered_examples/dynamo/index.html b/docs/tutorials/_rendered_examples/dynamo/index.html index 65fc6c9c1b..6888510b51 100644 --- a/docs/tutorials/_rendered_examples/dynamo/index.html +++ b/docs/tutorials/_rendered_examples/dynamo/index.html @@ -10,7 +10,7 @@ - Dependencies — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Dependencies — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html b/docs/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html index b0ff6a0969..3692818cc1 100644 --- a/docs/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html @@ -10,7 +10,7 @@ - Mutable Torch TensorRT Module — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Mutable Torch TensorRT Module — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -497,7 +497,7 @@

    Initialize the Mutable Torch TensorRT Module with settings.
    settings = {
         "use_python": False,
         "enabled_precisions": {torch.float32},
    -    "make_refittable": True,
    +    "immutable_weights": False,
     }
     
     model = models.resnet18(pretrained=True).eval().to("cuda")
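To show where these settings end up, here is a brief sketch of the typical next step, reusing the settings and model defined just above and assuming, as in this tutorial, that MutableTorchTensorRTModule accepts the settings dict as keyword arguments; the input shape and the load_state_dict update are illustrative.

import torch
import torch_tensorrt as torch_trt
import torchvision.models as models

# Wrap the model; compilation happens lazily on the first forward call, and later
# weight changes can be refit because immutable_weights=False above.
mutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)
inputs = [torch.rand((1, 3, 224, 224)).to("cuda")]
mutable_module(*inputs)

# Loading new weights marks the module so the engine is refit on the next call.
model2 = models.resnet18(pretrained=True).eval().to("cuda")
mutable_module.load_state_dict(model2.state_dict())
mutable_module(*inputs)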
    @@ -545,7 +545,7 @@ 

Stable Diffusion with Huggingface
"use_python_runtime": True, "enabled_precisions": {torch.float16}, "debug": True, - "make_refittable": True, + "immutable_weights": False, } model_id = "runwayml/stable-diffusion-v1-5" diff --git a/docs/tutorials/_rendered_examples/dynamo/refit_engine_example.html b/docs/tutorials/_rendered_examples/dynamo/refit_engine_example.html index f8eeb0a6cf..2702d3c2c3 100644 --- a/docs/tutorials/_rendered_examples/dynamo/refit_engine_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/refit_engine_example.html @@ -10,7 +10,7 @@ - Refitting Torch-TensorRT Programs with New Weights — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Refitting Torch-TensorRT Programs with New Weights — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    @@ -512,7 +512,7 @@

    Imports and model definition

    Make a refittable Compilation Program¶

The initial step is to compile a module and save it as normal. Note that there is an
-additional parameter make_refittable that is set to True. This parameter is used to
+additional parameter immutable_weights that is set to False. This parameter is used to
indicate that the engine being built should support weight refitting later. Engines built
without this setting cannot be refit later.

    In this case we are going to compile a ResNet18 model with randomly initialized weights and save it.
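For orientation on where this example is heading, here is a short sketch of the refit step that follows the compilation shown below; it assumes torch_tensorrt.dynamo.refit_module_weights with the keyword names used in this tutorial (compiled_module, new_weight_module, arg_inputs), and trt_gm and inputs refer to the compiled program and inputs from the compilation step.

import torch
import torch_tensorrt
import torchvision.models as models

# A second ResNet18 with different weights (here pretrained) but the same architecture.
model2 = models.resnet18(pretrained=True).eval().to("cuda")
exp_program2 = torch.export.export(model2, tuple(inputs))

# Refit the previously compiled, refittable program instead of rebuilding the engine.
new_trt_gm = torch_tensorrt.dynamo.refit_module_weights(
    compiled_module=trt_gm,          # program compiled with immutable_weights=False
    new_weight_module=exp_program2,  # exported program carrying the new weights
    arg_inputs=inputs,
)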

    @@ -532,7 +532,7 @@

Make a refittable Compilation Program
debug=debug, min_block_size=min_block_size, torch_executed_ops=torch_executed_ops, - make_refittable=True, + immutable_weights=False, reuse_cached_engines=False, ) # Output is a torch.fx.GraphModule diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage.html b/docs/tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage.html index fa31cc2854..a82422f494 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_compile_advanced_usage.html @@ -10,7 +10,7 @@ - Torch Compile Advanced Usage — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch Compile Advanced Usage — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html b/docs/tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html index cb19831f70..3af879c701 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_compile_resnet_example.html @@ -10,7 +10,7 @@ - Compiling ResNet with dynamic shapes using the torch.compile backend — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiling ResNet with dynamic shapes using the torch.compile backend — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html b/docs/tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html index c0fc6409cf..ab2c82b92e 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html @@ -10,7 +10,7 @@ - Compiling Stable Diffusion model using the torch.compile backend — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiling Stable Diffusion model using the torch.compile backend — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html b/docs/tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html index 09d29e00de..b6a84d56ff 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_compile_transformers_example.html @@ -10,7 +10,7 @@ - Compiling BERT using the torch.compile backend — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiling BERT using the torch.compile backend — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_export_cudagraphs.html b/docs/tutorials/_rendered_examples/dynamo/torch_export_cudagraphs.html index 9948251940..f465c0c8d1 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_export_cudagraphs.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_export_cudagraphs.html @@ -10,7 +10,7 @@ - Torch Export with Cudagraphs — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch Export with Cudagraphs — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_export_gpt2.html b/docs/tutorials/_rendered_examples/dynamo/torch_export_gpt2.html index 9322e5949a..e630b9b8b4 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_export_gpt2.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_export_gpt2.html @@ -10,7 +10,7 @@ - Compiling GPT2 using the dynamo backend — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiling GPT2 using the dynamo backend — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/torch_export_llama2.html b/docs/tutorials/_rendered_examples/dynamo/torch_export_llama2.html index 32cda12e8c..b13d91f3a1 100644 --- a/docs/tutorials/_rendered_examples/dynamo/torch_export_llama2.html +++ b/docs/tutorials/_rendered_examples/dynamo/torch_export_llama2.html @@ -10,7 +10,7 @@ - Compiling Llama2 using the dynamo backend — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compiling Llama2 using the dynamo backend — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/vgg16_ptq.html b/docs/tutorials/_rendered_examples/dynamo/vgg16_ptq.html index dcdbb95a01..f9895a223b 100644 --- a/docs/tutorials/_rendered_examples/dynamo/vgg16_ptq.html +++ b/docs/tutorials/_rendered_examples/dynamo/vgg16_ptq.html @@ -10,7 +10,7 @@ - Deploy Quantized Models using Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Deploy Quantized Models using Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/dynamo/weight_streaming_example.html b/docs/tutorials/_rendered_examples/dynamo/weight_streaming_example.html index a04452d1aa..1866c8cb51 100644 --- a/docs/tutorials/_rendered_examples/dynamo/weight_streaming_example.html +++ b/docs/tutorials/_rendered_examples/dynamo/weight_streaming_example.html @@ -10,7 +10,7 @@ - Weight Streaming — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Weight Streaming — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/index.html b/docs/tutorials/_rendered_examples/index.html index bd5bb6e20f..adb36c127e 100644 --- a/docs/tutorials/_rendered_examples/index.html +++ b/docs/tutorials/_rendered_examples/index.html @@ -10,7 +10,7 @@ - Torch-TensorRT Tutorials — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch-TensorRT Tutorials — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/_rendered_examples/triton/index.html b/docs/tutorials/_rendered_examples/triton/index.html index 671746429c..ad3394a8bc 100644 --- a/docs/tutorials/_rendered_examples/triton/index.html +++ b/docs/tutorials/_rendered_examples/triton/index.html @@ -10,7 +10,7 @@ - Serving a Torch-TensorRT model with Triton — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Serving a Torch-TensorRT model with Triton — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -273,7 +273,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/notebooks.html b/docs/tutorials/notebooks.html index 9691489ac8..9df4933d91 100644 --- a/docs/tutorials/notebooks.html +++ b/docs/tutorials/notebooks.html @@ -10,7 +10,7 @@ - Legacy notebooks — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Legacy notebooks — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/tutorials/serving_torch_tensorrt_with_triton.html b/docs/tutorials/serving_torch_tensorrt_with_triton.html index 19137aac7a..2baeb06648 100644 --- a/docs/tutorials/serving_torch_tensorrt_with_triton.html +++ b/docs/tutorials/serving_torch_tensorrt_with_triton.html @@ -10,7 +10,7 @@ - Serving a Torch-TensorRT model with Triton — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Serving a Torch-TensorRT model with Triton — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/user_guide/dynamic_shapes.html b/docs/user_guide/dynamic_shapes.html index 0e46c241c6..5fe79f9a60 100644 --- a/docs/user_guide/dynamic_shapes.html +++ b/docs/user_guide/dynamic_shapes.html @@ -10,7 +10,7 @@ - Dynamic shapes with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Dynamic shapes with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/user_guide/mixed_precision.html b/docs/user_guide/mixed_precision.html index 8ae2c16fbe..5d5d36a72a 100644 --- a/docs/user_guide/mixed_precision.html +++ b/docs/user_guide/mixed_precision.html @@ -10,7 +10,7 @@ - Compile Mixed Precision models with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Compile Mixed Precision models with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/user_guide/runtime.html b/docs/user_guide/runtime.html index 3a4998af9f..803b72bf85 100644 --- a/docs/user_guide/runtime.html +++ b/docs/user_guide/runtime.html @@ -10,7 +10,7 @@ - Deploying Torch-TensorRT Programs — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Deploying Torch-TensorRT Programs — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/user_guide/saving_models.html b/docs/user_guide/saving_models.html index 4d19414322..c157f13610 100644 --- a/docs/user_guide/saving_models.html +++ b/docs/user_guide/saving_models.html @@ -10,7 +10,7 @@ - Saving models compiled with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Saving models compiled with Torch-TensorRT — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/user_guide/torch_tensorrt_explained.html b/docs/user_guide/torch_tensorrt_explained.html index 16305fe826..06267a5cac 100644 --- a/docs/user_guide/torch_tensorrt_explained.html +++ b/docs/user_guide/torch_tensorrt_explained.html @@ -10,7 +10,7 @@ - Torch-TensorRT Explained — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + Torch-TensorRT Explained — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/docs/user_guide/using_dla.html b/docs/user_guide/using_dla.html index 62f86a2457..c35833ba11 100644 --- a/docs/user_guide/using_dla.html +++ b/docs/user_guide/using_dla.html @@ -10,7 +10,7 @@ - DLA — Torch-TensorRT v2.6.0.dev0+a945aeb documentation + DLA — Torch-TensorRT v2.6.0.dev0+38b1804 documentation @@ -275,7 +275,7 @@
    - v2.6.0.dev0+a945aeb + v2.6.0.dev0+38b1804
    diff --git a/examples/dynamo/engine_caching_bert_example.py b/examples/dynamo/engine_caching_bert_example.py index 989913bd31..1148d4f792 100644 --- a/examples/dynamo/engine_caching_bert_example.py +++ b/examples/dynamo/engine_caching_bert_example.py @@ -52,7 +52,7 @@ def compile_bert(iterations=3): "truncate_double": True, "debug": False, "min_block_size": 1, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "engine_cache_dir": "/tmp/torch_trt_bert_engine_cache", diff --git a/examples/dynamo/engine_caching_example.py b/examples/dynamo/engine_caching_example.py index 28ff73aa72..fb4c341077 100644 --- a/examples/dynamo/engine_caching_example.py +++ b/examples/dynamo/engine_caching_example.py @@ -63,7 +63,7 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): # in a subsequent compilation, either as part of this session or a new session, the cache will # pull the built engine and **refit** the weights which can reduce compilation times by orders of magnitude. # As such, in order to insert a new engine into the cache (i.e. ``cache_built_engines=True``), -# the engine must be refittable (``make_refittable=True``). See :ref:`refit_engine_example` for more details. +# the engine must be refittable (``immutable_weights=False``). See :ref:`refit_engine_example` for more details. def torch_compile(iterations=3): @@ -97,7 +97,7 @@ def torch_compile(iterations=3): "enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, }, @@ -157,7 +157,7 @@ def dynamo_compile(iterations=3): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, + immutable_weights=False, cache_built_engines=cache_built_engines, reuse_cached_engines=reuse_cached_engines, engine_cache_size=1 << 30, # 1GB @@ -268,7 +268,7 @@ def torch_compile_my_cache(iterations=3): "enabled_precisions": enabled_precisions, "debug": debug, "min_block_size": min_block_size, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "custom_engine_cache": engine_cache, diff --git a/examples/dynamo/mutable_torchtrt_module_example.py b/examples/dynamo/mutable_torchtrt_module_example.py index b68c9a11ee..8b62855c32 100644 --- a/examples/dynamo/mutable_torchtrt_module_example.py +++ b/examples/dynamo/mutable_torchtrt_module_example.py @@ -31,7 +31,7 @@ settings = { "use_python": False, "enabled_precisions": {torch.float32}, - "make_refittable": True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -80,7 +80,7 @@ "use_python_runtime": True, "enabled_precisions": {torch.float16}, "debug": True, - "make_refittable": True, + "immutable_weights": False, } model_id = "runwayml/stable-diffusion-v1-5" diff --git a/examples/dynamo/refit_engine_example.py b/examples/dynamo/refit_engine_example.py index f93b097385..66a1a70964 100644 --- a/examples/dynamo/refit_engine_example.py +++ b/examples/dynamo/refit_engine_example.py @@ -47,7 +47,7 @@ # --------------------------------------- # # The inital step is to compile a module and save it as with a normal. Note that there is an -# additional parameter `make_refittable` that is set to `True`. 
This parameter is used to +# additional parameter `immutable_weights` that is set to `False`. This parameter is used to # indicate that the engine being built should support weight refitting later. Engines built without # these setttings will not be able to be refit. # @@ -69,7 +69,7 @@ debug=debug, min_block_size=min_block_size, torch_executed_ops=torch_executed_ops, - make_refittable=True, + immutable_weights=False, reuse_cached_engines=False, ) # Output is a torch.fx.GraphModule diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 6b107b63b0..6acc19354b 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -23,18 +23,20 @@ export TORCH_INSTALL_PATH=$(python -c "import torch, os; print(os.path.dirname(t if [[ ${TENSORRT_VERSION} != "" ]]; then # this is the upgraded TensorRT version, replace current tensorrt version to the upgrade tensorRT version in the pyproject.toml + # example: __tensorrt_version__: ">=10.3.0,<=10.6.0" + # replace: tensorrt-cu12>=10.3.0,<=10.6.0 to tensorrt-cu12==10.7.0 current_version=$(cat dev_dep_versions.yml | grep __tensorrt_version__ | sed 's/__tensorrt_version__: //g' | sed 's/"//g') - sed -i -e "s/tensorrt-cu12==${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ - -e "s/tensorrt-cu12-bindings==${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ - -e "s/tensorrt-cu12-libs==${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" \ + sed -i -e "s/tensorrt-cu12${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-bindings${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-libs${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" \ pyproject.toml fi if [[ "${CU_VERSION::4}" < "cu12" ]]; then # replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs - sed -i -e "s/tensorrt-cu12==/tensorrt-${CU_VERSION::4}==/g" \ - -e "s/tensorrt-cu12-bindings==/tensorrt-${CU_VERSION::4}-bindings==/g" \ - -e "s/tensorrt-cu12-libs==/tensorrt-${CU_VERSION::4}-libs==/g" \ + sed -i -e "s/tensorrt-cu12/tensorrt-${CU_VERSION::4}/g" \ + -e "s/tensorrt-cu12-bindings/tensorrt-${CU_VERSION::4}-bindings/g" \ + -e "s/tensorrt-cu12-libs/tensorrt-${CU_VERSION::4}-libs/g" \ pyproject.toml fi diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 5e6f32f569..219c0f9fbb 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -8,23 +8,23 @@ echo TENSORRT_VERSION=${TENSORRT_VERSION} if [[ ${TENSORRT_VERSION} != "" ]]; then # this is the upgraded TensorRT version, replace current tensorrt version to the upgrade tensorRT version in the pyproject.toml + # example: __tensorrt_version__: ">=10.3.0,<=10.6.0" + # replace: tensorrt-cu12>=10.3.0,<=10.6.0 to tensorrt-cu12==10.7.0 current_version=$(cat dev_dep_versions.yml | grep __tensorrt_version__ | sed 's/__tensorrt_version__: //g' | sed 's/"//g') - sed -i -e "s/tensorrt-cu12==${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ - -e "s/tensorrt-cu12-bindings==${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ - -e "s/tensorrt-cu12-libs==${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" \ + sed -i -e "s/tensorrt-cu12${current_version}/tensorrt-cu12==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-bindings${current_version}/tensorrt-cu12-bindings==${TENSORRT_VERSION}/g" \ + -e "s/tensorrt-cu12-libs${current_version}/tensorrt-cu12-libs==${TENSORRT_VERSION}/g" 
\ pyproject.toml fi if [[ "${CU_VERSION::4}" < "cu12" ]]; then # replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs - sed -i -e "s/tensorrt-cu12==/tensorrt-${CU_VERSION::4}==/g" \ - -e "s/tensorrt-cu12-bindings==/tensorrt-${CU_VERSION::4}-bindings==/g" \ - -e "s/tensorrt-cu12-libs==/tensorrt-${CU_VERSION::4}-libs==/g" \ + sed -i -e "s/tensorrt-cu12/tensorrt-${CU_VERSION::4}/g" \ + -e "s/tensorrt-cu12-bindings/tensorrt-${CU_VERSION::4}-bindings/g" \ + -e "s/tensorrt-cu12-libs/tensorrt-${CU_VERSION::4}-libs/g" \ pyproject.toml fi -#curl -Lo TensorRT.zip https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip -#unzip -o TensorRT.zip -d C:/ TORCH_TORCHVISION=$(grep "^torch" py/requirements.txt) INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION} diff --git a/py/ci/Dockerfile.ci b/py/ci/Dockerfile.ci index eddf12cefb..823c8bb7a1 100644 --- a/py/ci/Dockerfile.ci +++ b/py/ci/Dockerfile.ci @@ -1,4 +1,4 @@ -FROM pytorch/manylinux-builder:cuda12.4 +FROM pytorch/manylinux2_28-builder:cuda12.6 RUN yum install -y ninja-build diff --git a/py/torch_tensorrt/_enums.py b/py/torch_tensorrt/_enums.py index a580e6efbb..eaefb68ce5 100644 --- a/py/torch_tensorrt/_enums.py +++ b/py/torch_tensorrt/_enums.py @@ -220,7 +220,7 @@ def _from( return dtype.f32 elif t == np.float64: return dtype.f64 - elif t == np.bool: + elif t == np.bool_: return dtype.b # TODO: Consider using ml_dtypes when issues like this are resolved: # https://github.com/pytorch/pytorch/issues/109873 @@ -1384,7 +1384,7 @@ def current_platform(cls) -> Platform: def __str__(self) -> str: return str(self.name) - @needs_torch_tensorrt_runtime + @needs_torch_tensorrt_runtime # type: ignore def _to_serialized_rt_platform(self) -> str: val: str = torch.ops.tensorrt._platform_unknown() diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py index 9859668cd9..88e66b0f3c 100644 --- a/py/torch_tensorrt/dynamo/_compiler.py +++ b/py/torch_tensorrt/dynamo/_compiler.py @@ -63,7 +63,6 @@ def cross_compile_for_windows( Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]] ] = _defaults.ENABLED_PRECISIONS, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, - make_refittable: bool = _defaults.MAKE_REFITTABLE, debug: bool = _defaults.DEBUG, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, workspace_size: int = _defaults.WORKSPACE_SIZE, @@ -93,6 +92,9 @@ def cross_compile_for_windows( custom_engine_cache: Optional[BaseEngineCache] = _defaults.CUSTOM_ENGINE_CACHE, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, use_fp32_acc: bool = _defaults.USE_FP32_ACC, + refit_identical_engine_weights: bool = _defaults.REFIT_IDENTICAL_ENGINE_WEIGHTS, + strip_engine_weights: bool = _defaults.STRIP_ENGINE_WEIGHTS, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING, **kwargs: Any, ) -> torch.fx.GraphModule: @@ -132,7 +134,6 @@ def cross_compile_for_windows( assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False sparse_weights (bool): Enable sparsity for convolution and fully connected layers. 
enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels - refit (bool): Enable refitting debug (bool): Enable debuggable engine capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -164,6 +165,9 @@ def cross_compile_for_windows( custom_engine_cache (Optional[BaseEngineCache]): Engine cache instance to use for saving and loading engines. Users can provide their own engine cache by inheriting from BaseEngineCache. If used, engine_cache_dir and engine_cache_size will be ignored. use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs. + strip_engine_weights (bool): Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required. + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored. enable_weight_streaming (bool): Enable weight streaming. **kwargs: Any, Returns: @@ -193,14 +197,44 @@ def cross_compile_for_windows( if "refit" in kwargs.keys(): warnings.warn( - "Refit is deprecated. Please use make_refittable=True if you want to enable refitting of the engine.", + "`refit` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted.", DeprecationWarning, stacklevel=2, ) - if make_refittable: - raise ValueError("Use flag make_refittable only. Flag refit is deprecated.") + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `refit` is deprecated." + ) else: - make_refittable = kwargs["refit"] + immutable_weights = not kwargs["refit"] + + if "make_refittable" in kwargs.keys(): + warnings.warn( + "`make_refittable` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", + DeprecationWarning, + stacklevel=2, + ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `make_refittable` is deprecated." + ) + else: + immutable_weights = not kwargs["make_refittable"] + + if refit_identical_engine_weights: + if immutable_weights: + raise ValueError( + "`immutable_weights` must be False when `refit_identical_engine_weights` is True." + ) + + if ( + not immutable_weights + and not refit_identical_engine_weights + and enable_weight_streaming + ): + raise ValueError( + "TensorRT's `REFIT` flag is not compatible with `enable_weight_streaming=True` for now. 
This issue was reported on https://github.com/pytorch/TensorRT/issues/3305" + ) engine_capability = EngineCapability._from(engine_capability) @@ -275,7 +309,6 @@ def cross_compile_for_windows( "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, "engine_capability": engine_capability, "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, @@ -286,6 +319,9 @@ def cross_compile_for_windows( "lazy_engine_init": lazy_engine_init, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, + "refit_identical_engine_weights": refit_identical_engine_weights, + "strip_engine_weights": strip_engine_weights, + "immutable_weights": immutable_weights, "enable_cross_compile_for_windows": True, "enable_weight_streaming": enable_weight_streaming, } @@ -342,7 +378,6 @@ def compile( Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]] ] = _defaults.ENABLED_PRECISIONS, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, - make_refittable: bool = _defaults.MAKE_REFITTABLE, debug: bool = _defaults.DEBUG, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, workspace_size: int = _defaults.WORKSPACE_SIZE, @@ -372,6 +407,9 @@ def compile( custom_engine_cache: Optional[BaseEngineCache] = _defaults.CUSTOM_ENGINE_CACHE, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, use_fp32_acc: bool = _defaults.USE_FP32_ACC, + refit_identical_engine_weights: bool = _defaults.REFIT_IDENTICAL_ENGINE_WEIGHTS, + strip_engine_weights: bool = _defaults.STRIP_ENGINE_WEIGHTS, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING, **kwargs: Any, ) -> torch.fx.GraphModule: @@ -413,7 +451,6 @@ def compile( assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False sparse_weights (bool): Enable sparsity for convolution and fully connected layers. enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels - refit (bool): Enable refitting debug (bool): Enable debuggable engine capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -445,6 +482,9 @@ def compile( custom_engine_cache (Optional[BaseEngineCache]): Engine cache instance to use for saving and loading engines. Users can provide their own engine cache by inheriting from BaseEngineCache. If used, engine_cache_dir and engine_cache_size will be ignored. use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. This will save time by reusing the same engine for different inputs. + strip_engine_weights (bool): Strip engine weights from the serialized engine. 
This is useful when the engine is to be deployed in an environment where the weights are not required. + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored. enable_weight_streaming (bool): Enable weight streaming. **kwargs: Any, Returns: @@ -468,14 +508,44 @@ def compile( if "refit" in kwargs.keys(): warnings.warn( - "Refit is deprecated. Please use make_refittable=True if you want to enable refitting of the engine.", + "`refit` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", + DeprecationWarning, + stacklevel=2, + ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `refit` is deprecated." + ) + else: + immutable_weights = not kwargs["refit"] + + if "make_refittable" in kwargs.keys(): + warnings.warn( + "`make_refittable` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", DeprecationWarning, stacklevel=2, ) - if make_refittable: - raise ValueError("Use flag make_refittable only. Flag refit is deprecated.") + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `make_refittable` is deprecated." + ) else: - make_refittable = kwargs["refit"] + immutable_weights = not kwargs["make_refittable"] + + if refit_identical_engine_weights: + if immutable_weights: + raise ValueError( + "`immutable_weights` must be False when `refit_identical_engine_weights` is True." + ) + + if ( + not immutable_weights + and not refit_identical_engine_weights + and enable_weight_streaming + ): + raise ValueError( + "TensorRT's `REFIT` flag is not compatible with `enable_weight_streaming=True` for now. 
This issue was reported on https://github.com/pytorch/TensorRT/issues/3305" + ) if ( "enable_cross_compile_for_windows" in kwargs.keys() @@ -541,9 +611,6 @@ def compile( engine_cache = None if cache_built_engines or reuse_cached_engines: - assert ( - make_refittable - ), "Engine caching requires make_refittable to be set to True" engine_cache = ( custom_engine_cache if custom_engine_cache is not None @@ -574,7 +641,6 @@ def compile( "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, "engine_capability": engine_capability, "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, @@ -587,6 +653,9 @@ def compile( "reuse_cached_engines": reuse_cached_engines, "use_explicit_typing": use_explicit_typing, "use_fp32_acc": use_fp32_acc, + "refit_identical_engine_weights": refit_identical_engine_weights, + "strip_engine_weights": strip_engine_weights, + "immutable_weights": immutable_weights, "enable_cross_compile_for_windows": False, "enable_weight_streaming": enable_weight_streaming, } @@ -861,7 +930,6 @@ def convert_exported_program_to_serialized_trt_engine( require_full_compilation: bool = _defaults.REQUIRE_FULL_COMPILATION, disable_tf32: bool = _defaults.DISABLE_TF32, sparse_weights: bool = _defaults.SPARSE_WEIGHTS, - make_refittable: bool = _defaults.MAKE_REFITTABLE, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, dla_sram_size: int = _defaults.DLA_SRAM_SIZE, @@ -872,6 +940,9 @@ def convert_exported_program_to_serialized_trt_engine( timing_cache_path: str = _defaults.TIMING_CACHE_PATH, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, use_fp32_acc: bool = _defaults.USE_FP32_ACC, + refit_identical_engine_weights: bool = _defaults.REFIT_IDENTICAL_ENGINE_WEIGHTS, + strip_engine_weights: bool = _defaults.STRIP_ENGINE_WEIGHTS, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, enable_weight_streaming: bool = _defaults.ENABLE_WEIGHT_STREAMING, **kwargs: Any, ) -> bytes: @@ -922,7 +993,6 @@ def convert_exported_program_to_serialized_trt_engine( Only applicable for `ir="dynamo"`; has no effect for `torch.compile` path disable_tf32 (bool): Whether to disable TF32 computation for TRT layers sparse_weights (bool): Whether to allow the builder to use sparse weights - refit (bool): Whether to build a refittable engine engine_capability (trt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer. @@ -933,6 +1003,9 @@ def convert_exported_program_to_serialized_trt_engine( timing_cache_path (str): Path to the timing cache if it exists (or) where it will be saved after compilation use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Refit engines with identical weights. This is useful when the same model is compiled multiple times with different inputs and the weights are the same. 
This will save time by reusing the same engine for different inputs. + strip_engine_weights (bool): Strip engine weights from the serialized engine. This is useful when the engine is to be deployed in an environment where the weights are not required. + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored. enable_weight_streaming (bool): Enable weight streaming. Returns: bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs @@ -952,12 +1025,48 @@ def convert_exported_program_to_serialized_trt_engine( DeprecationWarning, stacklevel=2, ) + if "refit" in kwargs.keys(): warnings.warn( - "Refit is deprecated. Please use make_refittable=True if you want to enable refitting of the engine.", + "`refit` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", + DeprecationWarning, + stacklevel=2, + ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `refit` is deprecated." + ) + else: + immutable_weights = not kwargs["refit"] + + if "make_refittable" in kwargs.keys(): + warnings.warn( + "`make_refittable` is deprecated. Please set `immutable_weights=False` to build a refittable engine whose weights can be refitted", DeprecationWarning, stacklevel=2, ) + if immutable_weights: + raise ValueError( + "Use flag `immutable_weights` only. Flag `make_refittable` is deprecated." + ) + else: + immutable_weights = not kwargs["make_refittable"] + + if refit_identical_engine_weights: + if immutable_weights: + raise ValueError( + "`immutable_weights` must be False when `refit_identical_engine_weights` is True." + ) + + if ( + not immutable_weights + and not refit_identical_engine_weights + and enable_weight_streaming + ): + raise ValueError( + "TensorRT's `REFIT` flag is not compatible with `enable_weight_streaming=True` for now. 
This issue was reported on https://github.com/pytorch/TensorRT/issues/3305" + ) + if arg_inputs is None and inputs is None: raise AssertionError("'arg_inputs' and 'inputs' should not both be None.") @@ -1000,7 +1109,6 @@ def convert_exported_program_to_serialized_trt_engine( "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, "engine_capability": engine_capability, "num_avg_timing_iters": num_avg_timing_iters, "dla_sram_size": dla_sram_size, @@ -1009,6 +1117,9 @@ def convert_exported_program_to_serialized_trt_engine( "timing_cache_path": timing_cache_path, "use_explicit_typing": use_explicit_typing, "use_fp32_acc": use_fp32_acc, + "refit_identical_engine_weights": refit_identical_engine_weights, + "strip_engine_weights": strip_engine_weights, + "immutable_weights": immutable_weights, "enable_weight_streaming": enable_weight_streaming, } diff --git a/py/torch_tensorrt/dynamo/_defaults.py b/py/torch_tensorrt/dynamo/_defaults.py index f6b97b1fbb..76630a75a5 100644 --- a/py/torch_tensorrt/dynamo/_defaults.py +++ b/py/torch_tensorrt/dynamo/_defaults.py @@ -26,7 +26,6 @@ USE_PYTHON_RUNTIME = False USE_FAST_PARTITIONER = True ENABLE_EXPERIMENTAL_DECOMPOSITIONS = False -MAKE_REFITTABLE = False REQUIRE_FULL_COMPILATION = False DRYRUN = False HARDWARE_COMPATIBLE = False @@ -38,10 +37,13 @@ CACHE_BUILT_ENGINES = False REUSE_CACHED_ENGINES = False ENGINE_CACHE_DIR = os.path.join(tempfile.gettempdir(), "torch_tensorrt_engine_cache") -ENGINE_CACHE_SIZE = 1073741824 +ENGINE_CACHE_SIZE = 5368709120 # 5GB CUSTOM_ENGINE_CACHE = None USE_EXPLICIT_TYPING = False USE_FP32_ACC = False +REFIT_IDENTICAL_ENGINE_WEIGHTS = False +STRIP_ENGINE_WEIGHTS = False +IMMUTABLE_WEIGHTS = True ENABLE_WEIGHT_STREAMING = False ENABLE_CROSS_COMPILE_FOR_WINDOWS = False diff --git a/py/torch_tensorrt/dynamo/_refit.py b/py/torch_tensorrt/dynamo/_refit.py index 519423e15d..f1041682f8 100644 --- a/py/torch_tensorrt/dynamo/_refit.py +++ b/py/torch_tensorrt/dynamo/_refit.py @@ -156,13 +156,26 @@ def _refit_single_trt_engine_with_gm( if torch_device.type == "cuda" else trt.TensorLocation.HOST ) + + constant_mapping: dict[str, Any] = weight_name_map.pop( + "constant_mapping", {} + ) # type: ignore mapping = construct_refit_mapping_from_weight_name_map( weight_name_map, new_gm.state_dict() ) + constant_mapping_with_type = {} + + for constant_name, val in constant_mapping.items(): + np_weight_type = val.dtype + val_tensor = torch.from_numpy(val).cuda() + trt_dtype = dtype.try_from(np_weight_type).to(trt.DataType) + torch_dtype = dtype.try_from(np_weight_type).to(torch.dtype) + constant_mapping_with_type[constant_name] = ( + val_tensor.clone().reshape(-1).contiguous().to(torch_dtype), + trt_dtype, + ) - # Debug Use - # correct = construct_refit_mapping(new_gm, input_list, settings) - # comparison = {k: (np.allclose(correct[k][0], mapping[k][0].cpu().numpy(), 1e-2, 1e-2), correct[k][0], mapping[k][0]) for k in mapping if k in correct} + mapping.update(constant_mapping_with_type) for layer_name in weight_list: if layer_name not in mapping: @@ -251,7 +264,7 @@ def refit_module_weights( ] assert ( encoded_metadata != "" - ), "The engine provided is either not refittable or was built with a version of Torch-TensorRT that is too old, please recompile using the latest version with make_refittable=True" + ), "The engine provided is either not refittable or was built with a version of Torch-TensorRT that is too old, please recompile using the 
latest version" settings = TorchTensorRTModule.decode_metadata(encoded_metadata)["settings"] # Handle torch modules compiled_submodules_map = dict(compiled_submodules) @@ -269,8 +282,8 @@ def refit_module_weights( assert settings is not None assert ( - settings.make_refittable - ), "Refitting is not enabled. Please recompile the engine with refit=True." + not settings.immutable_weights + ), "Refitting is not enabled. Please recompile the engine with immutable_weights=False." if settings.debug: set_log_level(logger.parent, logging.DEBUG) @@ -449,17 +462,21 @@ def refit_module_weights( weight_name_map=None, ) - if isinstance(compiled_submodule, TorchTensorRTModule): - serialized_engine = bytes(engine.serialize()) - new_engine_info = list(engine_info) - new_engine_info[ENGINE_IDX] = serialized_engine - refitted_engine = torch.classes.tensorrt.Engine(tuple(new_engine_info)) - compiled_submodule.engine = refitted_engine + # clear EXCLUDE_WEIGHTS flag + serialization_config = engine.create_serialization_config() + serialization_config.clear_flag(trt.SerializationFlag.EXCLUDE_WEIGHTS) + serialized_engine = engine.serialize_with_config(serialization_config) + + if isinstance( + compiled_submodule, (PythonTorchTensorRTModule, TorchTensorRTModule) + ): + compiled_submodule.engine = None # Clear the engine for TorchTensorRTModule, otherwise it won't be updated + compiled_submodule.serialized_engine = bytes(serialized_engine) + compiled_submodule.setup_engine() elif inline_module: - serialized_engine = bytes(engine.serialize()) new_engine_info = list(engine_info) - new_engine_info[ENGINE_IDX] = serialized_engine + new_engine_info[ENGINE_IDX] = bytes(serialized_engine) refitted_engine = torch.classes.tensorrt.Engine(tuple(new_engine_info)) setattr(compiled_module, f"{name}_engine", refitted_engine) diff --git a/py/torch_tensorrt/dynamo/_settings.py b/py/torch_tensorrt/dynamo/_settings.py index 9062e2e539..7a22663af3 100644 --- a/py/torch_tensorrt/dynamo/_settings.py +++ b/py/torch_tensorrt/dynamo/_settings.py @@ -19,16 +19,18 @@ ENABLED_PRECISIONS, ENGINE_CAPABILITY, HARDWARE_COMPATIBLE, + IMMUTABLE_WEIGHTS, LAZY_ENGINE_INIT, - MAKE_REFITTABLE, MAX_AUX_STREAMS, MIN_BLOCK_SIZE, NUM_AVG_TIMING_ITERS, OPTIMIZATION_LEVEL, PASS_THROUGH_BUILD_FAILURES, + REFIT_IDENTICAL_ENGINE_WEIGHTS, REQUIRE_FULL_COMPILATION, REUSE_CACHED_ENGINES, SPARSE_WEIGHTS, + STRIP_ENGINE_WEIGHTS, TIMING_CACHE_PATH, TRUNCATE_DOUBLE, USE_EXPLICIT_TYPING, @@ -69,7 +71,6 @@ class CompilationSettings: assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False disable_tf32 (bool): Whether to disable TF32 computation for TRT layers sparse_weights (bool): Whether to allow the builder to use sparse weights - refit (bool): Whether to build a refittable engine engine_capability (trt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer. @@ -84,6 +85,9 @@ class CompilationSettings: reuse_cached_engines (bool): Whether to load the compiled TRT engines from storage use_strong_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. 
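As a companion to the deprecation handling above, a minimal migration sketch, assuming a toy model, that the second positional argument of torch_tensorrt.dynamo.compile accepts the example inputs, and that refit_module_weights is importable from torch_tensorrt.dynamo with the keyword names shown; immutable_weights=False takes the place of the deprecated make_refittable=True:

    import torch
    import torch_tensorrt as torch_trt

    model = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU()).eval().cuda()
    inputs = (torch.randn(2, 8).cuda(),)
    exp_program = torch.export.export(model, inputs)

    # Build a refittable engine: previously make_refittable=True, now immutable_weights=False
    trt_gm = torch_trt.dynamo.compile(
        exp_program,
        inputs,
        immutable_weights=False,
        min_block_size=1,
    )

    # Swap in new weights without rebuilding the engine (keyword names assumed)
    new_model = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU()).eval().cuda()
    new_exp_program = torch.export.export(new_model, inputs)
    trt_gm = torch_trt.dynamo.refit_module_weights(
        compiled_module=trt_gm,
        new_weight_module=new_exp_program,
        arg_inputs=inputs,
    )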
use_fp32_acc (bool): This option inserts cast to FP32 nodes around matmul layers and TensorRT ensures the accumulation of matmul happens in FP32. Use this only when FP16 precision is configured in enabled_precisions. + refit_identical_engine_weights (bool): Whether to refit the engine with identical weights + strip_engine_weights (bool): Whether to strip the engine weights + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. If this argument is set to true, `strip_engine_weights` and `refit_identical_engine_weights` will be ignored enable_weight_streaming (bool): Enable weight streaming. enable_cross_compile_for_windows (bool): By default this is False means TensorRT engines can only be executed on the same platform where they were built. True will enable cross-platform compatibility which allows the engine to be built on Linux and run on Windows @@ -107,7 +111,6 @@ class CompilationSettings: disable_tf32: bool = DISABLE_TF32 assume_dynamic_shape_support: bool = ASSUME_DYNAMIC_SHAPE_SUPPORT sparse_weights: bool = SPARSE_WEIGHTS - make_refittable: bool = MAKE_REFITTABLE engine_capability: EngineCapability = field( default_factory=lambda: ENGINE_CAPABILITY ) @@ -123,6 +126,9 @@ class CompilationSettings: reuse_cached_engines: bool = REUSE_CACHED_ENGINES use_explicit_typing: bool = USE_EXPLICIT_TYPING use_fp32_acc: bool = USE_FP32_ACC + refit_identical_engine_weights: bool = REFIT_IDENTICAL_ENGINE_WEIGHTS + strip_engine_weights: bool = STRIP_ENGINE_WEIGHTS + immutable_weights: bool = IMMUTABLE_WEIGHTS enable_weight_streaming: bool = ENABLE_WEIGHT_STREAMING enable_cross_compile_for_windows: bool = ENABLE_CROSS_COMPILE_FOR_WINDOWS @@ -134,9 +140,11 @@ class CompilationSettings: "optimization_level", "disable_tf32", "sparse_weights", - "make_refittable", "engine_capability", "hardware_compatible", + "refit_identical_engine_weights", + "strip_engine_weights", # TODO: @Evan to remove this after implementing caching weight-stripped engines as default? 
+ "immutable_weights", "enable_weight_streaming", ) diff --git a/py/torch_tensorrt/dynamo/backend/backends.py b/py/torch_tensorrt/dynamo/backend/backends.py index e15ed0495f..c8a30e656b 100644 --- a/py/torch_tensorrt/dynamo/backend/backends.py +++ b/py/torch_tensorrt/dynamo/backend/backends.py @@ -111,6 +111,10 @@ def _pretraced_backend( logger.warning( "require_full_compilation arg is not applicable for torch.compile with backend='torch_tensorrt" ) + if settings.strip_engine_weights: + logger.error( + "strip_engine_weights arg is not supported for torch.compile()" + ) trt_compiled = compile_module( gm, torchtrt_inputs, diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 03852ae6ae..d7c0ea449e 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -287,8 +287,21 @@ def _populate_trt_builder_config( if self.compilation_settings.disable_tf32: builder_config.clear_flag(trt.BuilderFlag.TF32) - if self.compilation_settings.make_refittable: - builder_config.set_flag(trt.BuilderFlag.REFIT) + if self.compilation_settings.immutable_weights: + # non-refittable engine + if self.compilation_settings.strip_engine_weights: + _LOGGER.warning("strip_engine_weights will be ignored.") + if self.compilation_settings.refit_identical_engine_weights: + _LOGGER.warning("refit_identical_engine_weights will be ignored.") + else: + # refittable engine + if self.compilation_settings.refit_identical_engine_weights: + builder_config.set_flag(trt.BuilderFlag.REFIT_IDENTICAL) + else: + builder_config.set_flag(trt.BuilderFlag.REFIT) + + if self.compilation_settings.strip_engine_weights: + builder_config.set_flag(trt.BuilderFlag.STRIP_PLAN) if strict_type_constraints: builder_config.set_flag(trt.BuilderFlag.STRICT_TYPES) @@ -371,7 +384,6 @@ def find_weight( np_map: the map from weight name to np values in INetworkDefinition state_dict: state of the graph module """ - network_weight = np_map[weight_name] network_weight = torch.from_numpy(np_map[weight_name]).cuda() for sd_w_name, sd_weight in state_dict.items(): if TRTInterpreter.check_weight_equal(sd_weight, network_weight): @@ -460,6 +472,7 @@ def _save_weight_mapping(self) -> None: sd = {k: v.reshape(-1) for k, v in self.module.state_dict().items()} weight_name_map: dict[str, Any] = {} np_map = {} + constant_mapping = {} net = self.ctx.net for i in range(net.num_layers): layer = net[i] @@ -485,19 +498,22 @@ def _save_weight_mapping(self) -> None: suffix = sd_weight_name_list[-1] # Retrieve each weight name(s) in state_dict if layer_type == "CONSTANT": - if "embedding" in suffix: - sd_weight_name = f"{sd_weight_name}.weight" - elif "weight" in suffix or "mm_other" in suffix: - # Linear layer weight + if ( + "embedding" in suffix + or "weight" in suffix + or "mm_other" in suffix + ): sd_weight_name = f"{sd_weight_name}.weight" elif "running_mean" in suffix: - # Linear layer weight sd_weight_name = f"{sd_weight_name}.running_mean" elif "running_var" in suffix: - # Linear layer weight sd_weight_name = f"{sd_weight_name}.running_var" - else: + elif "bias" in suffix: sd_weight_name = f"{sd_weight_name}.bias" + else: + # Save the constant weights for future fast refit + sd_weight_name = f"{sd_weight_name}.unknown" + constant_mapping[engine_weight_name] = weight elif layer_type == "SCALE": # Batch norm needs all weights to calculate scale and shift sd_weight_name = [f"{sd_weight_name}.{n}" for n in torch_attr] @@ -518,18 
+534,126 @@ def _save_weight_mapping(self) -> None: weight_name_map[engine_weight_name] = TRTInterpreter.find_weight( engine_weight_name, np_map, sd ) + if ( + weight_name_map[engine_weight_name] != "" + and engine_weight_name in constant_mapping + ): + # If the weight is found in state_dict, remove it from constant_mapping + del constant_mapping[engine_weight_name] weight_name_map[engine_weight_name] = [ weight_name_map[engine_weight_name], np_map[engine_weight_name].dtype, ] + weight_name_map["constant_mapping"] = constant_mapping self.weight_name_map = weight_name_map del np_map, sd gc.collect() torch.cuda.empty_cache() + def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> None: + # TODO: @Evan is waiting for TRT's feature to cache the weight-stripped engine + # if not self.compilation_settings.strip_engine_weights: + # # set EXCLUDE_WEIGHTS flag to strip weights + # runtime = trt.Runtime(TRT_LOGGER) + # engine = runtime.deserialize_cuda_engine(serialized_engine) + + # serialization_config = engine.create_serialization_config() + # serialization_config.set_flag(trt.SerializationFlag.EXCLUDE_WEIGHTS) + # serialized_engine = engine.serialize_with_config( + # serialization_config + # ) + + # Cache weighted engine for now + self.engine_cache.insert( # type: ignore[union-attr] + hash_val, + ( + serialized_engine, + self._input_names, + self._output_names, + self.input_specs, + self.compilation_settings, + self.weight_name_map, + ), + ) + + def _pull_cached_engine(self, hash_val: str) -> Optional[TRTInterpreterResult]: + # query the cached TRT engine + cached_data = self.engine_cache.check(hash_val) # type: ignore[union-attr] + if cached_data is not None: # hit the cache + ( + serialized_engine, + self._input_names, + self._output_names, + cached_engine_input_specs, + engine_compilation_settings, + self.weight_name_map, + ) = cached_data + + setting_compatiblity, incompattible_settings = settings_are_compatible( + self.compilation_settings, engine_compilation_settings + ) + assert ( + setting_compatiblity + ), f"Attempted to refit a cached engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})" + + for i, e in enumerate( + [ + Input.equivalent_spec(c, i) + for c, i in zip(cached_engine_input_specs, self.input_specs) + ] + ): + assert ( + e + ), f"Attempted to refit a cached engine built for a different input size (input: {i}, cached size: {cached_engine_input_specs[i]}, new size: {self.input_specs[i]}" + + _LOGGER.info( + "Found the cached engine that corresponds to this graph. It is directly loaded." 
+ ) + + # refit the cached engine with the new graph module + if not self.compilation_settings.strip_engine_weights: + runtime = trt.Runtime(TRT_LOGGER) + engine = runtime.deserialize_cuda_engine(serialized_engine) + + from torch_tensorrt.dynamo._refit import ( + _refit_single_trt_engine_with_gm, + ) + + _refit_single_trt_engine_with_gm( + new_gm=self.module, + old_engine=engine, + input_list=self.input_specs, + settings=self.compilation_settings, + weight_name_map=self.weight_name_map, + ) + serialized_engine = engine.serialize() + + # TODO: @Evan is waiting for TRT's feature to load the weight-stripped engine + # # EXCLUDE_WEIGHTS flag must be cleared + # serialization_config = engine.create_serialization_config() + # serialization_config.clear_flag( + # trt.SerializationFlag.EXCLUDE_WEIGHTS + # ) + # serialized_engine = engine.serialize_with_config( + # serialization_config + # ) + # # As of now, the engine becomes non-refittable because when EXCLUDE_WEIGHTS flag is cleared, the REFIT flag is also cleared by TRT to make the plan file smaller + + with io.BytesIO() as engine_bytes: + engine_bytes.write(serialized_engine) + engine_str = engine_bytes.getvalue() + + return TRTInterpreterResult( + engine_str, + self._input_names, + self._output_names, + self.weight_name_map, + ) + return None + def run( self, strict_type_constraints: bool = False, @@ -548,7 +672,10 @@ def run( # self.engine_cache could be None if: # 1) engine_cache is not passed in when calling this function like convert_exported_program_to_serialized_trt_engine etc., or # 2) both cache_built_engines and reuse_cached_engines are False - if self.engine_cache is not None: + if ( + self.engine_cache is not None + and not self.compilation_settings.immutable_weights + ): if ( self.compilation_settings.cache_built_engines or self.compilation_settings.reuse_cached_engines @@ -557,75 +684,14 @@ def run( self.module, self.input_specs, self.compilation_settings ) - if self.compilation_settings.reuse_cached_engines: - # query the cached TRT engine - cached_data = self.engine_cache.check(hash_val) - if cached_data is not None: # hit the cache - ( - serialized_engine, - self._input_names, - self._output_names, - cached_engine_input_specs, - engine_compilation_settings, - self.weight_name_map, - ) = cached_data - - setting_compatiblity, incompattible_settings = ( - settings_are_compatible( - self.compilation_settings, engine_compilation_settings - ) - ) - assert ( - setting_compatiblity - ), f"Attempted to refit a cached engine with incompatible settings: {incompattible_settings}, (old_settings: {engine_compilation_settings}, new_settings: {self.compilation_settings})" - - for i, e in enumerate( - [ - Input.equivalent_spec(c, i) - for c, i in zip(cached_engine_input_specs, self.input_specs) - ] - ): - assert ( - e - ), f"Attempted to refit a cached engine built for a different input size (input: {i}, cached size: {cached_engine_input_specs[i]}, new size: {self.input_specs[i]}" - - _LOGGER.info( - "Found the cached engine that corresponds to this graph. It is directly loaded." - ) - - runtime = trt.Runtime(TRT_LOGGER) - engine = runtime.deserialize_cuda_engine(serialized_engine) - - from torch_tensorrt.dynamo._refit import ( - _refit_single_trt_engine_with_gm, - ) - - # TODO: Fast refit is problematic for now. It will fail if the engine has batch_norm layers. - # We set weight_name_map=None to use slow refit anyway for now. Will fix it in the future. 
- _refit_single_trt_engine_with_gm( - new_gm=self.module, - old_engine=engine, - input_list=self.input_specs, - settings=self.compilation_settings, - weight_name_map=None, - ) - - serialized_engine = engine.serialize() - - with io.BytesIO() as engine_bytes: - engine_bytes.write(serialized_engine) - engine_str = engine_bytes.getvalue() - - return TRTInterpreterResult( - engine_str, - self._input_names, - self._output_names, - self.weight_name_map, - ) + if self.compilation_settings.reuse_cached_engines: + interpreter_result = self._pull_cached_engine(hash_val) + if interpreter_result is not None: # hit the cache + return interpreter_result self._construct_trt_network_def() - if self.compilation_settings.make_refittable: + if not self.compilation_settings.immutable_weights: self._save_weight_mapping() build_engine_start_time = datetime.now() @@ -652,28 +718,24 @@ def run( self._save_timing_cache( builder_config, self.compilation_settings.timing_cache_path ) + + # Engine caching only for refittable engines if ( - self.engine_cache is not None + not self.compilation_settings.immutable_weights and self.compilation_settings.cache_built_engines + and self.engine_cache is not None ): - self.engine_cache.insert( - hash_val, - ( - serialized_engine, - self._input_names, - self._output_names, - self.input_specs, - self.compilation_settings, - self.weight_name_map, - ), - ) + self._insert_engine_to_cache(hash_val, serialized_engine) with io.BytesIO() as engine_bytes: engine_bytes.write(serialized_engine) engine_str = engine_bytes.getvalue() return TRTInterpreterResult( - engine_str, self._input_names, self._output_names, self.weight_name_map + engine_str, + self._input_names, + self._output_names, + self.weight_name_map, ) def run_node(self, n: torch.fx.Node) -> torch.fx.Node: diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index 884c51e8ea..64e6a26297 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -49,7 +49,9 @@ def get_ir(target: Target) -> SourceIR: return SourceIR.UNKNOWN -def one_user_validator(node: Node, settings: CompilationSettings = None) -> bool: +def one_user_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: # Validate only one user, which is a getitem node that accesses the first element in the list return ( len(node.users) == 1 @@ -131,7 +133,6 @@ def aten_ops_batch_norm_legit_no_training( @dynamo_tensorrt_converter( torch.ops.aten.native_layer_norm.default, - capability_validator=one_user_validator, supports_dynamic_shapes=True, ) @enforce_tensor_types( @@ -182,8 +183,8 @@ def aten_ops_native_group_norm( SourceIR.ATEN, name, input=args[0], - weight=args[1], - bias=args[2], + weight=args_bounds_check(args, 1), + bias=args_bounds_check(args, 2), N=args[3], C=args[4], HxW=args[5], @@ -192,40 +193,6 @@ def aten_ops_native_group_norm( ) -@dynamo_tensorrt_converter( - torch.ops.aten.group_norm.default, - supports_dynamic_shapes=True, -) -@dynamo_tensorrt_converter( - torch.ops.aten.group_norm, - supports_dynamic_shapes=True, -) -@enforce_tensor_types( - { - 0: (TRTTensor,), - } -) -def aten_ops_group_norm( - ctx: ConversionContext, - target: Target, - args: Tuple[Argument, ...], - kwargs: Dict[str, Argument], - name: str, -) -> Union[TRTTensor, Sequence[TRTTensor]]: - return impl.normalization.group_norm( - ctx, - target, - SourceIR.ATEN, - name, - input=args[0], - num_groups=args[1], 
- weight=args_bounds_check(args, 2, None), - bias=args_bounds_check(args, 3, None), - eps=args_bounds_check(args, 4, 1e-05), - cudnn_enabled=args_bounds_check(args, 5, True), - ) - - @dynamo_tensorrt_converter(torch.ops.aten.cat.default, supports_dynamic_shapes=True) def aten_ops_cat( ctx: ConversionContext, @@ -265,9 +232,11 @@ def aten_ops_embedding( ) -def embedding_bag_validator(node: Node, settings: CompilationSettings = None) -> bool: +def embedding_bag_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: # Embedding bag op is not refitable - if settings.make_refittable: + if settings and not settings.immutable_weights: return False if not one_user_validator(node): @@ -415,7 +384,9 @@ def aten_ops_symsize_int( return impl.shape.shape(ctx, target, SourceIR.ATEN, name, args[0], args[1]) -def index_dtype_validator(node: Node, settings: CompilationSettings = None) -> bool: +def index_dtype_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: index = node.args[1] for ind in index: if ind is not None: @@ -841,7 +812,9 @@ def aten_ops_select( ) -def index_put_validator(node: Node, settings: CompilationSettings = None) -> bool: +def index_put_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: if args_bounds_check(node.args, 3, False): # Check if accumulate is valid _LOGGER.debug("We do not support accumulate=True for aten.index_put operation") accumulate_valid = False @@ -928,9 +901,9 @@ def aten_ops_slice( ) -def refit_validator(node: Node, settings: CompilationSettings = None) -> bool: +def refit_validator(node: Node, settings: Optional[CompilationSettings] = None) -> bool: # cumsum op is not refitable - if settings and settings.make_refittable: + if settings and not settings.immutable_weights: return False return True @@ -985,7 +958,9 @@ def aten_ops_tile( ) -def zero_output_validator(node: Node, settings: CompilationSettings = None) -> bool: +def zero_output_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: if 0 in node.args[1]: _LOGGER.debug( f"We do not support output tensor {node.args[1]} tensors with zero-sized dimensions for this operation." 
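The validator changes in this file all follow one pattern: validators now take an optional CompilationSettings object, and ops that TensorRT cannot refit decline conversion whenever a refittable engine was requested. A hedged sketch of that pattern for a hypothetical converter (the function name and import path are illustrative; only the signature and the immutable_weights check mirror this patch):

    from typing import Optional

    from torch.fx.node import Node
    from torch_tensorrt.dynamo._settings import CompilationSettings


    def my_refit_sensitive_validator(
        node: Node, settings: Optional[CompilationSettings] = None
    ) -> bool:
        # Hypothetical op that cannot be refitted: fall back to PyTorch when the
        # user asked for a refittable engine (immutable_weights=False).
        if settings and not settings.immutable_weights:
            return False
        return True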
@@ -999,7 +974,6 @@ def zero_output_validator(node: Node, settings: CompilationSettings = None) -> b torch.ops.aten.as_strided.default, capability_validator=zero_output_validator, ) -@dynamo_tensorrt_converter(torch.ops.aten.as_strided.default) def aten_ops_as_strided( ctx: ConversionContext, target: Target, @@ -1043,7 +1017,7 @@ def aten_ops_permute( def to_copy_dtype_validator( - placeholder_only: bool, settings: CompilationSettings = None + placeholder_only: bool, settings: Optional[CompilationSettings] = None ) -> Callable[[Node, CompilationSettings], bool]: """Return validator for to_copy node with placeholder restrictions""" @@ -1076,7 +1050,9 @@ def validate_dtype(to_copy_node: Node) -> bool: ) return False - def validator(to_copy_node: Node, settings: CompilationSettings = None) -> bool: + def validator( + to_copy_node: Node, settings: Optional[CompilationSettings] = None + ) -> bool: """Returns true if the to_copy node can be converted to TRT and the placeholder restriction is satisfied """ @@ -2045,7 +2021,6 @@ def aten_ops_div( @dynamo_tensorrt_converter( torch.ops.aten.pow.Tensor_Scalar, supports_dynamic_shapes=True ) -@dynamo_tensorrt_converter(operator.pow, supports_dynamic_shapes=True) def aten_ops_pow( ctx: ConversionContext, target: Target, @@ -2147,7 +2122,9 @@ def aten_ops_logical_xor( ) -def bitwise_type_validator(node: Node, settings: CompilationSettings = None) -> bool: +def bitwise_type_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: supported_type = [torch.bool, bool] tensor_targets = [ @@ -2291,7 +2268,7 @@ def aten_ops_bitwise_xor( def bitwise_not_type_validator( - node: Node, settings: CompilationSettings = None + node: Node, settings: Optional[CompilationSettings] = None ) -> bool: val = node.args[0] val_meta = val.meta.get("tensor_meta") @@ -2474,7 +2451,9 @@ def aten_ops_le( ) -def conv_param_validator(conv_node: Node, settings: CompilationSettings = None) -> bool: +def conv_param_validator( + conv_node: Node, settings: Optional[CompilationSettings] = None +) -> bool: return conv_node.args[7] in ([0], [0, 0], [0, 0, 0]) @@ -2571,7 +2550,7 @@ def aten_ops_cdist_forward( def avg_pool_param_validator( - pool_node: Node, settings: CompilationSettings = None + pool_node: Node, settings: Optional[CompilationSettings] = None ) -> bool: ceil_mode = args_bounds_check(pool_node.args, 4, False) divisor_override = args_bounds_check(pool_node.args, 6) @@ -2688,12 +2667,12 @@ def aten_ops_adaptive_avg_poolNd( ) -def topk_validator(node: Node, settings: CompilationSettings = None) -> bool: +def topk_validator(node: Node, settings: Optional[CompilationSettings] = None) -> bool: k = node.args[1] return topk_sort_validator(k) -def sort_validator(node: Node, settings: CompilationSettings = None) -> bool: +def sort_validator(node: Node, settings: Optional[CompilationSettings] = None) -> bool: meta_data = node.args[0].meta.get("tensor_meta") if meta_data is None: return False @@ -2716,7 +2695,7 @@ def topk_sort_validator(k: int) -> bool: def max_pool_param_validator( - pool_node: Node, settings: CompilationSettings = None + pool_node: Node, settings: Optional[CompilationSettings] = None ) -> bool: dilation = args_bounds_check(pool_node.args, 4, 1) ceil_mode = args_bounds_check(pool_node.args, 5, False) @@ -2771,7 +2750,9 @@ def aten_ops_max_pool( ) -def attention_validator(node: Node, settings: CompilationSettings = None) -> bool: +def attention_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: # Currently, 
`attn_mask` is not supported return args_bounds_check(node.args, 3) is None @@ -3309,7 +3290,6 @@ def aten_ops_copy( @dynamo_tensorrt_converter( torch.ops.aten.remainder.Tensor, supports_dynamic_shapes=True ) -@dynamo_tensorrt_converter(operator.mod, supports_dynamic_shapes=True) @enforce_tensor_types( { 0: (TRTTensor,), @@ -3401,7 +3381,9 @@ def aten_ops_flip( ) -def zero_diag_size_validator(node: Node, settings: CompilationSettings = None) -> bool: +def zero_diag_size_validator( + node: Node, settings: Optional[CompilationSettings] = None +) -> bool: meta = node.args[0].meta.get("tensor_meta") if meta: input_shape = meta.shape @@ -3530,7 +3512,7 @@ def aten_ops_index_select( def dropout_inference_validator( - node: Node, settings: CompilationSettings = None + node: Node, settings: Optional[CompilationSettings] = None ) -> bool: train_mode = args_bounds_check(node.args, 2, None) if train_mode is False: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py index b737fb7dbc..a46a9319c4 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py @@ -1,5 +1,5 @@ import logging -from typing import Any, List, Optional, Sequence, Tuple, Union, cast +from typing import List, Optional, Sequence, Tuple, Union import numpy as np import tensorrt as trt @@ -16,7 +16,6 @@ get_trt_tensor, has_dynamic_shape, set_layer_name, - to_numpy, ) from torch_tensorrt.dynamo.conversion.impl.cat import cat from torch_tensorrt.dynamo.conversion.impl.elementwise.ops import ge @@ -203,234 +202,72 @@ def native_group_norm( source_ir: Optional[SourceIR], name: str, input: TRTTensor, - weight: Optional[Union[torch.Tensor, np.ndarray]], - bias: Optional[Union[torch.Tensor, np.ndarray]], + weight: Optional[Union[TRTTensor, torch.Tensor, np.ndarray]], + bias: Optional[Union[TRTTensor, torch.Tensor, np.ndarray]], N: int, C: int, HxW: int, group: int, eps: float, - return_mean_rstd: bool = True, -) -> Union[TRTTensor, Sequence[TRTTensor]]: - # TODO: Ask TRT team about the usage of INormalization Layer usage with num_groups and update the implementation - # with INormalization Layer - assert ( - len(input.shape) >= 3 - ), f"The input dimension should not be less than 3, got {len(input.shape)}!" - - B = input.shape[0] - # if C is provided, it must be as same as the channel from the input shape, - # else if C is zero, we should get the channel from the input shape - if C == 0: - C = input.shape[1] - assert ( - C == input.shape[1] - ), f"The number of Channel={C} must be equal to the number of channels in the input shape={input.shape[1]}" - # Groups are a subdivision of the channel dimension. - assert ( - C % group == 0 - ), f"The num of channels ({C}) should be divisible by num_groups ({group})!" - input = get_trt_tensor(ctx, input, f"{name}_input") - - shape = list(input.shape) - - for i, s in enumerate(shape): - if i == 0 and s > 0: - shape[i] = B * group - elif i == 1: - shape[i] = C // group - elif i > 1 and s == -1: - shape[i] = 0 - - # Normalize every group. 
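The manual mean/variance arithmetic being removed here is replaced a few hunks below by a single TensorRT INormalizationLayer. A hedged standalone sketch of that layer's use, with illustrative tensor shapes; the per-group scale and bias passed to the layer are constants of 1.0 and 0.0, and the learned per-channel weight and bias are applied afterwards with elementwise ops, as in the new converter:

    import tensorrt as trt

    def add_group_norm(
        network: trt.INetworkDefinition,
        x: trt.ITensor,        # e.g. an NCHW input
        ones: trt.ITensor,     # per-group scale of 1.0, shape (1, num_groups, 1, 1)
        zeros: trt.ITensor,    # per-group bias of 0.0, shape (1, num_groups, 1, 1)
        num_groups: int,
        eps: float,
    ) -> trt.ITensor:
        # Normalize over every axis except batch and group (TensorRT expects a bitmask)
        axes = 0
        for d in range(2, len(x.shape)):
            axes |= 1 << d
        layer = network.add_normalization(x, ones, zeros, axes)
        layer.epsilon = eps
        layer.num_groups = num_groups
        return layer.get_output(0)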
- reshaped_input = impl.shuffle.reshape( - ctx, - target, - source_ir, - f"{name}_reshape_input", - input, - shape, - ) - - if weight is None: - weight = to_numpy(1.0) - - if bias is None: - bias = to_numpy(0.0) - - weight = get_trt_tensor(ctx, weight, f"{name}_weight") - bias = get_trt_tensor(ctx, bias, f"{name}_bias") - weight_bias_shape = (1, C) + (1,) * (len(input.shape) - 2) - - dims = list(range(1, len(input.shape))) - - # E[X] - mean_trt = impl.reduce.mean( - ctx, - target, - source_ir, - f"{name}_mean", - reshaped_input, - dims, - True, - ) +) -> Tuple[TRTTensor, torch.Tensor, torch.Tensor]: + rank = len(input.shape) - mean_trt = impl.slice.expand( - ctx, - target, - source_ir, - f"{name}_expand_mean_trt", - mean_trt, - reshaped_input.shape, - ) + assert rank >= 3, f"Expected at least 3 dimensions for input tensor but got {rank}" - # X - E[X] - sub_trt = impl.elementwise.sub( - ctx, - target, - source_ir, - f"{name}_sub", - reshaped_input, - mean_trt, - ) + assert ( + C == input.shape[1] + ), f"num_channels ({C}) must be equal to number of channels in input ({input.shape[1]})" - # variance - pow_trt = get_trt_tensor(ctx, 2, f"{name}_power", np.float32) - pow_var = impl.elementwise.pow( - ctx, - target, - source_ir, - f"{name}_pow", - sub_trt, - pow_trt, - ) + weight_one = get_trt_tensor(ctx, 1.0, f"{name}_weight_one", input.dtype) + bias_zero = get_trt_tensor(ctx, 0.0, f"{name}_bias_zero", input.dtype) - var_trt = impl.reduce.mean( - ctx, - target, - source_ir, - f"{name}_mean_var", - pow_var, - dims, - True, - ) + shape = [1, group] + [1] * (rank - 2) - var_trt = impl.slice.expand( - ctx, - target, - source_ir, - f"{name}_expand_var_trt", - var_trt, - reshaped_input.shape, + weight_one = impl.slice.expand( + ctx, target, source_ir, f"{name}_expand_weight_one", weight_one, shape ) - - eps_trt = get_trt_tensor(ctx, eps, f"{name}_eps", np.float32) - add_trt = impl.elementwise.add( - ctx, - target, - source_ir, - f"{name}_add", - var_trt, - eps_trt, + bias_zero = impl.slice.expand( + ctx, target, source_ir, f"{name}_expand_bias_zero", bias_zero, shape ) - sqrt_trt = impl.unary.sqrt( - ctx, - target, - source_ir, - f"{name}_sqrt", - add_trt, - ) + axes = get_axes_for_reduce_op([i for i in range(1 if group == 1 else 2, rank)]) - # y = (X - E[X]) / sqrt((var + eps)) - output = impl.elementwise.div( - ctx, - target, - source_ir, - f"{name}_div", - sub_trt, - sqrt_trt, - ) + # INormalizationLayer scales the normalized output per-group, but PyTorch scales the normalized output per-channel, + # hence causing diverse result. 
Let TensorRT does no-op for scaling here, and do scaling ourselves later + layer = ctx.net.add_normalization(input, weight_one, bias_zero, axes) + layer.epsilon = eps + layer.num_groups = group + set_layer_name(layer, target, name, source_ir) + output = layer.get_output(0) - shape = list(output.shape) - for i, s in enumerate(shape): - if i == 0 and s > 0: - shape[i] = B - elif i == 1: - shape[i] = C - elif i > 1 and s == -1: - shape[i] = 0 + shape[1] = C - reshaped_output = impl.shuffle.reshape( - ctx, target, source_ir, f"{name}_reshape_output", output, shape - ) - reshaped_gamma = impl.shuffle.reshape( - ctx, - target, - source_ir, - f"{name}_reshape_gamma", - weight, - weight_bias_shape, - ) - - reshaped_output = impl.elementwise.mul( - ctx, - target, - source_ir, - f"{name}_mul_gamma", - reshaped_output, - reshaped_gamma, - ) - - reshaped_bias = impl.shuffle.reshape( - ctx, - target, - source_ir, - f"{name}_reshape_beta", - bias, - weight_bias_shape, - ) - reshaped_output = impl.elementwise.add( - ctx, - target, - source_ir, - f"{name}_add_beta", - reshaped_output, - reshaped_bias, - ) - if return_mean_rstd: - # return fake mean and rstd for now - return reshaped_output, None, None - return reshaped_output + if weight is not None: + weight = get_trt_tensor(ctx, weight, f"{name}_weight") + weight = cast_trt_tensor( + ctx, weight, input.dtype, f"{name}_cast_weight", target, source_ir + ) + weight = impl.shuffle.reshape( + ctx, target, source_ir, f"{name}_reshape_weight", weight, shape + ) + output = impl.elementwise.mul( + ctx, target, source_ir, f"{name}_mul_weight", output, weight + ) + if bias is not None: + bias = get_trt_tensor(ctx, bias, f"{name}_bias") + bias = cast_trt_tensor( + ctx, bias, input.dtype, f"{name}_cast_bias", target, source_ir + ) + bias = impl.shuffle.reshape( + ctx, target, source_ir, f"{name}_reshape_bias", bias, shape + ) + output = impl.elementwise.add( + ctx, target, source_ir, f"{name}_add_bias", output, bias + ) -def group_norm( - ctx: ConversionContext, - target: Target, - source_ir: Optional[SourceIR], - name: str, - input: TRTTensor, - num_groups: int, - weight: Optional[Union[torch.Tensor, np.ndarray]], - bias: Optional[Union[torch.Tensor, np.ndarray]], - eps: float, - cudnn_enabled: bool, -) -> Union[TRTTensor, Sequence[TRTTensor]]: - return native_group_norm( - ctx, - target, - source_ir, - name, - input, - weight, - bias, - 0, - 0, - 0, - num_groups, - eps, - return_mean_rstd=False, - ) + # return fake mean and rstd for now + return output, None, None def softmax( diff --git a/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py b/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py index 4464555261..795b42f879 100644 --- a/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py +++ b/py/torch_tensorrt/dynamo/lowering/passes/view_to_reshape.py @@ -6,7 +6,7 @@ from torch_tensorrt.dynamo.lowering.passes.pass_utils import ( clean_up_graph_after_modifications, ) -from torch_tensorrt.dynamo.utils import get_metadata, set_metadata +from torch_tensorrt.dynamo.utils import copy_metadata logger = logging.getLogger(__name__) @@ -26,14 +26,14 @@ def orig(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor: def replacement(input: torch.Tensor, shape: List[torch.SymInt]) -> torch.Tensor: return replacement_op(input, shape) - # Store metadata of the orig_op - metadata = get_metadata(gm, orig_op) - - if torch.fx.subgraph_rewriter.replace_pattern(gm, orig, replacement): + match_and_replacements = 
torch.fx.subgraph_rewriter._replace_pattern( + gm, orig, replacement + ) + if match_and_replacements: gm = clean_up_graph_after_modifications(gm) logger.debug(f"Graph after replacing view with reshape:\n{gm.graph}") # Copy the orig_op's metadata to the replacement op - set_metadata(gm, replacement_op, metadata) + copy_metadata(match_and_replacements) return gm diff --git a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py index 28f0954185..134d84cf6d 100644 --- a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py @@ -65,7 +65,7 @@ def __init__( Union[torch.dtype, dtype] ] = _defaults.ENABLED_PRECISIONS, engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY, - make_refittable: bool = _defaults.MAKE_REFITTABLE, + immutable_weights: bool = _defaults.IMMUTABLE_WEIGHTS, debug: bool = _defaults.DEBUG, num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS, workspace_size: int = _defaults.WORKSPACE_SIZE, @@ -103,7 +103,7 @@ def __init__( assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False sparse_weights (bool): Enable sparsity for convolution and fully connected layers. enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels - refit (bool): Enable refitting + immutable_weights (bool): Build non-refittable engines. This is useful for some layers that are not refittable. debug (bool): Enable debuggable engine capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels @@ -152,8 +152,8 @@ def __init__( device = to_torch_tensorrt_device(device) enabled_precisions = {dtype._from(p) for p in enabled_precisions} assert ( - make_refittable - ), "'make_refittable' has to be True for a MutableTorchTensorRTModule." + not immutable_weights + ), "`immutable_weights` has to be False for a MutableTorchTensorRTModule." compilation_options = { "enabled_precisions": ( enabled_precisions @@ -180,7 +180,7 @@ def __init__( "require_full_compilation": require_full_compilation, "disable_tf32": disable_tf32, "sparse_weights": sparse_weights, - "make_refittable": make_refittable, + "immutable_weights": immutable_weights, "engine_capability": engine_capability, "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, diff --git a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py index e31d73f337..ffe7e9e03a 100644 --- a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py @@ -38,7 +38,7 @@ def __init__( *, name: str = "", settings: CompilationSettings = CompilationSettings(), - weight_name_map: Any = None, + weight_name_map: Optional[dict[Any, Any]] = None, ): """Takes a name, target device, serialized TensorRT engine, and binding names / order and constructs a PyTorch ``torch.nn.Module`` around it. 
Uses TensorRT Python APIs to run the engine @@ -51,6 +51,7 @@ def __init__( Keyword Arguments: name (str): Name for module settings (torch_tensorrt.dynamo.CompilationSettings): Settings used to compile engine, assumes engine was built with default compilation settings if object not passed + weight_name_map (dict): Mapping of engine weight name to state_dict weight name Example: diff --git a/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py index 1bebe20fda..d7cfc6608b 100644 --- a/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py @@ -96,6 +96,7 @@ def __init__( Keyword Arguments: name (str): Name for module settings (torch_tensorrt.dynamo.CompilationSettings): Settings used to compile engine, assumes engine was built with default compilation settings if object not passed + weight_name_map (dict): Mapping of engine weight name to state_dict weight name Example: diff --git a/py/torch_tensorrt/dynamo/runtime/register_fake_class.py b/py/torch_tensorrt/dynamo/runtime/register_fake_class.py index ac416879b5..eee743c497 100644 --- a/py/torch_tensorrt/dynamo/runtime/register_fake_class.py +++ b/py/torch_tensorrt/dynamo/runtime/register_fake_class.py @@ -26,7 +26,7 @@ def fake_tensorrt_execute_engine( modes = ["opt"] # Get the TRTEngine class and infer output shapes based on input shapes - trt_engine = fake_trt_engine.wrapped_obj.engine + trt_engine = fake_trt_engine.real_obj outputs_mode_dict = defaultdict(list) for mode in modes: input_shapes = [unwrap_tensor_shape(input, mode=mode) for input in inputs] @@ -79,7 +79,21 @@ def fake_tensorrt_execute_engine( @torch._library.register_fake_class("tensorrt::Engine") class FakeTRTEngine: def __init__(self, engine_info: List[str]) -> None: - self.engine = torch.classes.tensorrt.Engine(engine_info) + self.version = engine_info[torch.ops.tensorrt.ABI_TARGET_IDX()] + self.name = engine_info[torch.ops.tensorrt.NAME_IDX()] + self.device_info = engine_info[torch.ops.tensorrt.DEVICE_IDX()] + self.serialized_engine = engine_info[torch.ops.tensorrt.ENGINE_IDX()] + self.in_binding_names = engine_info[ + torch.ops.tensorrt.INPUT_BINDING_NAMES_IDX() + ] + self.out_binding_names = engine_info[ + torch.ops.tensorrt.OUTPUT_BINDING_NAMES_IDX() + ] + self.hardware_compatible = engine_info[torch.ops.tensorrt.HW_COMPATIBLE_IDX()] + self.serialized_metadata = engine_info[ + torch.ops.tensorrt.SERIALIZED_METADATA_IDX() + ] + self.target_platform = engine_info[torch.ops.tensorrt.TARGET_PLATFORM_IDX()] @classmethod def __obj_unflatten__(cls, flattened_tq: Any) -> Any: @@ -127,3 +141,6 @@ def infer_outputs(self, input_shapes: List[Any]) -> Any: def __setstate__(self, serialized_state: List[str]) -> Any: pass + + def __getstate__(self) -> Any: + pass diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py index 95e5f30e4d..5d6807f33a 100644 --- a/py/torch_tensorrt/dynamo/utils.py +++ b/py/torch_tensorrt/dynamo/utils.py @@ -11,8 +11,6 @@ import tensorrt as trt import torch from torch._subclasses.fake_tensor import FakeTensor - -from packaging import version from torch_tensorrt._Device import Device from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES @@ -22,6 +20,8 @@ from torch_tensorrt.dynamo._engine_cache import BaseEngineCache from torch_tensorrt.dynamo._settings import CompilationSettings +from packaging import version + from .types import TRTDataType logger = 
logging.getLogger(__name__) @@ -558,10 +558,6 @@ def parse_dynamo_kwargs( engine_cache = None if kwargs.get("cache_built_engines") or kwargs.get("reuse_cached_engines"): - assert kwargs.get( - "make_refittable" - ), "Engine caching requires make_refittable to be set to True" - if kwargs.get("custom_engine_cache") is not None: engine_cache = kwargs.get("custom_engine_cache") else: @@ -720,6 +716,20 @@ def set_metadata( node.meta = metadata[idx] +def copy_metadata(match_and_replacements: List[Any]) -> None: + """ + Copy the metadata from anchor node to the replacement node. This should be used + if the anchor node is replaced with only a single replacement node i.e one-one replacement. + """ + for match_and_replacement in match_and_replacements: + anchor_node = match_and_replacement.nodes_map[match_and_replacement.anchor] + assert ( + len(match_and_replacement.replacements) == 1 + ), "Found more than 1 replacements for the anchor node." + replacement_node = match_and_replacement.replacements[0] + replacement_node.meta = anchor_node.meta + + def flatten_nodes(nodes: Any) -> List[torch.fx.node.Node]: ret = [] if isinstance(nodes, torch.fx.node.Node): diff --git a/pyproject.toml b/pyproject.toml index 1284e458f4..465a8e9f26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,9 @@ requires = [ "cffi>=1.15.1", "typing-extensions>=4.7.0", "future>=0.18.3", - "tensorrt-cu12==10.3.0", + "tensorrt-cu12>=10.3.0,<=10.6.0", + "tensorrt-cu12-bindings>=10.3.0,<=10.6.0", + "tensorrt-cu12-libs>=10.3.0,<=10.6.0", "torch>=2.6.0.dev,<2.7.0", "pybind11==2.6.2", "numpy", @@ -55,9 +57,9 @@ keywords = [ ] dependencies = [ "torch>=2.6.0.dev,<2.7.0", - "tensorrt-cu12==10.3.0", - "tensorrt-cu12-bindings==10.3.0", - "tensorrt-cu12-libs==10.3.0", + "tensorrt-cu12>=10.3.0,<=10.6.0", + "tensorrt-cu12-bindings>=10.3.0,<=10.6.0", + "tensorrt-cu12-libs>=10.3.0,<=10.6.0", "packaging>=23", "numpy", "typing-extensions>=4.7.0", diff --git a/tests/py/dynamo/conversion/harness.py b/tests/py/dynamo/conversion/harness.py index 61f891267e..26818acd8a 100644 --- a/tests/py/dynamo/conversion/harness.py +++ b/tests/py/dynamo/conversion/harness.py @@ -403,7 +403,7 @@ def run_test( enable_passes=False, propagate_shapes=False, int32_reqd=False, - make_refittable=False, + immutable_weights=True, ): # TODO: lan to remove this and set use_dynamo_traccer to True by default # once all the converter test files are moved to use_dynamo_tracer @@ -414,7 +414,7 @@ def run_test( enabled_precisions={dtype._from(precision)}, truncate_double=True, debug=True, - make_refittable=make_refittable, + immutable_weights=immutable_weights, ) mod = self.generate_graph( @@ -498,7 +498,7 @@ def run_test_compare_tensor_attributes_only( output_dtypes=None, use_dynamo_tracer=False, enable_passes=False, - make_refittable=False, + immutable_weights=True, ): # Previous instance of the interpreter auto-casted 64-bit inputs @@ -507,7 +507,7 @@ def run_test_compare_tensor_attributes_only( enabled_precisions={dtype._from(precision)}, truncate_double=True, debug=True, - make_refittable=make_refittable, + immutable_weights=immutable_weights, ) mod = self.generate_graph( @@ -541,7 +541,7 @@ def run_test_with_dynamic_shape( pyt_inputs=None, propagate_shapes=False, check_dtype=True, - make_refittable=False, + immutable_weights=True, torch_export_dynamic_shapes=None, ): # TODO: lan to remove this and set use_dynamo_traccer to True by default @@ -551,7 +551,8 @@ def run_test_with_dynamic_shape( # Previous instance of the interpreter auto-casted 64-bit inputs # We 
replicate this behavior here compilation_settings = CompilationSettings( - truncate_double=True, make_refittable=make_refittable + truncate_double=True, + immutable_weights=immutable_weights, ) mod = self.generate_graph( mod, diff --git a/tests/py/dynamo/conversion/test_chunk_aten.py b/tests/py/dynamo/conversion/test_chunk_aten.py deleted file mode 100644 index eb06c04201..0000000000 --- a/tests/py/dynamo/conversion/test_chunk_aten.py +++ /dev/null @@ -1,187 +0,0 @@ -import unittest - -import torch -from parameterized import parameterized -from torch.testing._internal.common_utils import run_tests -from torch_tensorrt import Input - -from .harness import DispatchTestCase - - -class TestChunkConverter(DispatchTestCase): - @parameterized.expand( - [ - ((1,), 3, 0), - ((3,), 3, 0), - ((4,), 3, 0), - ((6,), 3, 0), - ((3,), 1, -1), - ((3,), 3, -1), - ((3,), 4, -1), - ] - ) - def test_chunk_1D(self, shape, chunks, dim): - class TestChunk(torch.nn.Module): - def forward(self, input): - out = torch.ops.aten.chunk.default(input, chunks, dim) - return out - - input = [torch.randn(shape)] - self.run_test( - TestChunk(), - input, - use_dynamo_tracer=True, - ) - - @parameterized.expand( - [ - ((3, 4), 1, 0), - ((3, 4), 3, 0), - ((3, 4), 4, 0), - ((3, 4), 2, -2), - ((3, 4), 6, -2), - ((3, 4), 3, 1), - ((3, 4), 4, 1), - ((3, 4), 5, -1), - ] - ) - def test_chunk_2D(self, shape, chunks, dim): - class TestChunk(torch.nn.Module): - def forward(self, input): - out = torch.ops.aten.chunk.default(input, chunks, dim) - return out - - input = [torch.randn(shape)] - self.run_test( - TestChunk(), - input, - use_dynamo_tracer=True, - ) - - @parameterized.expand( - [ - ((3, 4, 2), 1, 0), - ((3, 4, 2), 3, -3), - ((3, 4, 2), 3, 1), - ((3, 4, 2), 4, 1), - ((3, 4, 2), 6, -2), - ((3, 4, 2), 1, 2), - ((3, 4, 2), 3, -1), - ((3, 4, 2), 4, -1), - ] - ) - def test_chunk_3D(self, shape, chunks, dim): - class TestChunk(torch.nn.Module): - def forward(self, input): - out = torch.ops.aten.chunk.default(input, chunks, dim) - return out - - input = [torch.randn(shape)] - self.run_test( - TestChunk(), - input, - use_dynamo_tracer=True, - ) - - -#######################Dynamic cases####################### -# The tests are skipped for now. Will be addressed once https://github.com/pytorch/pytorch/issues/134663 is addressed -@unittest.skip( - "Pending aten.split dynamic input torch.export guard bug. 
Issue- https://github.com/pytorch/pytorch/issues/134663" -) -class TestChunkDynamicConverter(DispatchTestCase): - @parameterized.expand( - [ - ((1,), (1,), (3,), 3, 0), - ((3,), (3,), (4,), 3, 0), - ((4,), (4,), (6,), 3, 0), - ((6,), (6,), (9,), 3, 0), - ((3,), (3,), (4,), 1, -1), - ((3,), (3,), (4,), 3, -1), - ((3,), (3,), (4,), 4, -1), - ] - ) - def test_chunk_1D(self, min_shape, opt_shape, max_shape, chunks, dim): - class TestChunk(torch.nn.Module): - def forward(self, input): - out = torch.ops.aten.chunk.default(input, chunks, dim) - return out - - input_specs = [ - Input( - min_shape=min_shape, - opt_shape=opt_shape, - max_shape=max_shape, - ), - ] - self.run_test_with_dynamic_shape( - TestChunk(), - input_specs, - use_dynamo_tracer=True, - ) - - @parameterized.expand( - [ - ((3, 4), (3, 4), (4, 4), 1, 0), - ((3, 4), (3, 4), (4, 4), 3, 0), - ((3, 4), (3, 4), (4, 4), 4, 0), - ((3, 4), (3, 4), (4, 4), 2, -2), - ((3, 4), (3, 4), (4, 4), 6, -2), - ((3, 4), (3, 4), (4, 4), 3, 1), - ((3, 4), (3, 4), (4, 4), 4, 1), - ((3, 4), (3, 4), (4, 4), 5, -1), - ] - ) - def test_chunk_2D(self, min_shape, opt_shape, max_shape, chunks, dim): - class TestChunk(torch.nn.Module): - def forward(self, input): - out = torch.ops.aten.chunk.default(input, chunks, dim) - return out - - input_specs = [ - Input( - min_shape=min_shape, - opt_shape=opt_shape, - max_shape=max_shape, - ), - ] - self.run_test_with_dynamic_shape( - TestChunk(), - input_specs, - use_dynamo_tracer=True, - ) - - @parameterized.expand( - [ - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 1, 0), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 3, -3), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 3, 1), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 4, 1), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 6, -2), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 1, 2), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 3, -1), - ((3, 4, 2), (3, 4, 2), (4, 4, 2), 4, -1), - ] - ) - def test_chunk_3D(self, min_shape, opt_shape, max_shape, chunks, dim): - class TestChunk(torch.nn.Module): - def forward(self, input): - out = torch.ops.aten.chunk.default(input, chunks, dim) - return out - - input_specs = [ - Input( - min_shape=min_shape, - opt_shape=opt_shape, - max_shape=max_shape, - ), - ] - self.run_test_with_dynamic_shape( - TestChunk(), - input_specs, - use_dynamo_tracer=True, - ) - - -if __name__ == "__main__": - run_tests() diff --git a/tests/py/dynamo/conversion/test_cumsum_aten.py b/tests/py/dynamo/conversion/test_cumsum_aten.py index 1c32be6dd6..8ab699468d 100644 --- a/tests/py/dynamo/conversion/test_cumsum_aten.py +++ b/tests/py/dynamo/conversion/test_cumsum_aten.py @@ -24,7 +24,7 @@ def forward(self, x): self.run_test( Cumsum(), inputs, - make_refittable=False, + immutable_weights=True, ) @parameterized.expand( @@ -44,7 +44,7 @@ def forward(self, x): self.run_test( Cumsum(), inputs, - make_refittable=False, + immutable_weights=True, ) @parameterized.expand( @@ -65,7 +65,7 @@ def forward(self, x): self.run_test( Cumsum(), inputs, - make_refittable=False, + immutable_weights=True, ) @parameterized.expand( @@ -95,7 +95,7 @@ def forward(self, x): self.run_test_with_dynamic_shape( Cumsum(), inputs, - make_refittable=False, + immutable_weights=True, ) diff --git a/tests/py/dynamo/conversion/test_embedding_bag_aten.py b/tests/py/dynamo/conversion/test_embedding_bag_aten.py index 6543ac2306..1f119bd77e 100644 --- a/tests/py/dynamo/conversion/test_embedding_bag_aten.py +++ b/tests/py/dynamo/conversion/test_embedding_bag_aten.py @@ -148,7 +148,7 @@ def forward(self, weight, indices): precision=weight.dtype, enable_passes=True, 
propagate_shapes=True, - make_refittable=False, + immutable_weights=True, ) @parameterized.expand( @@ -346,7 +346,7 @@ def forward(self, weight, indices, offsets): precision=weight.dtype, enable_passes=True, propagate_shapes=True, - make_refittable=False, + immutable_weights=True, ) @parameterized.expand( @@ -411,7 +411,7 @@ def forward(self, weight, indices, offsets): precision=weight.dtype, enable_passes=True, propagate_shapes=True, - make_refittable=False, + immutable_weights=True, ) @parameterized.expand( @@ -493,7 +493,7 @@ def forward(self, weights, indices, offsets, per_sample_weights=None): min_block_size=1, cache_built_engines=False, reuse_cached_engines=False, - make_refittable=False, + immutable_weights=True, ) # use the inputs with different shape to inference: if per_sample_weights is None: diff --git a/tests/py/dynamo/conversion/test_group_norm_aten.py b/tests/py/dynamo/conversion/test_group_norm_aten.py index 617166d0c4..46e66ecd9b 100644 --- a/tests/py/dynamo/conversion/test_group_norm_aten.py +++ b/tests/py/dynamo/conversion/test_group_norm_aten.py @@ -6,155 +6,62 @@ from .harness import DispatchTestCase -class TestGroupNormConverter(DispatchTestCase): - def test_groupnorm1d(self): - class GroupNorm(torch.nn.Module): - def forward(self, x): - return torch.ops.aten.group_norm.default( - x, - 2, - torch.ones((6,)), - torch.zeros((6,)), - 1e-05, - True, - ) - - inputs = [torch.randn(3, 6, 224)] - self.run_test( - GroupNorm(), - inputs, - ) - - def test_groupnorm2d(self): - class GroupNorm(torch.nn.Module): - def forward(self, x): - return torch.ops.aten.group_norm.default( - x, - 2, - torch.randn((6,)), - torch.randn((6,)), - 1e-05, - True, - ) - - inputs = [torch.randn(3, 6, 224, 224)] - with torch.no_grad(): - self.run_test( - GroupNorm(), - inputs, - ) - - def test_groupnorm_with_dynamic_shape(self): - class GroupNorm(torch.nn.Module): - def forward(self, x): - return torch.ops.aten.group_norm.default( - x, - 2, - torch.randn((6,)), - torch.randn((6,)), - 1e-05, - True, - ) - - input_specs = [ - Input( - dtype=torch.float32, - min_shape=(3, 6, 24, 24), - opt_shape=(5, 6, 24, 24), - max_shape=(8, 6, 48, 24), - ), - ] - self.run_test_with_dynamic_shape( - GroupNorm(), - input_specs, - ) - - class TestNativeGroupNormConverter(DispatchTestCase): - def test_groupnorm1d(self): + def test_groupnorm_1d(self): class GroupNorm(torch.nn.Module): def forward(self, x): return torch.ops.aten.native_group_norm.default( - x, - torch.ones((6,)), - torch.zeros((6,)), - 3, - 6, - 224, - 2, - 1e-05, + x, None, None, 3, 6, 224, 2, 1e-05 )[0] inputs = [torch.randn(3, 6, 224)] - self.run_test( - GroupNorm(), - inputs, - ) + self.run_test(GroupNorm(), inputs, use_dynamo_tracer=True, enable_passes=True) - def test_groupnorm2d(self): + def test_groupnorm_2d(self): class GroupNorm(torch.nn.Module): - def forward(self, x): + def forward(self, x, weight, bias): return torch.ops.aten.native_group_norm.default( - x, - torch.ones((6,)), - torch.zeros((6,)), - 3, - 6, - 224 * 224, - 2, - 1e-05, + x, weight, bias, 3, 6, 224 * 224, 2, 1e-05 )[0] - inputs = [torch.randn(3, 6, 224, 224)] - with torch.no_grad(): - self.run_test( - GroupNorm(), - inputs, - ) + inputs = [torch.randn(3, 6, 224, 224), torch.ones(6), torch.zeros(6)] + self.run_test(GroupNorm(), inputs, use_dynamo_tracer=True, enable_passes=True) def test_groupnorm_sd(self): class GroupNorm(torch.nn.Module): - def forward(self, x): + def forward(self, x, weight, bias): return torch.ops.aten.native_group_norm.default( - x, - 
torch.randn((320,)).half(), - torch.randn((320,)).half(), - 2, - 320, - 4096, - 32, - 1e-05, + x, weight, bias, 2, 320, 64 * 64, 32, 1e-05 )[0] - inputs = [torch.randn(2, 320, 64, 64).half()] - with torch.no_grad(): - self.run_test( - GroupNorm(), - inputs, - ) + inputs = [ + torch.randn(2, 320, 64, 64, dtype=torch.half), + torch.randn(320, dtype=torch.half), + torch.randn(320, dtype=torch.half), + ] + self.run_test( + GroupNorm(), + inputs, + precision=torch.half, + use_dynamo_tracer=True, + enable_passes=True, + ) @parameterized.expand( [ - (5, 4, 4, 2, (2, 4, 2), (3, 4, 2), (5, 4, 4)), - (5, 4, 2 * 2, 2, (2, 4, 2, 2), (3, 4, 2, 2), (5, 4, 2, 2)), + (5, 4, 4, 1, (2, 4, 2), (3, 4, 2), (5, 4, 4)), + (5, 4, 2 * 2, 4, (2, 4, 2, 2), (3, 4, 2, 2), (5, 4, 2, 2)), (5, 9, 6 * 3, 3, (3, 9, 3, 3), (4, 9, 3, 3), (5, 9, 6, 3)), - (8, 9, 6 * 6, 3, (3, 9, 2, 3, 2), (5, 9, 3, 3, 2), (8, 9, 6, 3, 2)), + (8, 9, 6 * 3 * 2, 3, (3, 9, 2, 3, 2), (5, 9, 3, 3, 2), (8, 9, 6, 3, 2)), ] ) def test_groupnorm_with_dynamic_shape( - self, N, C, HxW, groups, min_shape, opt_shape, max_shape + self, N, C, HxW, group, min_shape, opt_shape, max_shape ): class GroupNorm(torch.nn.Module): - def forward(self, x): + def forward(self, x, weight, bias): return torch.ops.aten.native_group_norm.default( - x, - torch.ones((C,)), - torch.zeros((C,)), - N, - C, - HxW, - groups, - 1e-5, + x, weight, bias, N, C, HxW, group, 1e-05 )[0] input_specs = [ @@ -164,12 +71,10 @@ def forward(self, x): opt_shape=opt_shape, max_shape=max_shape, ), + Input(dtype=torch.float32, shape=(C,)), + Input(dtype=torch.float32, shape=(C,)), ] - self.run_test_with_dynamic_shape( - GroupNorm(), - input_specs, - check_dtype=False, - ) + self.run_test_with_dynamic_shape(GroupNorm(), input_specs, check_dtype=False) if __name__ == "__main__": diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py index 5ceea5e381..68451674c5 100644 --- a/tests/py/dynamo/models/test_engine_cache.py +++ b/tests/py/dynamo/models/test_engine_cache.py @@ -74,7 +74,7 @@ def test_reexport_is_equal(self): ), ) settings1 = CompilationSettings( - make_refittable=True, cache_built_engines=True, reuse_cached_engines=True + immutable_weights=False, cache_built_engines=True, reuse_cached_engines=True ) hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1) @@ -89,7 +89,7 @@ def test_reexport_is_equal(self): ), ) settings2 = CompilationSettings( - make_refittable=True, cache_built_engines=True, reuse_cached_engines=True + immutable_weights=False, cache_built_engines=True, reuse_cached_engines=True ) hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2) @@ -111,7 +111,7 @@ def test_input_shape_change_is_not_equal(self): ), ) settings1 = CompilationSettings( - make_refittable=True, cache_built_engines=True, reuse_cached_engines=True + immutable_weights=False, cache_built_engines=True, reuse_cached_engines=True ) hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1) @@ -126,7 +126,7 @@ def test_input_shape_change_is_not_equal(self): ), ) settings2 = CompilationSettings( - make_refittable=True, cache_built_engines=True, reuse_cached_engines=True + immutable_weights=False, cache_built_engines=True, reuse_cached_engines=True ) hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2) @@ -148,7 +148,7 @@ def test_engine_settings_is_not_equal(self): ), ) settings1 = CompilationSettings( - make_refittable=True, + immutable_weights=False, 
cache_built_engines=True, reuse_cached_engines=True, enabled_precisions={torch.float32}, @@ -166,7 +166,7 @@ def test_engine_settings_is_not_equal(self): ), ) settings2 = CompilationSettings( - make_refittable=True, + immutable_weights=False, cache_built_engines=True, reuse_cached_engines=True, enabled_precisions={torch.float32, torch.float16}, @@ -206,6 +206,7 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) for i in range(3): + # remove timing cache and reset dynamo for engine caching measurement remove_timing_cache() torch._dynamo.reset() if i == 0: @@ -220,11 +221,11 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): trt_gm = torch_trt.dynamo.compile( exp_program, tuple(inputs), - use_python_runtime=False, + use_python_runtime=True, enabled_precisions={torch.float}, debug=False, min_block_size=1, - make_refittable=True, + immutable_weights=False, cache_built_engines=cache_built_engines, reuse_cached_engines=reuse_cached_engines, engine_cache_dir=engine_cache_dir, @@ -284,11 +285,11 @@ def test_dynamo_compile_with_custom_engine_cache(self): trt_gm = torch_trt.dynamo.compile( exp_program, tuple(inputs), - use_python_runtime=False, + use_python_runtime=True, enabled_precisions={torch.float}, debug=False, min_block_size=1, - make_refittable=True, + immutable_weights=False, cache_built_engines=cache_built_engines, reuse_cached_engines=reuse_cached_engines, custom_engine_cache=custom_engine_cache, @@ -335,7 +336,7 @@ def test_dynamo_compile_change_input_shape(self): enabled_precisions={torch.float}, debug=False, min_block_size=1, - make_refittable=True, + immutable_weights=False, cache_built_engines=True, reuse_cached_engines=True, ) @@ -386,11 +387,11 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): model, backend="tensorrt", options={ - "use_python_runtime": True, + "use_python_runtime": False, "enabled_precisions": {torch.float}, "debug": False, "min_block_size": 1, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "engine_cache_dir": engine_cache_dir, @@ -400,7 +401,6 @@ def remove_timing_cache(path=TIMING_CACHE_PATH): results.append(compiled_model(*inputs)) # trigger the compilation end.record() torch.cuda.synchronize() - torch._dynamo.reset() times.append(start.elapsed_time(end)) cos_sim = cosine_similarity(results[0], results[1]) @@ -451,11 +451,11 @@ def test_torch_compile_with_custom_engine_cache(self): model, backend="tensorrt", options={ - "use_python_runtime": True, + "use_python_runtime": False, "enabled_precisions": {torch.float}, "debug": False, "min_block_size": 1, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": cache_built_engines, "reuse_cached_engines": reuse_cached_engines, "custom_engine_cache": custom_engine_cache, @@ -487,18 +487,59 @@ def test_torch_compile_with_custom_engine_cache(self): for h, count in custom_engine_cache.hashes.items() ] - def test_torch_compile_change_input_shape(self): + def test_torch_trt_compile_change_input_shape(self): # Custom Engine Cache model = models.resnet18(pretrained=True).eval().to("cuda") - - engine_cache_dir = "/tmp/test_torch_compile_with_default_disk_engine_cache" + engine_cache_dir = "/tmp/test_torch_trt_compile_change_input_shape" if os.path.exists(engine_cache_dir): shutil.rmtree(engine_cache_dir) custom_engine_cache = MyEngineCache(engine_cache_dir) for i in range(3): - # remove timing cache and reset 
dynamo for engine caching messurement inputs = [torch.rand((4 * (i + 1), 3, 224, 224)).to("cuda")] + compiled_model = torch_trt.compile( + model, + inputs=inputs, + **{ + "use_python_runtime": True, + "enabled_precisions": {torch.float}, + "debug": False, + "min_block_size": 1, + "immutable_weights": False, + "cache_built_engines": True, + "reuse_cached_engines": True, + "custom_engine_cache": custom_engine_cache, + }, + ) + compiled_model(*inputs) + [ + assertions.assertTrue( + count == 0, f"Unintended cache hit for entry ({h}, hit: {count})" + ) + for h, count in custom_engine_cache.hashes.items() + ] + + def test_torch_compile_graph_break(self): + class MyModel(torch.nn.Module): + def forward(self, x): + x = x + x + x = x + x + x = torch.ops.aten.relu.default(x) + x = x + x + x = x + x + x = torch.ops.aten.relu.default(x) + x = x + x + x = x + x + return x + + model = MyModel().eval().cuda() + engine_cache_dir = "/tmp/test_torch_compile_graph_break" + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + custom_engine_cache = MyEngineCache(engine_cache_dir) + inputs = [torch.rand((3, 3, 224, 224)).to("cuda")] + for i in range(3): compiled_model = torch.compile( model, backend="tensorrt", @@ -507,17 +548,460 @@ def test_torch_compile_change_input_shape(self): "enabled_precisions": {torch.float}, "debug": False, "min_block_size": 1, - "make_refittable": True, + "immutable_weights": False, "cache_built_engines": True, "reuse_cached_engines": True, "custom_engine_cache": custom_engine_cache, "torch_executed_ops": {"torch.ops.aten.relu.default"}, }, ) + compiled_model(*inputs) [ assertions.assertTrue( - count == 0, f"Unintended cache hit for entry ({h}, hit: {count})" + count == 2, + f"cache was not hit exactly twice for entry ({h}, hit: {count})", ) for h, count in custom_engine_cache.hashes.items() ] + + def test_isomorphic_graphs(self): + class MyModel1(torch.nn.Module): + def forward(self, a, b): + return a + b + + class MyModel2(torch.nn.Module): + def forward(self, c, d): + return c + d + + model1 = MyModel1().eval().cuda() + model2 = MyModel2().eval().cuda() + + inputs1 = (torch.randn((2, 3)).to("cuda"), torch.randn((2, 3)).to("cuda")) + inputs2 = (torch.randn((2, 3)).to("cuda"), torch.randn((2, 3)).to("cuda")) + + exp_program1 = torch.export.export(model1, args=inputs1) + exp_program2 = torch.export.export(model2, args=inputs2) + + input_specs1 = ( + torch_trt.Input( + min_shape=(1, 3), + opt_shape=(2, 3), + max_shape=(10, 3), + ), + ) + + input_specs2 = ( + torch_trt.Input( + min_shape=(1, 3), + opt_shape=(2, 3), + max_shape=(10, 3), + ), + ) + + settings1 = CompilationSettings( + cache_built_engines=True, reuse_cached_engines=True + ) + + settings2 = CompilationSettings( + cache_built_engines=True, reuse_cached_engines=True + ) + + hash1 = BaseEngineCache.get_hash(exp_program1.module(), input_specs1, settings1) + hash2 = BaseEngineCache.get_hash(exp_program2.module(), input_specs2, settings2) + + assertions.assertEqual(hash1, hash2) + + # @unittest.skip("benchmark on small models") + def test_caching_small_model(self): + from torch_tensorrt.dynamo._refit import refit_module_weights + + model = models.resnet18(pretrained=True).eval().to("cuda") + + engine_cache_dir = "/tmp/test_caching_small_model" + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + def remove_timing_cache(path=TIMING_CACHE_PATH): + if os.path.exists(path): + os.remove(path) + + inputs = (torch.rand((100, 3, 224, 224)).to("cuda"),) + exp_program = 
torch.export.export(model, args=inputs) + + # warm up + trt_gm = torch_trt.dynamo.compile( + exp_program, + inputs, + use_python_runtime=True, + enabled_precisions={torch.float}, + debug=False, + min_block_size=1, + immutable_weights=False, + cache_built_engines=False, + reuse_cached_engines=False, + strip_engine_weights=False, + refit_identical_engine_weights=False, + ) + torch.cuda.empty_cache() + + compile_times = [[] for _ in range(3)] + inference_times = [[] for _ in range(3)] + results = [[] for _ in range(3)] + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + + interval = 3 + for i in range(interval * 3): + if i < interval: + # non-refittable + immutable_weights = True + strip_engine_weights = False + refit_identical_engine_weights = False + cache_built_engines = reuse_cached_engines = False + # continue + elif i < interval * 2: + # REFIT w/ engine caching + immutable_weights = False + strip_engine_weights = False + refit_identical_engine_weights = False + cache_built_engines = reuse_cached_engines = True + # continue + else: + # REFIT_IDENTICAL w/ engine caching + immutable_weights = False + strip_engine_weights = False + refit_identical_engine_weights = True + cache_built_engines = reuse_cached_engines = True + # continue + + if i % interval == 0: + remove_timing_cache() + + torch._dynamo.reset() + + torch.cuda.synchronize() + start.record() + + trt_gm = torch_trt.dynamo.compile( + exp_program, + tuple(inputs), + use_python_runtime=True, + enabled_precisions={torch.float}, + debug=False, + min_block_size=1, + cache_built_engines=cache_built_engines, + reuse_cached_engines=reuse_cached_engines, + engine_cache_dir=engine_cache_dir, + engine_cache_size=1 << 40, + immutable_weights=immutable_weights, + strip_engine_weights=strip_engine_weights, + refit_identical_engine_weights=refit_identical_engine_weights, + ) + + if strip_engine_weights: + trt_gm = refit_module_weights(trt_gm, exp_program) + + end.record() + torch.cuda.synchronize() + compile_times[i // interval].append(start.elapsed_time(end)) + + # inference + torch.cuda.synchronize() + start.record() + out = trt_gm(*inputs) + end.record() + torch.cuda.synchronize() + inference_times[i // interval].append(start.elapsed_time(end)) + + results[i // interval].append(out) + + torch.cuda.empty_cache() + + cos_sim = cosine_similarity(torch.stack(results[0]), torch.stack(results[1])) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + cos_sim = cosine_similarity(torch.stack(results[1]), torch.stack(results[2])) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + assertions.assertTrue( + compile_times[1][0] > compile_times[1][1], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[1][0]} ms, time taken with engine caching: {compile_times[1][1]} ms", + ) + + assertions.assertTrue( + compile_times[1][0] > compile_times[1][2], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[1][0]} ms, time taken with engine caching: {compile_times[1][2]} ms", + ) + + assertions.assertTrue( + compile_times[2][0] > compile_times[2][1], + msg=f"Engine caching didn't speed up the compilation. 
Time taken without engine caching: {compile_times[2][0]} ms, time taken with engine caching: {compile_times[2][1]} ms", + ) + + assertions.assertTrue( + compile_times[2][0] > compile_times[2][2], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[2][0]} ms, time taken with engine caching: {compile_times[2][2]} ms", + ) + + assertions.assertTrue( + compile_times[0][2] > compile_times[1][2], + msg=f"Engine caching is slower than recompiling a non-refittable engine. Recompile a non-refittable engine: {compile_times[0][2]} ms, time taken with engine caching: {compile_times[1][2]} ms", + ) + + assertions.assertTrue( + compile_times[0][2] > compile_times[2][2], + msg=f"Engine caching is slower than recompiling a non-refittable engine. Recompile a non-refittable engine: {compile_times[0][2]} ms, time taken with engine caching: {compile_times[2][2]} ms", + ) + + @unittest.skip("benchmark on llama2") + def test_caching_llama2_model(self): + import torch + from torch_tensorrt.dynamo._refit import refit_module_weights + from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + StoppingCriteriaList, + ) + from transformers.generation.stopping_criteria import ( + EosTokenCriteria, + MaxLengthCriteria, + ) + + def export_llm(model, inputs, min_seq_len=1, max_seq_len=16): + """ + Exports the LLM model into an ExportedProgram with dynamic shapes. + In the case of guard failures due to some PyTorch kernel implementations, we also + try to re-export the graph by expressing them as runtime assert nodes + """ + with torch.no_grad(): + # max=1024 has constraint violation error. https://github.com/pytorch/pytorch/issues/125604 + seq_len = torch.export.Dim("seq_len", min=min_seq_len, max=max_seq_len) + try: + print("Trying to export the model using torch.export.export()..") + # strict=False only enables aotautograd tracing and excludes dynamo. + ep = torch.export.export( + model, (inputs,), dynamic_shapes=({1: seq_len},), strict=False + ) + except: + print( + "Trying torch.export._trace._export to trace the graph since torch.export.export() failed" + ) + # This API is used to express the constraint violation guards as asserts in the graph. + ep = torch.export._trace._export( + model, + (inputs,), + dynamic_shapes=({1: seq_len},), + strict=False, + allow_complex_guards_as_runtime_asserts=True, + ) + + return ep + + def generate(model, input_seq, max_tokens, eos_token_id): + """ + Greedy decoding of the model. This generates up to max_tokens. + """ + # Max length of output seq = current input_seq length + max_tokens allowed to generate + max_output_seq_length = input_seq.shape[1] + max_tokens + stopping_criteria = StoppingCriteriaList( + [ + MaxLengthCriteria(max_length=max_output_seq_length), + EosTokenCriteria(eos_token_id=eos_token_id), + ] + ) + + while True: + outputs = model(input_seq) + logits = outputs.logits + next_token_logits = logits[:, -1, :] + next_tokens = torch.argmax(next_token_logits, dim=-1) + input_seq = torch.cat([input_seq, next_tokens[:, None]], dim=-1) + # TODO: Handle batch in this check + if stopping_criteria(input_seq, logits).item(): + break + + return input_seq + + MAX_TOKENS = 32 + DEVICE = torch.device("cuda:0") + + llama_path = "meta-llama/Llama-2-7b-chat-hf" + with torch.no_grad(): + model = AutoModelForCausalLM.from_pretrained( + llama_path, use_cache=False, attn_implementation="eager" + ).eval() + + tokenizer = AutoTokenizer.from_pretrained(llama_path) + + prompt = "What is dynamic programming?" 
+ model_inputs = tokenizer(prompt, return_tensors="pt") + input_ids = model_inputs.input_ids + + llama2_ep = export_llm(model, input_ids, max_seq_len=64) + + engine_cache_dir = "/tmp/test_caching_llama2_model" + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + timing_cache_path = os.path.join( + engine_cache_dir, "llama2_timing_cache_original.bin" + ) + + def remove_timing_cache(path=timing_cache_path): + if os.path.exists(path): + os.remove(path) + + input_ids = input_ids.to(DEVICE) + + # warm up + trt_gm = torch_trt.dynamo.compile( + llama2_ep, + inputs=[input_ids], + use_python_runtime=True, + enabled_precisions={torch.float32}, + debug=False, + min_block_size=1, + immutable_weights=False, + truncate_double=True, + device=DEVICE, + disable_tf32=True, + cache_built_engines=False, + reuse_cached_engines=False, + strip_engine_weights=False, + refit_identical_engine_weights=False, + timing_cache_path=timing_cache_path, + ) + torch.cuda.empty_cache() + + compile_times = [[] for _ in range(3)] + inference_times = [[] for _ in range(3)] + results = [[] for _ in range(3)] + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + + interval = 3 + for i in range(interval * 3): + if i < interval: + # non-refittable + immutable_weights = True + strip_engine_weights = False + refit_identical_engine_weights = False + cache_built_engines = reuse_cached_engines = False + elif i < interval * 2: + # REFIT w/ engine caching + immutable_weights = False + strip_engine_weights = False + refit_identical_engine_weights = False + cache_built_engines = reuse_cached_engines = True + else: + # REFIT_IDENTICAL w/ engine caching + immutable_weights = False + strip_engine_weights = False + refit_identical_engine_weights = True + cache_built_engines = reuse_cached_engines = True + + if i % interval == 0: + remove_timing_cache() + + torch._dynamo.reset() + + torch.cuda.synchronize() + start.record() + + trt_gm = torch_trt.dynamo.compile( + llama2_ep, + inputs=[input_ids], + use_python_runtime=True, + enabled_precisions={torch.float32}, + debug=False, + min_block_size=1, + truncate_double=True, + device=DEVICE, + disable_tf32=True, + cache_built_engines=cache_built_engines, + reuse_cached_engines=reuse_cached_engines, + engine_cache_dir=engine_cache_dir, + engine_cache_size=1 << 40, + immutable_weights=immutable_weights, + strip_engine_weights=strip_engine_weights, + refit_identical_engine_weights=refit_identical_engine_weights, + timing_cache_path=timing_cache_path, + ) + + if strip_engine_weights: + trt_gm = refit_module_weights(trt_gm, llama2_ep) + + end.record() + torch.cuda.synchronize() + + compile_times[i // interval].append(start.elapsed_time(end)) + + # inference + torch.cuda.synchronize() + start.record() + + trt_gen_tokens = generate( + trt_gm, input_ids, MAX_TOKENS, tokenizer.eos_token_id + ) + # trt_gen_text = tokenizer.batch_decode( + # trt_gen_tokens, + # skip_special_tokens=True, + # clean_up_tokenization_spaces=False, + # )[0], + results[i // interval].append(trt_gen_tokens) + + end.record() + torch.cuda.synchronize() + + inference_times[i // interval].append(start.elapsed_time(end)) + + torch.cuda.empty_cache() + + cos_sim = cosine_similarity(torch.stack(results[0]), torch.stack(results[1])) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[0] doesn't match with results[1]. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + cos_sim = cosine_similarity(torch.stack(results[1]), torch.stack(results[2])) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + assertions.assertTrue( + compile_times[1][0] > compile_times[1][1], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[1][0]} ms, time taken with engine caching: {compile_times[1][1]} ms", + ) + + assertions.assertTrue( + compile_times[1][0] > compile_times[1][2], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[1][0]} ms, time taken with engine caching: {compile_times[1][2]} ms", + ) + + assertions.assertTrue( + compile_times[2][0] > compile_times[2][1], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[2][0]} ms, time taken with engine caching: {compile_times[2][1]} ms", + ) + + assertions.assertTrue( + compile_times[2][0] > compile_times[2][2], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {compile_times[2][0]} ms, time taken with engine caching: {compile_times[2][2]} ms", + ) + + assertions.assertTrue( + compile_times[0][2] > compile_times[1][2], + msg=f"Engine caching is slower than recompiling a non-refittable engine. Recompile a non-refittable engine: {compile_times[0][2]} ms, time taken with engine caching: {compile_times[1][2]} ms", + ) + + assertions.assertTrue( + compile_times[0][2] > compile_times[2][2], + msg=f"Engine caching is slower than recompiling a non-refittable engine. Recompile a non-refittable engine: {compile_times[0][2]} ms, time taken with engine caching: {compile_times[2][2]} ms", + ) diff --git a/tests/py/dynamo/models/test_model_refit.py b/tests/py/dynamo/models/test_model_refit.py index 07a9353037..bb61ac2d43 100644 --- a/tests/py/dynamo/models/test_model_refit.py +++ b/tests/py/dynamo/models/test_model_refit.py @@ -1,9 +1,7 @@ import os import tempfile -import time import unittest -import numpy as np import pytest import tensorrt as trt import torch @@ -57,8 +55,7 @@ def test_mapping(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) settings = trt_gm._run_on_acc_0.settings runtime = trt.Runtime(TRT_LOGGER) @@ -110,8 +107,7 @@ def test_refit_one_engine_with_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) new_trt_gm = refit_module_weights( @@ -161,8 +157,7 @@ def test_refit_one_engine_no_map_with_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) trt_gm._run_on_acc_0.weight_name_map = None @@ -213,8 +208,7 @@ def test_refit_one_engine_with_wrong_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) # Manually Deleted all batch norm layer. 
This suppose to fail the fast refit trt_gm._run_on_acc_0.weight_name_map = { @@ -271,8 +265,7 @@ def test_refit_one_engine_bert_with_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) new_trt_gm = refit_module_weights( @@ -325,8 +318,7 @@ def test_refit_one_engine_inline_runtime__with_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) torchtrt.save(trt_gm, trt_ep_path) trt_gm = torch.export.load(trt_ep_path) @@ -372,8 +364,7 @@ def test_refit_one_engine_python_runtime_with_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) new_trt_gm = refit_module_weights( @@ -443,7 +434,7 @@ def forward(self, x): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, + immutable_weights=False, torch_executed_ops=torch_executed_ops, reuse_cached_engines=False, ) @@ -494,8 +485,7 @@ def test_refit_one_engine_without_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) new_trt_gm = refit_module_weights( @@ -546,8 +536,7 @@ def test_refit_one_engine_bert_without_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) new_trt_gm = refit_module_weights( @@ -600,8 +589,7 @@ def test_refit_one_engine_inline_runtime_without_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) torchtrt.save(trt_gm, trt_ep_path) trt_gm = torch.export.load(trt_ep_path) @@ -647,8 +635,7 @@ def test_refit_one_engine_python_runtime_without_weightmap(): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, - reuse_cached_engines=False, + immutable_weights=False, ) new_trt_gm = refit_module_weights( @@ -718,7 +705,7 @@ def forward(self, x): enabled_precisions=enabled_precisions, debug=debug, min_block_size=min_block_size, - make_refittable=True, + immutable_weights=False, torch_executed_ops=torch_executed_ops, reuse_cached_engines=False, ) @@ -772,7 +759,7 @@ def forward(self, x): enabled_precisions={torch.float}, debug=True, min_block_size=1, - make_refittable=True, + immutable_weights=False, ) num_pyt_segments = len( diff --git a/tests/py/dynamo/models/test_reexport.py b/tests/py/dynamo/models/test_reexport.py index 9386652815..297410ae55 100644 --- a/tests/py/dynamo/models/test_reexport.py +++ b/tests/py/dynamo/models/test_reexport.py @@ -106,7 +106,6 @@ def forward(self, x): exp_program = torchtrt.dynamo.trace(model, **compile_spec) trt_module = torchtrt.dynamo.compile(exp_program, **compile_spec) - torchtrt.save(trt_module, trt_ep_path) # Reexport trt_exp_program = torch.export.export(trt_module, (input,), strict=False) diff --git a/tests/py/dynamo/models/test_weight_stripped_engine.py b/tests/py/dynamo/models/test_weight_stripped_engine.py new file mode 100644 index 0000000000..0c79ba7a3f --- /dev/null +++ b/tests/py/dynamo/models/test_weight_stripped_engine.py @@ -0,0 +1,564 @@ 
+import os +import pickle +import shutil +import unittest + +import torch +import torch_tensorrt as torch_trt +import torchvision.models as models +from torch.testing._internal.common_utils import TestCase +from torch_tensorrt.dynamo import convert_exported_program_to_serialized_trt_engine +from torch_tensorrt.dynamo._defaults import TIMING_CACHE_PATH +from torch_tensorrt.dynamo._refit import refit_module_weights +from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity + +assertions = unittest.TestCase() + + +class TestWeightStrippedEngine(TestCase): + def test_three_ways_to_compile(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + exp_program = torch.export.export(pyt_model, example_inputs) + + settings = { + "use_python_runtime": False, + "enabled_precisions": {torch.float}, + "debug": False, + "min_block_size": 1, + "immutable_weights": False, + "strip_engine_weights": False, + "refit_identical_engine_weights": False, + } + + # 1. Compile with torch_trt.dynamo.compile + gm1 = torch_trt.dynamo.compile( + exp_program, + example_inputs, + **settings, + ) + gm1_output = gm1(*example_inputs) + + # 2. Compile with torch.compile using tensorrt backend + gm2 = torch.compile( + pyt_model, + backend="tensorrt", + options=settings, + ) + gm2_output = gm2(*example_inputs) + + pyt_model_output = pyt_model(*example_inputs) + + assert torch.allclose( + pyt_model_output, gm1_output, 1e-2, 1e-2 + ), "gm1_output is not correct" + + assert torch.allclose( + gm1_output, gm2_output, 1e-2, 1e-2 + ), "gm2_output is not correct" + + def test_three_ways_to_compile_weight_stripped_engine(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + + settings = { + "use_python_runtime": False, + "enabled_precisions": {torch.float}, + "debug": False, + "min_block_size": 1, + "immutable_weights": False, + "strip_engine_weights": True, + "refit_identical_engine_weights": False, + } + + # 1. Compile with torch_trt.compile using dynamo backend + gm1 = torch_trt.compile( + pyt_model, ir="dynamo", inputs=example_inputs, **settings + ) + gm1_output = gm1(*example_inputs) + + # 2. 
Compile with torch.compile using tensorrt backend, which does not support setting strip_engine_weights=True + # gm2 = torch.compile( + # pyt_model, + # backend="tensorrt", + # options=settings, + # ) + # gm2_output = gm2(*example_inputs) + + assertions.assertEqual( + gm1_output.sum(), 0, msg="gm1_output should be all zeros" + ) + + def test_weight_stripped_engine_sizes(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + exp_program = torch.export.export(pyt_model, example_inputs) + weight_included_engine = convert_exported_program_to_serialized_trt_engine( + exp_program, + example_inputs, + immutable_weights=False, + strip_engine_weights=False, + refit_identical_engine_weights=False, + ) + weight_stripped_engine = convert_exported_program_to_serialized_trt_engine( + exp_program, + example_inputs, + immutable_weights=False, + strip_engine_weights=True, + refit_identical_engine_weights=False, + ) + weight_stripped_refit_identical_engine = ( + convert_exported_program_to_serialized_trt_engine( + exp_program, + example_inputs, + immutable_weights=False, + strip_engine_weights=True, + refit_identical_engine_weights=True, + ) + ) + assertions.assertTrue( + len(bytes(weight_included_engine)) > len(bytes(weight_stripped_engine)), + msg=f"Weight-stripped engine size is not smaller than the weight included engine size. Weight included engine size: {len(bytes(weight_included_engine))}, weight-stripped engine size: {len(bytes(weight_stripped_engine))}", + ) + assertions.assertTrue( + len(bytes(weight_included_engine)) + > len(bytes(weight_stripped_refit_identical_engine)), + msg=f"Weight-stripped refit-identical engine size is not smaller than the weight included engine size. 
Weight included engine size: {len(bytes(weight_included_engine))}, weight-stripped refit-identical engine size: {len(bytes(weight_stripped_refit_identical_engine))}", + ) + + def test_weight_stripped_engine_results(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + # Mark the dim0 of inputs as dynamic + batch = torch.export.Dim("batch", min=1, max=200) + exp_program = torch.export.export( + pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}} + ) + + inputs = [torch.rand((128, 3, 224, 224)).to("cuda")] + + trt_gm = torch_trt.dynamo.compile( + exp_program, + tuple(inputs), + use_python_runtime=True, + enabled_precisions={torch.float}, + debug=False, + min_block_size=1, + immutable_weights=False, + strip_engine_weights=True, + refit_identical_engine_weights=False, + ) + output = trt_gm(*inputs) + assertions.assertEqual( + output.sum(), 0, msg="weight-stripped engine results should be all zeros" + ) + + # Refit the weight-stripped engine with the same weights + refitted_trt_gm = refit_module_weights(trt_gm, exp_program) + refitted_output = refitted_trt_gm(*inputs) + assertions.assertNotEqual( + refitted_output.sum(), + 0, + msg="refitted engine results should not be all zeros", + ) + + compiled_model = torch.compile( + pyt_model, + backend="tensorrt", + options={ + "use_python_runtime": False, + "enabled_precisions": {torch.float}, + "debug": False, + "min_block_size": 1, + "immutable_weights": False, + "cache_built_engines": False, + "reuse_cached_engines": False, + "refit_identical_engine_weights": False, + "strip_engine_weights": False, + }, + ) + compiled_model_output = compiled_model(*inputs) + cos_sim = cosine_similarity(refitted_output, compiled_model_output) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"refitted_output doesn't match with compiled_model_output. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + @unittest.skip( + "For now, torch-trt will save weighted engine if strip_engine_weights is False. 
In the near future, we plan to save weight-stripped engine regardless of strip_engine_weights, which is pending on TRT's feature development: NVBug #4914602" + ) + def test_engine_caching_saves_weight_stripped_engine(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + exp_program = torch.export.export(pyt_model, example_inputs) + + engine_cache_dir = "/tmp/test_engine_caching_saves_weight_stripped_engine" + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + weight_included_engine = convert_exported_program_to_serialized_trt_engine( + exp_program, + example_inputs, + strip_engine_weights=False, + refit_identical_engine_weights=False, + ) + + trt_gm = torch_trt.dynamo.compile( + exp_program, + tuple(example_inputs), + use_python_runtime=True, + enabled_precisions={torch.float}, + debug=False, + min_block_size=1, + immutable_weights=False, + strip_engine_weights=False, + refit_identical_engine_weights=True, + cache_built_engines=True, + reuse_cached_engines=True, + engine_cache_dir=engine_cache_dir, + ) + output = trt_gm(*example_inputs) + assertions.assertNotEqual(output.sum(), 0, msg="results shouldn't be all zeros") + + blob_path = os.path.join( + engine_cache_dir, os.listdir(engine_cache_dir)[0], "blob.bin" + ) + with open(blob_path, "rb") as f: + blob = f.read() + unpacked = pickle.loads(blob) + cached_stripped_engine = unpacked["serialized_engine"] + + assertions.assertTrue( + len(bytes(weight_included_engine)) > len(bytes(cached_stripped_engine)), + msg=f"cached engine size is not smaller than the weight included engine size. Weight included engine size: {len(bytes(weight_included_engine))}, cached stripped engine size: {len(bytes(cached_stripped_engine))}", + ) + + def test_dynamo_compile_with_refittable_weight_stripped_engine(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + exp_program = torch.export.export(pyt_model, args=example_inputs) + + engine_cache_dir = ( + "/tmp/test_dynamo_compile_with_refittable_weight_stripped_engine" + ) + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + def remove_timing_cache(path=TIMING_CACHE_PATH): + if os.path.exists(path): + os.remove(path) + + # The 1st iteration is to measure the compilation time without engine caching + # The 2nd and 3rd iterations are to measure the compilation time with engine caching. + # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration. + # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine. 
+ inputs = [torch.rand((128, 3, 224, 224)).to("cuda")] + results = [] + times = [] + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + for i in range(3): + remove_timing_cache() + torch._dynamo.reset() + if i == 0: + cache_built_engines = False + reuse_cached_engines = False + else: + cache_built_engines = True + reuse_cached_engines = True + + torch.cuda.synchronize() + start.record() + trt_gm = torch_trt.dynamo.compile( + exp_program, + tuple(inputs), + use_python_runtime=True, + enabled_precisions={torch.float}, + debug=False, + min_block_size=1, + immutable_weights=False, + cache_built_engines=cache_built_engines, + reuse_cached_engines=reuse_cached_engines, + engine_cache_dir=engine_cache_dir, + strip_engine_weights=False, + refit_identical_engine_weights=False, + ) + end.record() + torch.cuda.synchronize() + times.append(start.elapsed_time(end)) + results.append(trt_gm(*inputs)) + + assertions.assertNotEqual( + results[0].sum(), 0, msg="results[0] shouldn't be all zeros" + ) + assertions.assertNotEqual( + results[1].sum(), 0, msg="results[1] shouldn't be all zeros" + ) + assertions.assertNotEqual( + results[2].sum(), 0, msg="results[2] shouldn't be all zeros" + ) + + cos_sim = cosine_similarity(results[0], results[1]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + cos_sim = cosine_similarity(results[1], results[2]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + assertions.assertTrue( + times[0] > times[2], + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", + ) + + def test_torch_compile_with_refittable_weight_stripped_engine(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + + engine_cache_dir = ( + "/tmp/test_torch_compile_with_refittable_weight_stripped_engine" + ) + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + def remove_timing_cache(path=TIMING_CACHE_PATH): + if os.path.exists(path): + os.remove(path) + + # The 1st iteration is to measure the compilation time without engine caching + # The 2nd and 3rd iterations are to measure the compilation time with engine caching. + # Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration. + # The 3rd iteration should be faster than the 1st iteration because it loads the cached engine. 
+ inputs = [torch.rand((128, 3, 224, 224)).to("cuda")] + results = [] + times = [] + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + for i in range(3): + remove_timing_cache() + torch._dynamo.reset() + if i == 0: + cache_built_engines = False + reuse_cached_engines = False + else: + cache_built_engines = True + reuse_cached_engines = True + + torch.cuda.synchronize() + start.record() + compiled_model = torch.compile( + pyt_model, + backend="tensorrt", + options={ + "use_python_runtime": False, + "enabled_precisions": {torch.float}, + "debug": False, + "min_block_size": 1, + "immutable_weights": False, + "cache_built_engines": cache_built_engines, + "reuse_cached_engines": reuse_cached_engines, + "engine_cache_dir": engine_cache_dir, + "strip_engine_weights": False, + "refit_identical_engine_weights": True, + }, + ) + results.append(compiled_model(*inputs)) # trigger the compilation + end.record() + torch.cuda.synchronize() + times.append(start.elapsed_time(end)) + + assertions.assertNotEqual( + results[0].sum(), 0, msg="results[0] shouldn't be all zeros" + ) + assertions.assertNotEqual( + results[1].sum(), 0, msg="results[1] shouldn't be all zeros" + ) + assertions.assertNotEqual( + results[2].sum(), 0, msg="results[2] shouldn't be all zeros" + ) + + cos_sim = cosine_similarity(results[0], results[1]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + cos_sim = cosine_similarity(results[1], results[2]) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + assertions.assertTrue( + times[0] > times[2], + msg=f"Engine caching didn't speed up the compilation. 
Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", + ) + + def test_different_args_dont_share_cached_engine(self): + class MyModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = torch.nn.Conv2d(3, 4, 3, stride=1, bias=True) + self.relu = torch.nn.ReLU() + + def forward(self, x): + out = self.conv(x) + out = self.relu(out) + return out + + pyt_model = MyModel().eval().to("cuda") + + engine_cache_dir = "/tmp/test_different_args_dont_share_cached_engine" + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + inputs = [torch.rand((4, 3, 32, 32)).to("cuda")] + + for i in range(2): + if i == 0: + strip_engine_weights = False + else: + strip_engine_weights = True + + compiled_model = torch.compile( + pyt_model, + backend="tensorrt", + options={ + "use_python_runtime": True, + "enabled_precisions": {torch.float}, + "debug": False, + "min_block_size": 1, + "immutable_weights": False, + "cache_built_engines": True, + "reuse_cached_engines": True, + "engine_cache_dir": engine_cache_dir, + "strip_engine_weights": strip_engine_weights, + }, + ) + compiled_model(*inputs) + + assertions.assertEqual( + len(os.listdir(engine_cache_dir)), + 2, + msg=f"It has {len(os.listdir(engine_cache_dir))} cached engine(s) but should have 2 engines", + ) + + def test_constant_mul_in_refitting(self): + class MyModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.weight = torch.tensor(0.5, requires_grad=False) + + def forward(self, x): + out = x * self.weight + return out + + pyt_model = MyModel().eval().cuda() + inputs = [torch.randn((1, 3, 4, 4)).to("cuda")] + + exp_program = torch.export.export(pyt_model, args=tuple(inputs)) + + trt_module = torch_trt.compile( + pyt_model, + ir="dynamo", + inputs=tuple(inputs), + min_block_size=1, + immutable_weights=False, + use_python_runtime=True, + strip_engine_weights=True, + refit_identical_engine_weights=False, + ) + + refitted_trt_gm = refit_module_weights(trt_module, exp_program) + + outputs_pyt = pyt_model(*inputs) + outputs_trt = refitted_trt_gm(*inputs) + + cos_sim = cosine_similarity(outputs_pyt, outputs_trt) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"TRT outputs don't match with the original model. 
Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + def test_two_TRTRuntime_in_refitting(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + batch = torch.export.Dim("batch", min=1, max=200) + exp_program = torch.export.export( + pyt_model, args=example_inputs, dynamic_shapes={"x": {0: batch}} + ) + inputs = [torch.rand((128, 3, 224, 224)).to("cuda")] + + pyt_results = pyt_model(*inputs) + + for i in range(2): + if i == 0: + use_python_runtime = True + else: + use_python_runtime = False + + trt_gm = torch_trt.dynamo.compile( + exp_program, + tuple(inputs), + use_python_runtime=use_python_runtime, + debug=False, + min_block_size=1, + immutable_weights=False, + strip_engine_weights=True, + refit_identical_engine_weights=False, + ) + + output = trt_gm(*inputs) + assertions.assertEqual(output.sum(), 0, msg="results should be all zeros") + + refitted_trt_gm = refit_module_weights(trt_gm, exp_program) + refitted_output = refitted_trt_gm(*inputs) + cos_sim = cosine_similarity(pyt_results, refitted_output) + assertions.assertTrue( + cos_sim > COSINE_THRESHOLD, + msg=f"{'PythonTorchTensorRTModule' if use_python_runtime else 'TorchTensorRTModule'} outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + ) + + @unittest.skip("Waiting for implementation") + def test_refit_identical_engine_weights(self): + pyt_model = models.resnet18(pretrained=True).eval().to("cuda") + example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),) + exp_program = torch.export.export(pyt_model, example_inputs) + + engine_cache_dir = "/tmp/test_refit_identical_engine_weights" + if os.path.exists(engine_cache_dir): + shutil.rmtree(engine_cache_dir) + + trt_gm = torch_trt.dynamo.compile( + exp_program, + tuple(example_inputs), + use_python_runtime=True, + enabled_precisions={torch.float}, + debug=False, + min_block_size=1, + immutable_weights=False, + strip_engine_weights=True, + refit_identical_engine_weights=True, + ) + output = trt_gm(*example_inputs) + + pyt_model2 = models.resnet18(pretrained=False).eval().to("cuda") + exp_program2 = torch.export.export(pyt_model2, example_inputs) + + try: + refit_module_weights(trt_gm, exp_program) + except Exception as e: + assertions.fail( + f"Refitting the engine with the same weights failed with the following error: {e}" + ) + + try: + refit_module_weights(trt_gm, exp_program2) + assertions.fail( + "Refitting the engine with different weights should have failed but it didn't" + ) + except Exception as e: + pass diff --git a/tests/py/dynamo/runtime/test_mutable_torchtrt_module.py b/tests/py/dynamo/runtime/test_mutable_torchtrt_module.py index b52530efd1..f2bcaf7ede 100644 --- a/tests/py/dynamo/runtime/test_mutable_torchtrt_module.py +++ b/tests/py/dynamo/runtime/test_mutable_torchtrt_module.py @@ -49,7 +49,7 @@ def test_resnet18(): compile_spec = { "use_python_runtime": False, "enabled_precisions": {torch.float32}, - "make_refittable": True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -89,7 +89,7 @@ def test_save(): compile_spec = { "use_python_runtime": False, "enabled_precisions": {torch.float32}, - "make_refittable": True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -123,7 +123,7 @@ def test_resnet18_modify_attribute(): compile_spec = { "use_python_runtime": False, "enabled_precisions": {torch.float32}, - "make_refittable": 
True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -164,7 +164,7 @@ def test_resnet18_modify_attribute_no_refit(): compile_spec = { "use_python_runtime": False, "enabled_precisions": {torch.float32}, - "make_refittable": True, + "immutable_weights": False, } model = models.resnet18(pretrained=True).eval().to("cuda") @@ -243,7 +243,7 @@ def forward(self, x, b=5, c=None, d=None): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", - "make_refittable": True, + "immutable_weights": False, } mutable_module = torch_trt.MutableTorchTensorRTModule(model, **compile_spec) @@ -304,7 +304,7 @@ def set_weights(self): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", - "make_refittable": True, + "immutable_weights": False, } mutable_module = torch_trt.MutableTorchTensorRTModule(model, **compile_spec) @@ -367,7 +367,7 @@ def set_layer(self): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", - "make_refittable": True, + "immutable_weights": False, } mutable_module = torch_trt.MutableTorchTensorRTModule(model, **compile_spec) @@ -436,7 +436,7 @@ def forward(self, x, b=5, c=None, d=None): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", - "make_refittable": True, + "immutable_weights": False, } mutable_module = torch_trt.MutableTorchTensorRTModule(model, **compile_spec) diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 49ad6f473a..142a021609 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -67,20 +67,20 @@ http_archive( http_archive( name = "tensorrt", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "adff1cd5abe5d87013806172351e58fd024e5bf0fc61d49ef4b84cd38ed99081", - strip_prefix = "TensorRT-10.3.0.26", + sha256 = "33d3c2f3f4c84dc7991a4337a6fde9ed33f5c8e5c4f03ac2eb6b994a382b03a0", + strip_prefix = "TensorRT-10.6.0.26", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz", ], ) http_archive( name = "tensorrt_win", build_file = "@//third_party/tensorrt/archive:BUILD", - sha256 = "2bb4bcb79e8c33575816d874b0512ea28c302af1c06ee6d224da71aa182f75e0", - strip_prefix = "TensorRT-10.3.0.26", + sha256 = "6c6d92c108a1b3368423e8f69f08d31269830f1e4c9da43b37ba34a176797254", + strip_prefix = "TensorRT-10.6.0.26", urls = [ - "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip", + "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip", ], ) diff --git a/uv.lock b/uv.lock index 493873c773..635a8b92dc 100644 --- a/uv.lock +++ b/uv.lock @@ -75,10 +75,9 @@ wheels = [ [[package]] name = "certifi" version = "2024.8.30" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/ee/9b19140fe824b367c04c5e1b369942dd754c4c5462d5674002f75c4dedc1/certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9", size = 168507 } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/12/90/3c9ff0512038035f59d279fddeb79f5f1eccd8859f06d6163c58798b9487/certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", size = 167321 }, + { url = "https://download.pytorch.org/whl/nightly/certifi-2024.8.30-py3-none-any.whl" }, ] [[package]] @@ -248,10 +247,9 @@ wheels = [ [[package]] name = "filelock" version = "3.16.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/db/3ef5bb276dae18d6ec2124224403d1d67bccdbefc17af4cc8f553e341ab1/filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435", size = 18037 } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, + { url = "https://download.pytorch.org/whl/nightly/filelock-3.16.1-py3-none-any.whl" }, ] [[package]] @@ -293,10 +291,9 @@ wheels = [ [[package]] name = "idna" version = "3.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, + { url = "https://download.pytorch.org/whl/nightly/idna-3.10-py3-none-any.whl" }, ] [[package]] @@ -387,6 +384,7 @@ dependencies = [ ] wheels = [ { url = "https://download.pytorch.org/whl/nightly/Jinja2-3.1.4-py3-none-any.whl" }, + { url = "https://download.pytorch.org/whl/nightly/jinja2-3.1.4-py3-none-any.whl" }, ] [[package]] @@ -416,7 +414,7 @@ version = "2.1.5" source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } sdist = { url = "https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.5.tar.gz" } wheels = [ - { url = "https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46" }, { url = "https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, { url = "https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, { url = "https://download.pytorch.org/whl/nightly/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, @@ -701,10 +699,9 @@ wheels = [ [[package]] name = "packaging" version = "24.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/65/50db4dda066951078f0a96cf12f4b9ada6e4b811516bf0262c0f4f7064d4/packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", size = 148788 } 
+source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, + { url = "https://download.pytorch.org/whl/nightly/packaging-24.1-py3-none-any.whl" }, ] [[package]] @@ -841,12 +838,11 @@ wheels = [ [[package]] name = "psutil" version = "6.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/c7/8c6872f7372eb6a6b2e4708b88419fb46b857f7a2e1892966b851cc79fc9/psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2", size = 508067 } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } wheels = [ - { url = "https://files.pythonhosted.org/packages/35/56/72f86175e81c656a01c4401cd3b1c923f891b31fbcebe98985894176d7c9/psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0", size = 287478 }, - { url = "https://files.pythonhosted.org/packages/19/74/f59e7e0d392bc1070e9a70e2f9190d652487ac115bb16e2eff6b22ad1d24/psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd", size = 290455 }, - { url = "https://files.pythonhosted.org/packages/cd/5f/60038e277ff0a9cc8f0c9ea3d0c5eb6ee1d2470ea3f9389d776432888e47/psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132", size = 292046 }, + { url = "https://download.pytorch.org/whl/nightly/psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, ] [[package]] @@ -1006,109 +1002,71 @@ wheels = [ [[package]] name = "pyyaml" version = "6.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463 }, - { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280 }, - { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239 }, - { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802 }, - { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527 }, - { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 }, - { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 }, - { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 }, - { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 }, - { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 }, - { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, - { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, - { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, - { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, - { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, - { url = 
"https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 }, - { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 }, - { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 }, - { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 }, - { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, - { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891 }, - { url = "https://files.pythonhosted.org/packages/e9/6c/6e1b7f40181bc4805e2e07f4abc10a88ce4648e7e95ff1abe4ae4014a9b2/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", size = 722614 }, - { url = "https://files.pythonhosted.org/packages/3d/32/e7bd8535d22ea2874cef6a81021ba019474ace0d13a4819c2a4bce79bd6a/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", size = 737360 }, - { url = "https://files.pythonhosted.org/packages/d7/12/7322c1e30b9be969670b672573d45479edef72c9a0deac3bb2868f5d7469/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", size = 699006 }, - { url = "https://files.pythonhosted.org/packages/82/72/04fcad41ca56491995076630c3ec1e834be241664c0c09a64c9a2589b507/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", size = 723577 }, +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } +wheels = [ + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = 
"https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, ] [[package]] name = "regex" version = "2024.9.11" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/38/148df33b4dbca3bd069b963acab5e0fa1a9dbd6820f8c322d0dd6faeff96/regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd", size = 399403 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/87/1ce4a5357216b19b7055e7d3b0efc75a6e426133bf1e7d094321df514257/regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c", size = 783177 }, - { url = "https://files.pythonhosted.org/packages/3c/65/b9f002ab32f7b68e7d1dcabb67926f3f47325b8dbc22cc50b6a043e1d07c/regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8", size = 823193 }, - { url = "https://files.pythonhosted.org/packages/22/91/8339dd3abce101204d246e31bc26cdd7ec07c9f91598472459a3a902aa41/regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35", size = 809950 }, - { url = "https://files.pythonhosted.org/packages/cb/19/556638aa11c2ec9968a1da998f07f27ec0abb9bf3c647d7c7985ca0b8eea/regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71", size = 782661 }, - { url = "https://files.pythonhosted.org/packages/d1/e9/7a5bc4c6ef8d9cd2bdd83a667888fc35320da96a4cc4da5fa084330f53db/regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8", size = 772348 }, - { url = "https://files.pythonhosted.org/packages/f1/0b/29f2105bfac3ed08e704914c38e93b07c784a6655f8a015297ee7173e95b/regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a", size = 697460 }, - { url = 
"https://files.pythonhosted.org/packages/71/3a/52ff61054d15a4722605f5872ad03962b319a04c1ebaebe570b8b9b7dde1/regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d", size = 769151 }, - { url = "https://files.pythonhosted.org/packages/97/07/37e460ab5ca84be8e1e197c3b526c5c86993dcc9e13cbc805c35fc2463c1/regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137", size = 777478 }, - { url = "https://files.pythonhosted.org/packages/65/7b/953075723dd5ab00780043ac2f9de667306ff9e2a85332975e9f19279174/regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6", size = 845373 }, - { url = "https://files.pythonhosted.org/packages/40/b8/3e9484c6230b8b6e8f816ab7c9a080e631124991a4ae2c27a81631777db0/regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca", size = 845369 }, - { url = "https://files.pythonhosted.org/packages/b7/99/38434984d912edbd2e1969d116257e869578f67461bd7462b894c45ed874/regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a", size = 773935 }, - { url = "https://files.pythonhosted.org/packages/b1/51/91a5ebdff17f9ec4973cb0aa9d37635efec1c6868654bbc25d1543aca4ec/regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679", size = 791779 }, - { url = "https://files.pythonhosted.org/packages/07/4a/022c5e6f0891a90cd7eb3d664d6c58ce2aba48bff107b00013f3d6167069/regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4", size = 832605 }, - { url = "https://files.pythonhosted.org/packages/ac/1c/3793990c8c83ca04e018151ddda83b83ecc41d89964f0f17749f027fc44d/regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664", size = 818556 }, - { url = "https://files.pythonhosted.org/packages/e9/5c/8b385afbfacb853730682c57be56225f9fe275c5bf02ac1fc88edbff316d/regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50", size = 792808 }, - { url = "https://files.pythonhosted.org/packages/9b/8b/a4723a838b53c771e9240951adde6af58c829fb6a6a28f554e8131f53839/regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199", size = 781115 }, - { url = "https://files.pythonhosted.org/packages/83/5f/031a04b6017033d65b261259c09043c06f4ef2d4eac841d0649d76d69541/regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4", size = 778155 }, - { url = "https://files.pythonhosted.org/packages/fd/cd/4660756070b03ce4a66663a43f6c6e7ebc2266cc6b4c586c167917185eb4/regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd", size = 784614 }, - { url = 
"https://files.pythonhosted.org/packages/93/8d/65b9bea7df120a7be8337c415b6d256ba786cbc9107cebba3bf8ff09da99/regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f", size = 853744 }, - { url = "https://files.pythonhosted.org/packages/96/a7/fba1eae75eb53a704475baf11bd44b3e6ccb95b316955027eb7748f24ef8/regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96", size = 855890 }, - { url = "https://files.pythonhosted.org/packages/45/14/d864b2db80a1a3358534392373e8a281d95b28c29c87d8548aed58813910/regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1", size = 781887 }, - { url = "https://files.pythonhosted.org/packages/ca/fa/521eb683b916389b4975337873e66954e0f6d8f91bd5774164a57b503185/regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64", size = 795181 }, - { url = "https://files.pythonhosted.org/packages/28/db/63047feddc3280cc242f9c74f7aeddc6ee662b1835f00046f57d5630c827/regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42", size = 835842 }, - { url = "https://files.pythonhosted.org/packages/e3/94/86adc259ff8ec26edf35fcca7e334566c1805c7493b192cb09679f9c3dee/regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766", size = 823533 }, - { url = "https://files.pythonhosted.org/packages/29/52/84662b6636061277cb857f658518aa7db6672bc6d1a3f503ccd5aefc581e/regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a", size = 797037 }, - { url = "https://files.pythonhosted.org/packages/c3/2a/cd4675dd987e4a7505f0364a958bc41f3b84942de9efaad0ef9a2646681c/regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9", size = 784106 }, - { url = "https://files.pythonhosted.org/packages/6f/75/3ea7ec29de0bbf42f21f812f48781d41e627d57a634f3f23947c9a46e303/regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d", size = 782468 }, - { url = "https://files.pythonhosted.org/packages/d3/67/15519d69b52c252b270e679cb578e22e0c02b8dd4e361f2b04efcc7f2335/regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822", size = 790324 }, - { url = "https://files.pythonhosted.org/packages/9c/71/eff77d3fe7ba08ab0672920059ec30d63fa7e41aa0fb61c562726e9bd721/regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0", size = 860214 }, - { url = "https://files.pythonhosted.org/packages/81/11/e1bdf84a72372e56f1ea4b833dd583b822a23138a616ace7ab57a0e11556/regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a", size = 859420 }, - { url = 
"https://files.pythonhosted.org/packages/ea/75/9753e9dcebfa7c3645563ef5c8a58f3a47e799c872165f37c55737dadd3e/regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a", size = 787333 }, - { url = "https://files.pythonhosted.org/packages/b9/54/9fe8f9aec5007bbbbce28ba3d2e3eaca425f95387b7d1e84f0d137d25237/regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86", size = 795337 }, - { url = "https://files.pythonhosted.org/packages/b2/e7/6b2f642c3cded271c4f16cc4daa7231be544d30fe2b168e0223724b49a61/regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85", size = 835848 }, - { url = "https://files.pythonhosted.org/packages/cd/9e/187363bdf5d8c0e4662117b92aa32bf52f8f09620ae93abc7537d96d3311/regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963", size = 823503 }, - { url = "https://files.pythonhosted.org/packages/f8/10/601303b8ee93589f879664b0cfd3127949ff32b17f9b6c490fb201106c4d/regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6", size = 797049 }, - { url = "https://files.pythonhosted.org/packages/ef/1c/ea200f61ce9f341763f2717ab4daebe4422d83e9fd4ac5e33435fd3a148d/regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802", size = 784144 }, - { url = "https://files.pythonhosted.org/packages/d8/5c/d2429be49ef3292def7688401d3deb11702c13dcaecdc71d2b407421275b/regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29", size = 782483 }, - { url = "https://files.pythonhosted.org/packages/12/d9/cbc30f2ff7164f3b26a7760f87c54bf8b2faed286f60efd80350a51c5b99/regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8", size = 790320 }, - { url = "https://files.pythonhosted.org/packages/19/1d/43ed03a236313639da5a45e61bc553c8d41e925bcf29b0f8ecff0c2c3f25/regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84", size = 860435 }, - { url = "https://files.pythonhosted.org/packages/34/4f/5d04da61c7c56e785058a46349f7285ae3ebc0726c6ea7c5c70600a52233/regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554", size = 859571 }, - { url = "https://files.pythonhosted.org/packages/12/7f/8398c8155a3c70703a8e91c29532558186558e1aea44144b382faa2a6f7a/regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8", size = 787398 }, - { url = "https://files.pythonhosted.org/packages/b4/21/feaa5b0d3e5e3bad659cd7d640e6b76cc0719504dbd9bc8f67cfa21bde82/regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3", size = 782747 }, - { url = 
"https://files.pythonhosted.org/packages/bb/89/93516f0aa3e8a9366df2cf79bb0290abdc7dbe5dd27373d9bea0978b7ba6/regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199", size = 822700 }, - { url = "https://files.pythonhosted.org/packages/d5/e7/79c04ccb81cee2831d9d4499274919b9153c1741ce8b3421d69cb0032f1b/regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8", size = 809327 }, - { url = "https://files.pythonhosted.org/packages/01/e6/a7256c99c312b68f01cfd4f8eae6e770906fffb3832ecb66f35ca5b86b96/regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca", size = 781970 }, - { url = "https://files.pythonhosted.org/packages/18/c4/29e8b6ff2208775858b5d4a2caa6428d40b5fade95aee426de7e42ffff39/regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9", size = 771885 }, - { url = "https://files.pythonhosted.org/packages/95/78/7acd8882ac335f1f5ae1756417739fda3053e0bcacea8716ae4a04e74553/regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a", size = 696978 }, - { url = "https://files.pythonhosted.org/packages/cb/d2/1d44f9b4a3d33ff5773fd79bea53e992d00f81e0af6f1f4e2efac1e4d897/regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39", size = 768655 }, - { url = "https://files.pythonhosted.org/packages/79/ba/92ef9d3b8f59cb3df9febef07098dfb4a43c3bdcf35b1084c2009b0a93bf/regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba", size = 776922 }, - { url = "https://files.pythonhosted.org/packages/16/71/d964c0c9d447f04bbe6ab5eafd220208e7d52b9608e452e6fcad553b38e0/regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664", size = 845014 }, - { url = "https://files.pythonhosted.org/packages/83/cb/a378cdc2468782eefefa50183bbeabc3357fb588d4109d845f0a56e68713/regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89", size = 844916 }, - { url = "https://files.pythonhosted.org/packages/b9/f0/82ea1565a6639270cfe96263002b3d91084a1db5048d9b6084f83bd5972d/regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35", size = 773409 }, +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } +wheels = [ + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = 
"https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl" }, ] [[package]] name = "requests" version = "2.32.3" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } dependencies = [ { name = "certifi", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "charset-normalizer", marker = 
"sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "idna", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "urllib3", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, + { url = "https://download.pytorch.org/whl/nightly/requests-2.32.3-py3-none-any.whl" }, ] [[package]] @@ -1152,59 +1110,38 @@ wheels = [ [[package]] name = "safetensors" version = "0.4.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cb/46/a1c56ed856c6ac3b1a8b37abe5be0cac53219367af1331e721b04d122577/safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310", size = 65702 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/67/49556aeacc00df353767ed31d68b492fecf38c3f664c52692e4d92aa0032/safetensors-0.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6885016f34bef80ea1085b7e99b3c1f92cb1be78a49839203060f67b40aee761", size = 441382 }, - { url = "https://files.pythonhosted.org/packages/5d/ce/e9f4869a37bb11229e6cdb4e73a6ef23b4f360eee9dca5f7e40982779704/safetensors-0.4.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:133620f443450429322f238fda74d512c4008621227fccf2f8cf4a76206fea7c", size = 439001 }, - { url = "https://files.pythonhosted.org/packages/a0/27/aee8cf031b89c34caf83194ec6b7f2eed28d053fff8b6da6d00c85c56035/safetensors-0.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4fb3e0609ec12d2a77e882f07cced530b8262027f64b75d399f1504ffec0ba56", size = 478026 }, - { url = "https://files.pythonhosted.org/packages/da/33/1d9fc4805c623636e7d460f28eec92ebd1856f7a552df8eb78398a1ef4de/safetensors-0.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0f1dd769f064adc33831f5e97ad07babbd728427f98e3e1db6902e369122737", size = 495545 }, - { url = "https://files.pythonhosted.org/packages/b9/df/6f766b56690709d22e83836e4067a1109a7d84ea152a6deb5692743a2805/safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6d156bdb26732feada84f9388a9f135528c1ef5b05fae153da365ad4319c4c5", size = 435016 }, - { url = "https://files.pythonhosted.org/packages/90/fa/7bc3f18086201b1e55a42c88b822ae197d0158e12c54cd45c887305f1b7e/safetensors-0.4.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e347d77e2c77eb7624400ccd09bed69d35c0332f417ce8c048d404a096c593b", size = 456273 }, - { url = "https://files.pythonhosted.org/packages/3e/59/2ae50150d37a65c1c5f01aec74dc737707b8bbecdc76307e5a1a12c8a376/safetensors-0.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9f556eea3aec1d3d955403159fe2123ddd68e880f83954ee9b4a3f2e15e716b6", size = 619669 }, - { url = "https://files.pythonhosted.org/packages/fe/43/10f0bb597aef62c9c154152e265057089f3c729bdd980e6c32c3ec2407a4/safetensors-0.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9483f42be3b6bc8ff77dd67302de8ae411c4db39f7224dec66b0eb95822e4163", size = 
605212 }, - { url = "https://files.pythonhosted.org/packages/39/83/c4a7ce01d626e46ea2b45887f2e59b16441408031e2ce2f9fe01860c6946/safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42", size = 441093 }, - { url = "https://files.pythonhosted.org/packages/47/26/cc52de647e71bd9a0b0d78ead0d31d9c462b35550a817aa9e0cab51d6db4/safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688", size = 438960 }, - { url = "https://files.pythonhosted.org/packages/06/78/332538546775ee97e749867df2d58f2282d9c48a1681e4891eed8b94ec94/safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68", size = 478031 }, - { url = "https://files.pythonhosted.org/packages/d9/03/a3c8663f1ddda54e624ecf43fce651659b49e8e1603c52c3e464b442acfa/safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df", size = 494754 }, - { url = "https://files.pythonhosted.org/packages/e6/ee/69e498a892f208bd1da4104d4b9be887f8611bf4942144718b6738482250/safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090", size = 435013 }, - { url = "https://files.pythonhosted.org/packages/a2/61/f0cfce984515b86d1260f556ba3b782158e2855e6a318446ac2613786fa9/safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943", size = 455984 }, - { url = "https://files.pythonhosted.org/packages/e7/a9/3e3b48fcaade3eb4e347d39ebf0bd44291db21a3e4507854b42a7cb910ac/safetensors-0.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0", size = 619513 }, - { url = "https://files.pythonhosted.org/packages/80/23/2a7a1be24258c0e44c1d356896fd63dc0545a98d2d0184925fa09cd3ec76/safetensors-0.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f", size = 604841 }, - { url = "https://files.pythonhosted.org/packages/d6/6c/7e04b7626809fc63f3698f4c50e43aff2864b40089aa4506c918a75b8eed/safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f", size = 441134 }, - { url = "https://files.pythonhosted.org/packages/58/2b/ffe7c86a277e6c1595fbdf415cfe2903f253f574a5405e93fda8baaa582c/safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461", size = 438467 }, - { url = "https://files.pythonhosted.org/packages/67/9c/f271bd804e08c7fda954d17b70ff281228a88077337a9e70feace4f4cc93/safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea", size = 476566 }, - { url = "https://files.pythonhosted.org/packages/4c/ad/4cf76a3e430a8a26108407fa6cb93e6f80d996a5cb75d9540c8fe3862990/safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed", size = 492253 }, - { url = 
"https://files.pythonhosted.org/packages/d9/40/a6f75ea449a9647423ec8b6f72c16998d35aa4b43cb38536ac060c5c7bf5/safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c", size = 434769 }, - { url = "https://files.pythonhosted.org/packages/52/47/d4b49b1231abf3131f7bb0bc60ebb94b27ee33e0a1f9569da05f8ac65dee/safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1", size = 457166 }, - { url = "https://files.pythonhosted.org/packages/c3/cd/006468b03b0fa42ff82d795d47c4193e99001e96c3f08bd62ef1b5cab586/safetensors-0.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4", size = 619280 }, - { url = "https://files.pythonhosted.org/packages/22/4d/b6208d918e83daa84b424c0ac3191ae61b44b3191613a3a5a7b38f94b8ad/safetensors-0.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646", size = 605390 }, - { url = "https://files.pythonhosted.org/packages/a4/c7/4fda8a0ebb96662550433378f4a74c677fa5fc4d0a43a7ec287d1df254a9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679", size = 441378 }, - { url = "https://files.pythonhosted.org/packages/14/31/9abb431f6209de9c80dab83e1112ebd769f1e32e7ab7ab228a02424a4693/safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89", size = 438831 }, - { url = "https://files.pythonhosted.org/packages/37/37/99bfb195578a808b8d045159ee9264f8da58d017ac0701853dcacda14d4e/safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f", size = 477112 }, - { url = "https://files.pythonhosted.org/packages/7d/05/fac3ef107e60d2a78532bed171a91669d4bb259e1236f5ea8c67a6976c75/safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a", size = 493373 }, - { url = "https://files.pythonhosted.org/packages/cf/7a/825800ee8c68214b4fd3506d5e19209338c69b41e01c6e14dd13969cc8b9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3", size = 435422 }, - { url = "https://files.pythonhosted.org/packages/5e/6c/7a3233c08bde558d6c33a41219119866cb596139a4673cc6c24024710ffd/safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35", size = 457382 }, - { url = "https://files.pythonhosted.org/packages/a0/58/0b7bcba3788ff503990cf9278d611b56c029400612ba93e772c987b5aa03/safetensors-0.4.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523", size = 619301 }, - { url = "https://files.pythonhosted.org/packages/82/cc/9c2cf58611daf1c83ce5d37f9de66353e23fcda36008b13fd3409a760aa3/safetensors-0.4.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142", size = 605580 }, - { url = 
"https://files.pythonhosted.org/packages/08/94/7760694760f1e5001bd62c93155b8b7ccb652d1f4d0161d1e72b5bf9581a/safetensors-0.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:139fbee92570ecea774e6344fee908907db79646d00b12c535f66bc78bd5ea2c", size = 442391 }, - { url = "https://files.pythonhosted.org/packages/03/1c/0db6e6e5cb293907b2242447b48cc09f31478aa02f08773155c2a2db22de/safetensors-0.4.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c36302c1c69eebb383775a89645a32b9d266878fab619819ce660309d6176c9b", size = 440015 }, - { url = "https://files.pythonhosted.org/packages/15/58/9658bf7ca3a4e77577fbd2c7afda4701c558db66b01daf7cd4d9dbd9781e/safetensors-0.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d641f5b8149ea98deb5ffcf604d764aad1de38a8285f86771ce1abf8e74c4891", size = 478099 }, - { url = "https://files.pythonhosted.org/packages/9e/fa/44d9723a988dd54f43a5fcfa6b4d3a721e9294bb55d1c3e539a88619f1b2/safetensors-0.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b4db6a61d968de73722b858038c616a1bebd4a86abe2688e46ca0cc2d17558f2", size = 497170 }, - { url = "https://files.pythonhosted.org/packages/5d/80/81ba44fc82afbf5ca553913ac49460e325dc5cf00c317b34c14d43ebd76b/safetensors-0.4.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b75a616e02f21b6f1d5785b20cecbab5e2bd3f6358a90e8925b813d557666ec1", size = 436076 }, - { url = "https://files.pythonhosted.org/packages/2e/ad/7880a359b0f93322689804bdbe1e9a3110652963478712933ff04a3d45c3/safetensors-0.4.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:788ee7d04cc0e0e7f944c52ff05f52a4415b312f5efd2ee66389fb7685ee030c", size = 456901 }, - { url = "https://files.pythonhosted.org/packages/89/4f/0b61e4add7ea9dfa8141d0bb1b8357e3a08730a020c3a287f0e889c386b5/safetensors-0.4.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:87bc42bd04fd9ca31396d3ca0433db0be1411b6b53ac5a32b7845a85d01ffc2e", size = 620159 }, - { url = "https://files.pythonhosted.org/packages/a9/60/544687daf8ce8dc9a74260992ac058d7e3f20c91eada5ca232898d005149/safetensors-0.4.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4037676c86365a721a8c9510323a51861d703b399b78a6b4486a54a65a975fca", size = 605993 }, - { url = "https://files.pythonhosted.org/packages/ae/88/3068e1bb16f5e9f9068901de3cf7b3db270b9bfe6e7d51d4b55c1da0425d/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd8a1f6d2063a92cd04145c7fd9e31a1c7d85fbec20113a14b487563fdbc0597", size = 442311 }, - { url = "https://files.pythonhosted.org/packages/f7/15/a2bb77ebbaa76b61ec2e9f731fe4db7f9473fd855d881957c51b3a168892/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:951d2fcf1817f4fb0ef0b48f6696688a4e852a95922a042b3f96aaa67eedc920", size = 436678 }, - { url = "https://files.pythonhosted.org/packages/ec/79/9608c4546cdbfe3860dd7aa59e3562c9289113398b1a0bd89b68ce0a9d41/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ac85d9a8c1af0e3132371d9f2d134695a06a96993c2e2f0bbe25debb9e3f67a", size = 457316 }, - { url = "https://files.pythonhosted.org/packages/0f/23/b17b483f2857835962ad33e38014efd4911791187e177bc23b057d35bee8/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e3cec4a29eb7fe8da0b1c7988bc3828183080439dd559f720414450de076fcab", size = 620565 }, - { url = 
"https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 }, - { url = "https://files.pythonhosted.org/packages/b3/ff/b26d78b6100a08e57a1986ab71a2f9f093ba9943626f4967cd514cd43de2/safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0b6453c54c57c1781292c46593f8a37254b8b99004c68d6c3ce229688931a22", size = 442275 }, - { url = "https://files.pythonhosted.org/packages/71/29/6ac541358a07ec593ec9e88636908010bc9bf56c8018e0d25b4481adb64a/safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adaa9c6dead67e2dd90d634f89131e43162012479d86e25618e821a03d1eb1dc", size = 437217 }, - { url = "https://files.pythonhosted.org/packages/2b/f8/258564b71fe95d0117356e6915b1c0128f1ec3031cf8522a28f9d2108b47/safetensors-0.4.5-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73e7d408e9012cd17511b382b43547850969c7979efc2bc353f317abaf23c84c", size = 458132 }, - { url = "https://files.pythonhosted.org/packages/18/ac/510eebf3ac521fec3b0ea78e654e22d85de3406613209d20133b5b3cca33/safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:775409ce0fcc58b10773fdb4221ed1eb007de10fe7adbdf8f5e8a56096b6f0bc", size = 621171 }, - { url = "https://files.pythonhosted.org/packages/e0/c8/a02b635e39f3b904f52aff099505bdfbb40252d2d18a05e7fedc0bb64a28/safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:834001bed193e4440c4a3950a31059523ee5090605c907c66808664c932b549c", size = 607366 }, +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } +wheels = [ + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = 
"https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/nightly/safetensors-0.4.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl" }, ] [[package]] @@ -1400,7 +1337,7 @@ wheels = [ [[package]] name = "torch-tensorrt" -version = "2.6.0.dev0+0de0b1651" +version = "2.6.0.dev0+1f92a78a0" source = { editable = "." 
} dependencies = [ { name = "numpy", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, @@ -1449,9 +1386,9 @@ requires-dist = [ { name = "packaging", specifier = ">=23" }, { name = "rich", marker = "extra == 'monitoring-tools'", specifier = ">=13.7.1" }, { name = "rich", extras = ["jupyter"], marker = "extra == 'jupyter'", specifier = ">=13.7.1" }, - { name = "tensorrt-cu12", specifier = "==10.3.0" }, - { name = "tensorrt-cu12-bindings", specifier = "==10.3.0" }, - { name = "tensorrt-cu12-libs", specifier = "==10.3.0" }, + { name = "tensorrt-cu12", specifier = ">=10.3.0,<=10.6.0" }, + { name = "tensorrt-cu12-bindings", specifier = ">=10.3.0,<=10.6.0" }, + { name = "tensorrt-cu12-libs", specifier = ">=10.3.0,<=10.6.0" }, { name = "torch", specifier = ">=2.6.0.dev0,<2.7.0" }, { name = "torchvision", marker = "extra == 'torchvision'" }, { name = "typing-extensions", specifier = ">=4.7.0" }, @@ -1505,13 +1442,12 @@ wheels = [ [[package]] name = "tqdm" version = "4.66.5" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } dependencies = [ { name = "colorama", marker = "(platform_system == 'Windows' and sys_platform == 'linux') or (platform_system == 'Windows' and sys_platform == 'windows')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/58/83/6ba9844a41128c62e810fddddd72473201f3eacde02046066142a2d96cc5/tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad", size = 169504 } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/5d/acf5905c36149bbaec41ccf7f2b68814647347b72075ac0b1fe3022fdc73/tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd", size = 78351 }, + { url = "https://download.pytorch.org/whl/nightly/tqdm-4.66.5-py3-none-any.whl" }, ] [[package]] @@ -1568,10 +1504,9 @@ wheels = [ [[package]] name = "urllib3" version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } +source = { registry = "https://download.pytorch.org/whl/nightly/cu124" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, + { url = "https://download.pytorch.org/whl/nightly/urllib3-2.2.3-py3-none-any.whl" }, ] [[package]]