From bc8f866013bfe38b5661ba7a4988dd47eeac3d4f Mon Sep 17 00:00:00 2001 From: Yuri Victorovich Date: Fri, 4 Oct 2024 02:15:10 -0700 Subject: [PATCH] =?UTF-8?q?misc/pytorch:=20update=202.3.1=20=E2=86=92=202.?= =?UTF-8?q?4.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- misc/pytorch/Makefile | 3 +- misc/pytorch/distinfo | 6 +- misc/pytorch/files/patch-CMakeLists.txt | 24 +- ...src_ATen_cpu_vec_vec256_vec256__bfloat16.h | 11 - ...src_ATen_cpu_vec_vec512_vec512__bfloat16.h | 11 - .../files/patch-caffe2_proto_CMakeLists.txt | 8 - misc/pytorch/pkg-plist | 855 ++++-------------- 7 files changed, 181 insertions(+), 737 deletions(-) delete mode 100644 misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec256_vec256__bfloat16.h delete mode 100644 misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec512_vec512__bfloat16.h delete mode 100644 misc/pytorch/files/patch-caffe2_proto_CMakeLists.txt diff --git a/misc/pytorch/Makefile b/misc/pytorch/Makefile index 508353364222..fc2e2e49f8c7 100644 --- a/misc/pytorch/Makefile +++ b/misc/pytorch/Makefile @@ -1,7 +1,6 @@ PORTNAME= pytorch DISTVERSIONPREFIX= v -DISTVERSION= 2.3.1 -PORTREVISION= 6 +DISTVERSION= 2.4.1 CATEGORIES= misc # machine-learning MASTER_SITES= https://github.com/pytorch/pytorch/releases/download/v${DISTVERSION}/ DIST_SUBDIR= ${PORTNAME} diff --git a/misc/pytorch/distinfo b/misc/pytorch/distinfo index d0547799e54e..084af2ec13c7 100644 --- a/misc/pytorch/distinfo +++ b/misc/pytorch/distinfo @@ -1,3 +1,3 @@ -TIMESTAMP = 1718421830 -SHA256 (pytorch/pytorch-v2.3.1.tar.gz) = 6c66b59345091907cd62a693b647cee224558e7f15a9b04f4f322f4f6ffeb75b -SIZE (pytorch/pytorch-v2.3.1.tar.gz) = 277997681 +TIMESTAMP = 1727986762 +SHA256 (pytorch/pytorch-v2.4.1.tar.gz) = 39666a43c0c10f5fd46c1a7ca95dc74d3bc39de2678b70066481cbf02e58384f +SIZE (pytorch/pytorch-v2.4.1.tar.gz) = 296932555 diff --git a/misc/pytorch/files/patch-CMakeLists.txt b/misc/pytorch/files/patch-CMakeLists.txt index 078637ac581d..4018e370063c 100644 --- a/misc/pytorch/files/patch-CMakeLists.txt +++ b/misc/pytorch/files/patch-CMakeLists.txt @@ -1,6 +1,6 @@ ---- CMakeLists.txt.orig 2024-01-31 00:58:01 UTC +--- CMakeLists.txt.orig 2024-09-04 20:01:18 UTC +++ CMakeLists.txt -@@ -145,7 +145,7 @@ set(CPU_INTEL OFF) +@@ -181,7 +181,7 @@ set(CPU_INTEL OFF) set(CPU_AARCH64 OFF) set(CPU_INTEL OFF) @@ -9,16 +9,16 @@ set(CPU_INTEL ON) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)") set(CPU_AARCH64 ON) -@@ -170,7 +170,7 @@ option(BUILD_DOCS "Build Caffe2 documentation" OFF) - option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) +@@ -210,7 +210,7 @@ option(BUILD_CUSTOM_PROTOBUF option(BUILD_BINARY "Build C++ binaries" OFF) option(BUILD_DOCS "Build Caffe2 documentation" OFF) --option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON) -+option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" OFF) + option(BUILD_CUSTOM_PROTOBUF +- "Build and use Caffe2's own protobuf under third_party" ON) ++ "Build and use Caffe2's own protobuf under third_party" OFF) option(BUILD_PYTHON "Build Python binaries" ON) - option(BUILD_CAFFE2 "Master flag to build Caffe2" OFF) option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF) -@@ -405,15 +405,15 @@ option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo + option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON) +@@ -451,15 +451,15 @@ option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo # USE_SYSTEM_LIBS being "OFF". option(USE_SYSTEM_LIBS "Use all available system-provided libraries." OFF) option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo." OFF) @@ -35,9 +35,9 @@ -option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF) +option(USE_SYSTEM_ONNX "Use system-provided onnx." ON) option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF) - option(USE_SYSTEM_ZSTD "Use system-provided zstd." OFF) option(USE_GOLD_LINKER "Use ld.gold to link" OFF) -@@ -838,11 +838,11 @@ if(NOT MSVC) + if(USE_SYSTEM_LIBS) +@@ -971,11 +971,11 @@ if(NOT MSVC) # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459 string(APPEND CMAKE_CXX_FLAGS " -Wall") string(APPEND CMAKE_CXX_FLAGS " -Wextra") @@ -52,5 +52,5 @@ + #append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS) + #append_cxx_flag_if_supported("-Werror=bool-operation" CMAKE_CXX_FLAGS) append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-missing-field-initializers" CMAKE_CXX_FLAGS) - append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS) + append_cxx_flag_if_supported("-Wno-missing-field-initializers" + CMAKE_CXX_FLAGS) diff --git a/misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec256_vec256__bfloat16.h b/misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec256_vec256__bfloat16.h deleted file mode 100644 index e03ac51d837b..000000000000 --- a/misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec256_vec256__bfloat16.h +++ /dev/null @@ -1,11 +0,0 @@ ---- aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h.orig 2024-03-27 22:28:51 UTC -+++ aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h -@@ -265,7 +265,7 @@ static_assert( (public) - } - return b; - } -- Vectorized map(const __m256 (*const vop)(__m256)) const { -+ Vectorized map(__m256 (*const vop)(__m256)) const { - __m256 lo, hi; - cvt_to_fp32(values, lo, hi); - const auto o1 = vop(lo); diff --git a/misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec512_vec512__bfloat16.h b/misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec512_vec512__bfloat16.h deleted file mode 100644 index 80e0b1832434..000000000000 --- a/misc/pytorch/files/patch-aten_src_ATen_cpu_vec_vec512_vec512__bfloat16.h +++ /dev/null @@ -1,11 +0,0 @@ ---- aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h.orig 2023-10-12 12:54:40 UTC -+++ aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h -@@ -345,7 +345,7 @@ static_assert( (public) - } - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wignored-qualifiers" -- Vectorized map(const __m512 (*const vop)(__m512)) const { -+ Vectorized map(__m512 (*const vop)(__m512)) const { - __m512 lo, hi; - cvt_to_fp32(values, lo, hi); - const auto o1 = vop(lo); diff --git a/misc/pytorch/files/patch-caffe2_proto_CMakeLists.txt b/misc/pytorch/files/patch-caffe2_proto_CMakeLists.txt deleted file mode 100644 index 1df1849c0556..000000000000 --- a/misc/pytorch/files/patch-caffe2_proto_CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ ---- caffe2/proto/CMakeLists.txt.orig 2023-05-08 19:58:16 UTC -+++ caffe2/proto/CMakeLists.txt -@@ -1,3 +1,5 @@ -+set(CMAKE_CXX_STANDARD 17) -+ - if(BUILD_CAFFE2) - file(GLOB Caffe2_PROTOBUF_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.proto") - else() diff --git a/misc/pytorch/pkg-plist b/misc/pytorch/pkg-plist index d54b7b628740..8bd2a42d05ce 100644 --- a/misc/pytorch/pkg-plist +++ b/misc/pytorch/pkg-plist @@ -4,6 +4,7 @@ include/ATen/AccumulateType.h include/ATen/ArrayRef.h include/ATen/Backend.h include/ATen/Backtrace.h +include/ATen/BlasBackend.h include/ATen/CPUApplyUtils.h include/ATen/CPUFixedAllocator.h include/ATen/CPUFunctions.h @@ -70,7 +71,6 @@ include/ATen/Parallel-inl.h include/ATen/Parallel.h include/ATen/ParallelFuture.h include/ATen/ParallelNative.h -include/ATen/ParallelNativeTBB.h include/ATen/ParallelOpenMP.h include/ATen/PythonTorchFunctionTLS.h include/ATen/RedispatchFunctions.h @@ -116,6 +116,7 @@ include/ATen/core/ATen_fwd.h include/ATen/core/ATen_pch.h include/ATen/core/Array.h include/ATen/core/Backtrace.h +include/ATen/core/CachingHostAllocator.h include/ATen/core/CheckMemoryFormat.h include/ATen/core/DeprecatedTypeProperties.h include/ATen/core/DeprecatedTypePropertiesRegistry.h @@ -218,10 +219,13 @@ include/ATen/cpu/vec/vec256/vec256.h include/ATen/cpu/vec/vec256/vec256_bfloat16.h include/ATen/cpu/vec/vec256/vec256_complex_double.h include/ATen/cpu/vec/vec256/vec256_complex_float.h +include/ATen/cpu/vec/vec256/vec256_convert.h include/ATen/cpu/vec/vec256/vec256_double.h include/ATen/cpu/vec/vec256/vec256_float.h include/ATen/cpu/vec/vec256/vec256_float_neon.h +include/ATen/cpu/vec/vec256/vec256_half_neon.h include/ATen/cpu/vec/vec256/vec256_int.h +include/ATen/cpu/vec/vec256/vec256_mask.h include/ATen/cpu/vec/vec256/vec256_qint.h include/ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h include/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h @@ -241,12 +245,16 @@ include/ATen/cpu/vec/vec512/vec512.h include/ATen/cpu/vec/vec512/vec512_bfloat16.h include/ATen/cpu/vec/vec512/vec512_complex_double.h include/ATen/cpu/vec/vec512/vec512_complex_float.h +include/ATen/cpu/vec/vec512/vec512_convert.h include/ATen/cpu/vec/vec512/vec512_double.h include/ATen/cpu/vec/vec512/vec512_float.h include/ATen/cpu/vec/vec512/vec512_int.h +include/ATen/cpu/vec/vec512/vec512_mask.h include/ATen/cpu/vec/vec512/vec512_qint.h include/ATen/cpu/vec/vec_base.h +include/ATen/cpu/vec/vec_convert.h include/ATen/cpu/vec/vec_half.h +include/ATen/cpu/vec/vec_mask.h include/ATen/cpu/vec/vec_n.h include/ATen/cpu/vml.h include/ATen/cuda/ATenCUDAGeneral.h @@ -315,9 +323,9 @@ include/ATen/detail/CUDAHooksInterface.h include/ATen/detail/FunctionTraits.h include/ATen/detail/HIPHooksInterface.h include/ATen/detail/IPUHooksInterface.h +include/ATen/detail/MAIAHooksInterface.h include/ATen/detail/MPSHooksInterface.h include/ATen/detail/MTIAHooksInterface.h -include/ATen/detail/ORTHooksInterface.h include/ATen/detail/PrivateUse1HooksInterface.h include/ATen/detail/XPUHooksInterface.h include/ATen/div_rtn.h @@ -385,6 +393,9 @@ include/ATen/native/Fill.h include/ATen/native/ForeachUtils.h include/ATen/native/FractionalMaxPooling.h include/ATen/native/FunctionOfAMatrixUtils.h +include/ATen/native/FusedAdagrad.h +include/ATen/native/FusedAdam.h +include/ATen/native/FusedSGD.h include/ATen/native/GridSampler.h include/ATen/native/GridSamplerUtils.h include/ATen/native/Histogram.h @@ -572,6 +583,51 @@ include/ATen/native/quantized/cpu/init_qnnpack.h include/ATen/native/quantized/cpu/qembeddingbag.h include/ATen/native/quantized/cpu/qembeddingbag_prepack.h include/ATen/native/quantized/cudnn/utils.h +include/ATen/native/transformers/attention.h +include/ATen/native/transformers/cuda/flash_attn/alibi.h +include/ATen/native/transformers/cuda/flash_attn/block_info.h +include/ATen/native/transformers/cuda/flash_attn/dropout.h +include/ATen/native/transformers/cuda/flash_attn/flash.h +include/ATen/native/transformers/cuda/flash_attn/flash_api.h +include/ATen/native/transformers/cuda/flash_attn/flash_bwd_kernel.h +include/ATen/native/transformers/cuda/flash_attn/flash_bwd_launch_template.h +include/ATen/native/transformers/cuda/flash_attn/flash_bwd_preprocess_kernel.h +include/ATen/native/transformers/cuda/flash_attn/flash_fwd_kernel.h +include/ATen/native/transformers/cuda/flash_attn/flash_fwd_launch_template.h +include/ATen/native/transformers/cuda/flash_attn/kernel_traits.h +include/ATen/native/transformers/cuda/flash_attn/mask.h +include/ATen/native/transformers/cuda/flash_attn/rotary.h +include/ATen/native/transformers/cuda/flash_attn/softmax.h +include/ATen/native/transformers/cuda/flash_attn/static_switch.h +include/ATen/native/transformers/cuda/flash_attn/utils.h +include/ATen/native/transformers/cuda/mem_eff_attention/debug_utils.h +include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_pipelined.h +include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_rescale_output.h +include/ATen/native/transformers/cuda/mem_eff_attention/epilogue/epilogue_thread_apply_logsumexp.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/custom_mma.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/custom_mma_base.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/custom_mma_multistage.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/custom_mma_pipelined.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/find_default_mma.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/mma_accum_lambda_iterator.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm/mma_from_smem.h +include/ATen/native/transformers/cuda/mem_eff_attention/gemm_kernel_utils.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/default_warp_iterator_from_smem.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/epilogue_predicated_tile_iterator.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/make_residual_last.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/predicated_tile_access_iterator_residual_last.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/predicated_tile_iterator_residual_last.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/transpose_warp_iterator.h +include/ATen/native/transformers/cuda/mem_eff_attention/iterators/warp_iterator_from_smem.h +include/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h +include/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h +include/ATen/native/transformers/cuda/mem_eff_attention/kernels/cutlassB.h +include/ATen/native/transformers/cuda/mem_eff_attention/kernels/cutlassF.h +include/ATen/native/transformers/cuda/mem_eff_attention/pytorch_utils.h +include/ATen/native/transformers/cuda/mem_eff_attention/transform/tile_smem_loader.h +include/ATen/native/transformers/cuda/sdp_utils.h +include/ATen/native/transformers/hip/aotriton_adapter.h +include/ATen/native/transformers/sdp_utils_cpp.h include/ATen/native/utils/Factory.h include/ATen/native/utils/ParamUtils.h include/ATen/native/utils/ParamsHash.h @@ -671,6 +727,16 @@ include/ATen/ops/_batch_norm_impl_index_backward_ops.h include/ATen/ops/_batch_norm_impl_index_compositeimplicitautograd_dispatch.h include/ATen/ops/_batch_norm_impl_index_native.h include/ATen/ops/_batch_norm_impl_index_ops.h +include/ATen/ops/_batch_norm_no_update.h +include/ATen/ops/_batch_norm_no_update_compositeexplicitautograd_dispatch.h +include/ATen/ops/_batch_norm_no_update_native.h +include/ATen/ops/_batch_norm_no_update_ops.h +include/ATen/ops/_batch_norm_with_update.h +include/ATen/ops/_batch_norm_with_update_compositeexplicitautograd_dispatch.h +include/ATen/ops/_batch_norm_with_update_cpu_dispatch.h +include/ATen/ops/_batch_norm_with_update_cuda_dispatch.h +include/ATen/ops/_batch_norm_with_update_native.h +include/ATen/ops/_batch_norm_with_update_ops.h include/ATen/ops/_cast_Byte.h include/ATen/ops/_cast_Byte_compositeimplicitautograd_dispatch.h include/ATen/ops/_cast_Byte_native.h @@ -727,6 +793,7 @@ include/ATen/ops/_choose_qparams_per_tensor_native.h include/ATen/ops/_choose_qparams_per_tensor_ops.h include/ATen/ops/_chunk_cat.h include/ATen/ops/_chunk_cat_compositeexplicitautograd_dispatch.h +include/ATen/ops/_chunk_cat_cuda_dispatch.h include/ATen/ops/_chunk_cat_native.h include/ATen/ops/_chunk_cat_ops.h include/ATen/ops/_coalesce.h @@ -1022,265 +1089,226 @@ include/ATen/ops/_foobar_native.h include/ATen/ops/_foobar_ops.h include/ATen/ops/_foreach_abs.h include/ATen/ops/_foreach_abs_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_abs_cpu_dispatch.h include/ATen/ops/_foreach_abs_cuda_dispatch.h include/ATen/ops/_foreach_abs_native.h include/ATen/ops/_foreach_abs_ops.h include/ATen/ops/_foreach_acos.h include/ATen/ops/_foreach_acos_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_acos_cpu_dispatch.h include/ATen/ops/_foreach_acos_cuda_dispatch.h include/ATen/ops/_foreach_acos_native.h include/ATen/ops/_foreach_acos_ops.h include/ATen/ops/_foreach_add.h include/ATen/ops/_foreach_add_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_add_cpu_dispatch.h include/ATen/ops/_foreach_add_cuda_dispatch.h include/ATen/ops/_foreach_add_native.h include/ATen/ops/_foreach_add_ops.h include/ATen/ops/_foreach_addcdiv.h include/ATen/ops/_foreach_addcdiv_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_addcdiv_cpu_dispatch.h include/ATen/ops/_foreach_addcdiv_cuda_dispatch.h include/ATen/ops/_foreach_addcdiv_native.h include/ATen/ops/_foreach_addcdiv_ops.h include/ATen/ops/_foreach_addcmul.h include/ATen/ops/_foreach_addcmul_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_addcmul_cpu_dispatch.h include/ATen/ops/_foreach_addcmul_cuda_dispatch.h include/ATen/ops/_foreach_addcmul_native.h include/ATen/ops/_foreach_addcmul_ops.h include/ATen/ops/_foreach_asin.h include/ATen/ops/_foreach_asin_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_asin_cpu_dispatch.h include/ATen/ops/_foreach_asin_cuda_dispatch.h include/ATen/ops/_foreach_asin_native.h include/ATen/ops/_foreach_asin_ops.h include/ATen/ops/_foreach_atan.h include/ATen/ops/_foreach_atan_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_atan_cpu_dispatch.h include/ATen/ops/_foreach_atan_cuda_dispatch.h include/ATen/ops/_foreach_atan_native.h include/ATen/ops/_foreach_atan_ops.h include/ATen/ops/_foreach_ceil.h include/ATen/ops/_foreach_ceil_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_ceil_cpu_dispatch.h include/ATen/ops/_foreach_ceil_cuda_dispatch.h include/ATen/ops/_foreach_ceil_native.h include/ATen/ops/_foreach_ceil_ops.h include/ATen/ops/_foreach_clamp_max.h include/ATen/ops/_foreach_clamp_max_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_clamp_max_cpu_dispatch.h include/ATen/ops/_foreach_clamp_max_cuda_dispatch.h include/ATen/ops/_foreach_clamp_max_native.h include/ATen/ops/_foreach_clamp_max_ops.h include/ATen/ops/_foreach_clamp_min.h include/ATen/ops/_foreach_clamp_min_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_clamp_min_cpu_dispatch.h include/ATen/ops/_foreach_clamp_min_cuda_dispatch.h include/ATen/ops/_foreach_clamp_min_native.h include/ATen/ops/_foreach_clamp_min_ops.h include/ATen/ops/_foreach_copy.h include/ATen/ops/_foreach_copy_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_copy_cpu_dispatch.h include/ATen/ops/_foreach_copy_cuda_dispatch.h include/ATen/ops/_foreach_copy_native.h include/ATen/ops/_foreach_copy_ops.h include/ATen/ops/_foreach_cos.h include/ATen/ops/_foreach_cos_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_cos_cpu_dispatch.h include/ATen/ops/_foreach_cos_cuda_dispatch.h include/ATen/ops/_foreach_cos_native.h include/ATen/ops/_foreach_cos_ops.h include/ATen/ops/_foreach_cosh.h include/ATen/ops/_foreach_cosh_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_cosh_cpu_dispatch.h include/ATen/ops/_foreach_cosh_cuda_dispatch.h include/ATen/ops/_foreach_cosh_native.h include/ATen/ops/_foreach_cosh_ops.h include/ATen/ops/_foreach_div.h include/ATen/ops/_foreach_div_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_div_cpu_dispatch.h include/ATen/ops/_foreach_div_cuda_dispatch.h include/ATen/ops/_foreach_div_native.h include/ATen/ops/_foreach_div_ops.h include/ATen/ops/_foreach_erf.h include/ATen/ops/_foreach_erf_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_erf_cpu_dispatch.h include/ATen/ops/_foreach_erf_cuda_dispatch.h include/ATen/ops/_foreach_erf_native.h include/ATen/ops/_foreach_erf_ops.h include/ATen/ops/_foreach_erfc.h include/ATen/ops/_foreach_erfc_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_erfc_cpu_dispatch.h include/ATen/ops/_foreach_erfc_cuda_dispatch.h include/ATen/ops/_foreach_erfc_native.h include/ATen/ops/_foreach_erfc_ops.h include/ATen/ops/_foreach_exp.h include/ATen/ops/_foreach_exp_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_exp_cpu_dispatch.h include/ATen/ops/_foreach_exp_cuda_dispatch.h include/ATen/ops/_foreach_exp_native.h include/ATen/ops/_foreach_exp_ops.h include/ATen/ops/_foreach_expm1.h include/ATen/ops/_foreach_expm1_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_expm1_cpu_dispatch.h include/ATen/ops/_foreach_expm1_cuda_dispatch.h include/ATen/ops/_foreach_expm1_native.h include/ATen/ops/_foreach_expm1_ops.h include/ATen/ops/_foreach_floor.h include/ATen/ops/_foreach_floor_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_floor_cpu_dispatch.h include/ATen/ops/_foreach_floor_cuda_dispatch.h include/ATen/ops/_foreach_floor_native.h include/ATen/ops/_foreach_floor_ops.h include/ATen/ops/_foreach_frac.h include/ATen/ops/_foreach_frac_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_frac_cpu_dispatch.h include/ATen/ops/_foreach_frac_cuda_dispatch.h include/ATen/ops/_foreach_frac_native.h include/ATen/ops/_foreach_frac_ops.h include/ATen/ops/_foreach_lerp.h include/ATen/ops/_foreach_lerp_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_lerp_cpu_dispatch.h include/ATen/ops/_foreach_lerp_cuda_dispatch.h include/ATen/ops/_foreach_lerp_native.h include/ATen/ops/_foreach_lerp_ops.h include/ATen/ops/_foreach_lgamma.h include/ATen/ops/_foreach_lgamma_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_lgamma_cpu_dispatch.h include/ATen/ops/_foreach_lgamma_cuda_dispatch.h include/ATen/ops/_foreach_lgamma_native.h include/ATen/ops/_foreach_lgamma_ops.h include/ATen/ops/_foreach_log.h include/ATen/ops/_foreach_log10.h include/ATen/ops/_foreach_log10_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_log10_cpu_dispatch.h include/ATen/ops/_foreach_log10_cuda_dispatch.h include/ATen/ops/_foreach_log10_native.h include/ATen/ops/_foreach_log10_ops.h include/ATen/ops/_foreach_log1p.h include/ATen/ops/_foreach_log1p_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_log1p_cpu_dispatch.h include/ATen/ops/_foreach_log1p_cuda_dispatch.h include/ATen/ops/_foreach_log1p_native.h include/ATen/ops/_foreach_log1p_ops.h include/ATen/ops/_foreach_log2.h include/ATen/ops/_foreach_log2_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_log2_cpu_dispatch.h include/ATen/ops/_foreach_log2_cuda_dispatch.h include/ATen/ops/_foreach_log2_native.h include/ATen/ops/_foreach_log2_ops.h include/ATen/ops/_foreach_log_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_log_cpu_dispatch.h include/ATen/ops/_foreach_log_cuda_dispatch.h include/ATen/ops/_foreach_log_native.h include/ATen/ops/_foreach_log_ops.h +include/ATen/ops/_foreach_max.h +include/ATen/ops/_foreach_max_compositeexplicitautograd_dispatch.h +include/ATen/ops/_foreach_max_cuda_dispatch.h +include/ATen/ops/_foreach_max_native.h +include/ATen/ops/_foreach_max_ops.h include/ATen/ops/_foreach_maximum.h include/ATen/ops/_foreach_maximum_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_maximum_cpu_dispatch.h include/ATen/ops/_foreach_maximum_cuda_dispatch.h include/ATen/ops/_foreach_maximum_native.h include/ATen/ops/_foreach_maximum_ops.h include/ATen/ops/_foreach_minimum.h include/ATen/ops/_foreach_minimum_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_minimum_cpu_dispatch.h include/ATen/ops/_foreach_minimum_cuda_dispatch.h include/ATen/ops/_foreach_minimum_native.h include/ATen/ops/_foreach_minimum_ops.h include/ATen/ops/_foreach_mul.h include/ATen/ops/_foreach_mul_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_mul_cpu_dispatch.h include/ATen/ops/_foreach_mul_cuda_dispatch.h include/ATen/ops/_foreach_mul_native.h include/ATen/ops/_foreach_mul_ops.h include/ATen/ops/_foreach_neg.h include/ATen/ops/_foreach_neg_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_neg_cpu_dispatch.h include/ATen/ops/_foreach_neg_cuda_dispatch.h include/ATen/ops/_foreach_neg_native.h include/ATen/ops/_foreach_neg_ops.h include/ATen/ops/_foreach_norm.h include/ATen/ops/_foreach_norm_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_norm_cpu_dispatch.h include/ATen/ops/_foreach_norm_cuda_dispatch.h include/ATen/ops/_foreach_norm_native.h include/ATen/ops/_foreach_norm_ops.h include/ATen/ops/_foreach_pow.h include/ATen/ops/_foreach_pow_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_pow_cpu_dispatch.h include/ATen/ops/_foreach_pow_cuda_dispatch.h include/ATen/ops/_foreach_pow_native.h include/ATen/ops/_foreach_pow_ops.h include/ATen/ops/_foreach_reciprocal.h include/ATen/ops/_foreach_reciprocal_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_reciprocal_cpu_dispatch.h include/ATen/ops/_foreach_reciprocal_cuda_dispatch.h include/ATen/ops/_foreach_reciprocal_native.h include/ATen/ops/_foreach_reciprocal_ops.h include/ATen/ops/_foreach_round.h include/ATen/ops/_foreach_round_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_round_cpu_dispatch.h include/ATen/ops/_foreach_round_cuda_dispatch.h include/ATen/ops/_foreach_round_native.h include/ATen/ops/_foreach_round_ops.h include/ATen/ops/_foreach_sigmoid.h include/ATen/ops/_foreach_sigmoid_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_sigmoid_cpu_dispatch.h include/ATen/ops/_foreach_sigmoid_cuda_dispatch.h include/ATen/ops/_foreach_sigmoid_native.h include/ATen/ops/_foreach_sigmoid_ops.h include/ATen/ops/_foreach_sign.h include/ATen/ops/_foreach_sign_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_sign_cpu_dispatch.h include/ATen/ops/_foreach_sign_cuda_dispatch.h include/ATen/ops/_foreach_sign_native.h include/ATen/ops/_foreach_sign_ops.h include/ATen/ops/_foreach_sin.h include/ATen/ops/_foreach_sin_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_sin_cpu_dispatch.h include/ATen/ops/_foreach_sin_cuda_dispatch.h include/ATen/ops/_foreach_sin_native.h include/ATen/ops/_foreach_sin_ops.h include/ATen/ops/_foreach_sinh.h include/ATen/ops/_foreach_sinh_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_sinh_cpu_dispatch.h include/ATen/ops/_foreach_sinh_cuda_dispatch.h include/ATen/ops/_foreach_sinh_native.h include/ATen/ops/_foreach_sinh_ops.h include/ATen/ops/_foreach_sqrt.h include/ATen/ops/_foreach_sqrt_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_sqrt_cpu_dispatch.h include/ATen/ops/_foreach_sqrt_cuda_dispatch.h include/ATen/ops/_foreach_sqrt_native.h include/ATen/ops/_foreach_sqrt_ops.h include/ATen/ops/_foreach_sub.h include/ATen/ops/_foreach_sub_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_sub_cpu_dispatch.h include/ATen/ops/_foreach_sub_cuda_dispatch.h include/ATen/ops/_foreach_sub_native.h include/ATen/ops/_foreach_sub_ops.h include/ATen/ops/_foreach_tan.h include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_tan_cpu_dispatch.h include/ATen/ops/_foreach_tan_cuda_dispatch.h include/ATen/ops/_foreach_tan_native.h include/ATen/ops/_foreach_tan_ops.h include/ATen/ops/_foreach_tanh.h include/ATen/ops/_foreach_tanh_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_tanh_cpu_dispatch.h include/ATen/ops/_foreach_tanh_cuda_dispatch.h include/ATen/ops/_foreach_tanh_native.h include/ATen/ops/_foreach_tanh_ops.h include/ATen/ops/_foreach_trunc.h include/ATen/ops/_foreach_trunc_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_trunc_cpu_dispatch.h include/ATen/ops/_foreach_trunc_cuda_dispatch.h include/ATen/ops/_foreach_trunc_native.h include/ATen/ops/_foreach_trunc_ops.h include/ATen/ops/_foreach_zero.h include/ATen/ops/_foreach_zero_compositeexplicitautograd_dispatch.h -include/ATen/ops/_foreach_zero_cpu_dispatch.h include/ATen/ops/_foreach_zero_cuda_dispatch.h include/ATen/ops/_foreach_zero_native.h include/ATen/ops/_foreach_zero_ops.h @@ -1300,13 +1328,20 @@ include/ATen/ops/_functional_sym_constrain_range_for_size_native.h include/ATen/ops/_functional_sym_constrain_range_for_size_ops.h include/ATen/ops/_functional_sym_constrain_range_native.h include/ATen/ops/_functional_sym_constrain_range_ops.h +include/ATen/ops/_fused_adagrad.h +include/ATen/ops/_fused_adagrad_compositeexplicitautograd_dispatch.h +include/ATen/ops/_fused_adagrad_cpu_dispatch.h +include/ATen/ops/_fused_adagrad_native.h +include/ATen/ops/_fused_adagrad_ops.h include/ATen/ops/_fused_adam.h include/ATen/ops/_fused_adam_compositeexplicitautograd_dispatch.h +include/ATen/ops/_fused_adam_cpu_dispatch.h include/ATen/ops/_fused_adam_cuda_dispatch.h include/ATen/ops/_fused_adam_native.h include/ATen/ops/_fused_adam_ops.h include/ATen/ops/_fused_adamw.h include/ATen/ops/_fused_adamw_compositeexplicitautograd_dispatch.h +include/ATen/ops/_fused_adamw_cpu_dispatch.h include/ATen/ops/_fused_adamw_cuda_dispatch.h include/ATen/ops/_fused_adamw_native.h include/ATen/ops/_fused_adamw_ops.h @@ -1329,6 +1364,7 @@ include/ATen/ops/_fused_sdp_choice_native.h include/ATen/ops/_fused_sdp_choice_ops.h include/ATen/ops/_fused_sgd.h include/ATen/ops/_fused_sgd_compositeexplicitautograd_dispatch.h +include/ATen/ops/_fused_sgd_cpu_dispatch.h include/ATen/ops/_fused_sgd_cuda_dispatch.h include/ATen/ops/_fused_sgd_native.h include/ATen/ops/_fused_sgd_ops.h @@ -1392,6 +1428,7 @@ include/ATen/ops/_indices_copy_ops.h include/ATen/ops/_indices_native.h include/ATen/ops/_indices_ops.h include/ATen/ops/_int_mm.h +include/ATen/ops/_int_mm_cpu_dispatch.h include/ATen/ops/_int_mm_cuda_dispatch.h include/ATen/ops/_int_mm_native.h include/ATen/ops/_int_mm_ops.h @@ -1407,6 +1444,10 @@ include/ATen/ops/_is_zerotensor.h include/ATen/ops/_is_zerotensor_compositeimplicitautograd_dispatch.h include/ATen/ops/_is_zerotensor_native.h include/ATen/ops/_is_zerotensor_ops.h +include/ATen/ops/_jagged_to_padded_dense_forward.h +include/ATen/ops/_jagged_to_padded_dense_forward_cuda_dispatch.h +include/ATen/ops/_jagged_to_padded_dense_forward_native.h +include/ATen/ops/_jagged_to_padded_dense_forward_ops.h include/ATen/ops/_lazy_clone.h include/ATen/ops/_lazy_clone_compositeexplicitautograd_dispatch.h include/ATen/ops/_lazy_clone_native.h @@ -1582,6 +1623,11 @@ include/ATen/ops/_neg_view_copy_native.h include/ATen/ops/_neg_view_copy_ops.h include/ATen/ops/_neg_view_native.h include/ATen/ops/_neg_view_ops.h +include/ATen/ops/_nested_compute_contiguous_strides_offsets.h +include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h +include/ATen/ops/_nested_compute_contiguous_strides_offsets_cuda_dispatch.h +include/ATen/ops/_nested_compute_contiguous_strides_offsets_native.h +include/ATen/ops/_nested_compute_contiguous_strides_offsets_ops.h include/ATen/ops/_nested_from_padded.h include/ATen/ops/_nested_from_padded_and_nested_example.h include/ATen/ops/_nested_from_padded_and_nested_example_compositeexplicitautograd_dispatch.h @@ -1701,6 +1747,10 @@ include/ATen/ops/_pad_packed_sequence.h include/ATen/ops/_pad_packed_sequence_compositeimplicitautograd_dispatch.h include/ATen/ops/_pad_packed_sequence_native.h include/ATen/ops/_pad_packed_sequence_ops.h +include/ATen/ops/_padded_dense_to_jagged_forward.h +include/ATen/ops/_padded_dense_to_jagged_forward_cuda_dispatch.h +include/ATen/ops/_padded_dense_to_jagged_forward_native.h +include/ATen/ops/_padded_dense_to_jagged_forward_ops.h include/ATen/ops/_pdist_backward.h include/ATen/ops/_pdist_backward_compositeexplicitautograd_dispatch.h include/ATen/ops/_pdist_backward_cpu_dispatch.h @@ -1783,6 +1833,10 @@ include/ATen/ops/_scaled_dot_product_attention_math_compositeimplicitautograd_di include/ATen/ops/_scaled_dot_product_attention_math_native.h include/ATen/ops/_scaled_dot_product_attention_math_ops.h include/ATen/ops/_scaled_dot_product_cudnn_attention.h +include/ATen/ops/_scaled_dot_product_cudnn_attention_backward.h +include/ATen/ops/_scaled_dot_product_cudnn_attention_backward_cuda_dispatch.h +include/ATen/ops/_scaled_dot_product_cudnn_attention_backward_native.h +include/ATen/ops/_scaled_dot_product_cudnn_attention_backward_ops.h include/ATen/ops/_scaled_dot_product_cudnn_attention_cuda_dispatch.h include/ATen/ops/_scaled_dot_product_cudnn_attention_native.h include/ATen/ops/_scaled_dot_product_cudnn_attention_ops.h @@ -1891,6 +1945,10 @@ include/ATen/ops/_sparse_compressed_tensor_unsafe.h include/ATen/ops/_sparse_compressed_tensor_unsafe_compositeimplicitautograd_dispatch.h include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h include/ATen/ops/_sparse_compressed_tensor_unsafe_ops.h +include/ATen/ops/_sparse_compressed_tensor_with_dims.h +include/ATen/ops/_sparse_compressed_tensor_with_dims_compositeexplicitautograd_dispatch.h +include/ATen/ops/_sparse_compressed_tensor_with_dims_native.h +include/ATen/ops/_sparse_compressed_tensor_with_dims_ops.h include/ATen/ops/_sparse_coo_tensor_unsafe.h include/ATen/ops/_sparse_coo_tensor_unsafe_compositeimplicitautograd_dispatch.h include/ATen/ops/_sparse_coo_tensor_unsafe_native.h @@ -1944,10 +2002,30 @@ include/ATen/ops/_sparse_mm_reduce_impl_backward_native.h include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h include/ATen/ops/_sparse_mm_reduce_impl_native.h include/ATen/ops/_sparse_mm_reduce_impl_ops.h +include/ATen/ops/_sparse_semi_structured_addmm.h +include/ATen/ops/_sparse_semi_structured_addmm_cuda_dispatch.h +include/ATen/ops/_sparse_semi_structured_addmm_native.h +include/ATen/ops/_sparse_semi_structured_addmm_ops.h +include/ATen/ops/_sparse_semi_structured_apply.h +include/ATen/ops/_sparse_semi_structured_apply_cuda_dispatch.h +include/ATen/ops/_sparse_semi_structured_apply_dense.h +include/ATen/ops/_sparse_semi_structured_apply_dense_cuda_dispatch.h +include/ATen/ops/_sparse_semi_structured_apply_dense_native.h +include/ATen/ops/_sparse_semi_structured_apply_dense_ops.h +include/ATen/ops/_sparse_semi_structured_apply_native.h +include/ATen/ops/_sparse_semi_structured_apply_ops.h include/ATen/ops/_sparse_semi_structured_linear.h include/ATen/ops/_sparse_semi_structured_linear_cuda_dispatch.h include/ATen/ops/_sparse_semi_structured_linear_native.h include/ATen/ops/_sparse_semi_structured_linear_ops.h +include/ATen/ops/_sparse_semi_structured_mm.h +include/ATen/ops/_sparse_semi_structured_mm_cuda_dispatch.h +include/ATen/ops/_sparse_semi_structured_mm_native.h +include/ATen/ops/_sparse_semi_structured_mm_ops.h +include/ATen/ops/_sparse_semi_structured_tile.h +include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h +include/ATen/ops/_sparse_semi_structured_tile_native.h +include/ATen/ops/_sparse_semi_structured_tile_ops.h include/ATen/ops/_sparse_softmax.h include/ATen/ops/_sparse_softmax_backward_data.h include/ATen/ops/_sparse_softmax_backward_data_compositeexplicitautograd_dispatch.h @@ -2756,11 +2834,16 @@ include/ATen/ops/bartlett_window_compositeexplicitautograd_dispatch.h include/ATen/ops/bartlett_window_native.h include/ATen/ops/bartlett_window_ops.h include/ATen/ops/batch_norm.h +include/ATen/ops/batch_norm_backward.h +include/ATen/ops/batch_norm_backward_cpu_dispatch.h +include/ATen/ops/batch_norm_backward_cuda_dispatch.h include/ATen/ops/batch_norm_backward_elemt.h include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h include/ATen/ops/batch_norm_backward_elemt_cuda_dispatch.h include/ATen/ops/batch_norm_backward_elemt_native.h include/ATen/ops/batch_norm_backward_elemt_ops.h +include/ATen/ops/batch_norm_backward_native.h +include/ATen/ops/batch_norm_backward_ops.h include/ATen/ops/batch_norm_backward_reduce.h include/ATen/ops/batch_norm_backward_reduce_compositeexplicitautograd_dispatch.h include/ATen/ops/batch_norm_backward_reduce_cuda_dispatch.h @@ -3148,6 +3231,7 @@ include/ATen/ops/convolution_overrideable_ops.h include/ATen/ops/copy.h include/ATen/ops/copy_compositeexplicitautograd_dispatch.h include/ATen/ops/copy_compositeexplicitautogradnonfunctional_dispatch.h +include/ATen/ops/copy_meta_dispatch.h include/ATen/ops/copy_native.h include/ATen/ops/copy_ops.h include/ATen/ops/copy_sparse_to_sparse.h @@ -3325,8 +3409,7 @@ include/ATen/ops/deg2rad_compositeexplicitautograd_dispatch.h include/ATen/ops/deg2rad_native.h include/ATen/ops/deg2rad_ops.h include/ATen/ops/dense_dim.h -include/ATen/ops/dense_dim_cpu_dispatch.h -include/ATen/ops/dense_dim_cuda_dispatch.h +include/ATen/ops/dense_dim_compositeexplicitautograd_dispatch.h include/ATen/ops/dense_dim_native.h include/ATen/ops/dense_dim_ops.h include/ATen/ops/dequantize.h @@ -5977,6 +6060,10 @@ include/ATen/ops/retains_grad.h include/ATen/ops/retains_grad_compositeimplicitautograd_dispatch.h include/ATen/ops/retains_grad_native.h include/ATen/ops/retains_grad_ops.h +include/ATen/ops/rms_norm.h +include/ATen/ops/rms_norm_compositeimplicitautograd_dispatch.h +include/ATen/ops/rms_norm_native.h +include/ATen/ops/rms_norm_ops.h include/ATen/ops/rnn_relu.h include/ATen/ops/rnn_relu_cell.h include/ATen/ops/rnn_relu_cell_compositeimplicitautograd_dispatch.h @@ -6383,8 +6470,7 @@ include/ATen/ops/sparse_csr_tensor_compositeimplicitautograd_dispatch.h include/ATen/ops/sparse_csr_tensor_native.h include/ATen/ops/sparse_csr_tensor_ops.h include/ATen/ops/sparse_dim.h -include/ATen/ops/sparse_dim_cpu_dispatch.h -include/ATen/ops/sparse_dim_cuda_dispatch.h +include/ATen/ops/sparse_dim_compositeexplicitautograd_dispatch.h include/ATen/ops/sparse_dim_native.h include/ATen/ops/sparse_dim_ops.h include/ATen/ops/sparse_mask.h @@ -7442,6 +7528,8 @@ include/ATen/ops/zeros_ops.h include/ATen/quantized/QTensorImpl.h include/ATen/quantized/Quantizer.h include/ATen/record_function.h +include/ATen/xpu/CachingHostAllocator.h +include/ATen/xpu/PinnedMemoryAllocator.h include/ATen/xpu/XPUContext.h include/ATen/xpu/XPUDevice.h include/ATen/xpu/XPUEvent.h @@ -7570,6 +7658,7 @@ include/c10/util/FunctionRef.h include/c10/util/Half-inl.h include/c10/util/Half.h include/c10/util/IdWrapper.h +include/c10/util/Lazy.h include/c10/util/LeftRight.h include/c10/util/Load.h include/c10/util/Logging.h @@ -7645,507 +7734,9 @@ include/c10/xpu/XPUMacros.h include/c10/xpu/XPUStream.h include/c10/xpu/impl/XPUGuardImpl.h include/c10/xpu/test/impl/XPUTest.h -include/caffe2/contrib/aten/aten_op.h -include/caffe2/contrib/aten/aten_op_template.h -include/caffe2/contrib/fakelowp/batch_matmul_fp16_fake_op.h -include/caffe2/contrib/fakelowp/common.h -include/caffe2/contrib/fakelowp/fp16_fc_acc_op.h -include/caffe2/contrib/fakelowp/fp16_fma.h -include/caffe2/contrib/fakelowp/fp16_gemm_utils.h -include/caffe2/contrib/fakelowp/int8_dequantize_op_nnpi.h -include/caffe2/contrib/fakelowp/int8_quantize_op_nnpi.h -include/caffe2/contrib/fakelowp/int8_swish_op_nnpi.h -include/caffe2/contrib/fakelowp/layernorm_fp16_fake_op.h -include/caffe2/contrib/fakelowp/lengths_reducer_fused_4bit_rowwise_fp16_fake_op.h -include/caffe2/contrib/fakelowp/lengths_reducer_fused_8bit_rowwise_fp16_fake_op.h -include/caffe2/contrib/fakelowp/lengths_reducer_ops.h -include/caffe2/contrib/fakelowp/quant_lut_fp16_fake_op.h -include/caffe2/contrib/fakelowp/spatial_batch_norm_fp16_fake_op.h -include/caffe2/contrib/fakelowp/sum_fp16_fake_op.h -include/caffe2/contrib/fakelowp/unary_fp16_fake_op.h -include/caffe2/contrib/gloo/allgather_ops.h -include/caffe2/contrib/gloo/allreduce_ops.h -include/caffe2/contrib/gloo/barrier_ops.h -include/caffe2/contrib/gloo/broadcast_ops.h -include/caffe2/contrib/gloo/common.h -include/caffe2/contrib/gloo/common_world_ops.h -include/caffe2/contrib/gloo/context.h -include/caffe2/contrib/gloo/reduce_scatter_ops.h -include/caffe2/contrib/gloo/store_handler.h -include/caffe2/contrib/nccl/cuda_nccl_gpu.h -include/caffe2/contrib/opencl/context.h -include/caffe2/contrib/prof/prof_dag_stats_op.h -include/caffe2/contrib/shm_mutex/shm_mutex.h -include/caffe2/contrib/tensorrt/tensorrt_op_trt.h -include/caffe2/contrib/tensorrt/tensorrt_tranformer.h -include/caffe2/contrib/tensorrt/trt_utils.h -include/caffe2/contrib/warpctc/ctc_op.h -include/caffe2/core/allocator.h -include/caffe2/core/blob.h -include/caffe2/core/blob_serialization.h -include/caffe2/core/blob_serializer_base.h -include/caffe2/core/blob_stats.h include/caffe2/core/common.h -include/caffe2/core/common_cudnn.h -include/caffe2/core/common_gpu.h -include/caffe2/core/common_omp.h -include/caffe2/core/context.h -include/caffe2/core/context_base.h -include/caffe2/core/context_gpu.h -include/caffe2/core/cudnn_wrappers.h -include/caffe2/core/db.h -include/caffe2/core/distributions_stubs.h -include/caffe2/core/event.h -include/caffe2/core/event_cpu.h -include/caffe2/core/export_c10_op_to_caffe2.h -include/caffe2/core/export_caffe2_op_to_c10.h -include/caffe2/core/flags.h -include/caffe2/core/graph.h -include/caffe2/core/hip/common_miopen.h -include/caffe2/core/hip/miopen_wrapper.h -include/caffe2/core/init.h -include/caffe2/core/logging.h include/caffe2/core/macros.h -include/caffe2/core/memonger.h -include/caffe2/core/module.h -include/caffe2/core/net.h -include/caffe2/core/net_async_base.h -include/caffe2/core/net_async_scheduling.h -include/caffe2/core/net_async_task.h -include/caffe2/core/net_async_task_future.h -include/caffe2/core/net_async_task_graph.h -include/caffe2/core/net_async_tracing.h -include/caffe2/core/net_dag_utils.h -include/caffe2/core/net_parallel.h -include/caffe2/core/net_simple.h -include/caffe2/core/net_simple_refcount.h -include/caffe2/core/nomnigraph/include/nomnigraph/Converters/Dot.h -include/caffe2/core/nomnigraph/include/nomnigraph/Generated/OpClasses.h -include/caffe2/core/nomnigraph/include/nomnigraph/Generated/OpEnum.h -include/caffe2/core/nomnigraph/include/nomnigraph/Generated/OpNames.h -include/caffe2/core/nomnigraph/include/nomnigraph/Graph/Algorithms.h -include/caffe2/core/nomnigraph/include/nomnigraph/Graph/BinaryMatchImpl.h -include/caffe2/core/nomnigraph/include/nomnigraph/Graph/Graph.h -include/caffe2/core/nomnigraph/include/nomnigraph/Graph/TarjansImpl.h -include/caffe2/core/nomnigraph/include/nomnigraph/Graph/TopoSort.h -include/caffe2/core/nomnigraph/include/nomnigraph/Representations/Compiler.h -include/caffe2/core/nomnigraph/include/nomnigraph/Representations/ControlFlow.h -include/caffe2/core/nomnigraph/include/nomnigraph/Representations/NeuralNet.h -include/caffe2/core/nomnigraph/include/nomnigraph/Support/Casting.h -include/caffe2/core/nomnigraph/include/nomnigraph/Support/Common.h -include/caffe2/core/nomnigraph/include/nomnigraph/Transformations/Match.h -include/caffe2/core/nomnigraph/include/nomnigraph/Transformations/SubgraphMatcher.h -include/caffe2/core/nomnigraph/tests/test_util.h -include/caffe2/core/numa.h -include/caffe2/core/observer.h -include/caffe2/core/operator.h -include/caffe2/core/operator_gradient.h -include/caffe2/core/operator_schema.h -include/caffe2/core/plan_executor.h -include/caffe2/core/prof_dag_counters.h -include/caffe2/core/qtensor.h -include/caffe2/core/qtensor_serialization.h -include/caffe2/core/scope_guard.h -include/caffe2/core/stats.h -include/caffe2/core/storage.h -include/caffe2/core/tensor.h -include/caffe2/core/tensor_impl.h -include/caffe2/core/tensor_int8.h -include/caffe2/core/test_utils.h include/caffe2/core/timer.h -include/caffe2/core/transform.h -include/caffe2/core/types.h -include/caffe2/core/workspace.h -include/caffe2/cuda_rtc/common_rtc.h -include/caffe2/db/create_db_op.h -include/caffe2/distributed/file_store_handler.h -include/caffe2/distributed/file_store_handler_op.h -include/caffe2/distributed/redis_store_handler.h -include/caffe2/distributed/redis_store_handler_op.h -include/caffe2/distributed/store_handler.h -include/caffe2/distributed/store_ops.h -include/caffe2/experiments/operators/fully_connected_op_decomposition.h -include/caffe2/experiments/operators/fully_connected_op_prune.h -include/caffe2/experiments/operators/fully_connected_op_sparse.h -include/caffe2/experiments/operators/funhash_op.h -include/caffe2/experiments/operators/sparse_funhash_op.h -include/caffe2/experiments/operators/sparse_matrix_reshape_op.h -include/caffe2/experiments/operators/tt_contraction_op.h -include/caffe2/experiments/operators/tt_pad_op.h -include/caffe2/ideep/ideep_utils.h -include/caffe2/ideep/operators/conv_pool_base_op.h -include/caffe2/ideep/operators/conv_transpose_unpool_base_op.h -include/caffe2/ideep/operators/operator_fallback_ideep.h -include/caffe2/ideep/utils/ideep_context.h -include/caffe2/ideep/utils/ideep_operator.h -include/caffe2/image/image_input_op.h -include/caffe2/image/transform_gpu.h -include/caffe2/mobile/contrib/ios/ios_caffe.h -include/caffe2/mobile/contrib/ios/ios_caffe_defines.h -include/caffe2/mobile/contrib/ios/ios_caffe_predictor.h -include/caffe2/mobile/contrib/ios/mpscnn/mpscnn.h -include/caffe2/mobile/contrib/ios/mpscnn/mpscnn_context.h -include/caffe2/mobile/contrib/ios/mpscnn/mpscnn_graph_mask.h -include/caffe2/mobile/contrib/ios/mpscnn/mpscnn_kernels.h -include/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.h -include/caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h -include/caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h -include/caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h -include/caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h -include/caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h -include/caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h -include/caffe2/mobile/contrib/libopencl-stub/include/libopencl.h -include/caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h -include/caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h -include/caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h -include/caffe2/mobile/contrib/nnapi/NeuralNetworks.h -include/caffe2/mobile/contrib/nnapi/dlnnapi.h -include/caffe2/mobile/contrib/nnapi/nnapi.h -include/caffe2/mobile/contrib/snpe/snpe_ffi.h -include/caffe2/mobile/contrib/ulp2/ulp.h -include/caffe2/mobile/contrib/ulp2/ulp_neon.h -include/caffe2/mpi/mpi_common.h -include/caffe2/mpi/mpi_ops.h -include/caffe2/observers/operator_attaching_net_observer.h -include/caffe2/observers/profile_observer.h -include/caffe2/observers/runcnt_observer.h -include/caffe2/observers/time_observer.h -include/caffe2/onnx/backend.h -include/caffe2/onnx/backend_rep.h -include/caffe2/onnx/device.h -include/caffe2/onnx/helper.h -include/caffe2/onnx/offline_tensor.h -include/caffe2/onnx/onnx_exporter.h -include/caffe2/onnx/onnxifi_graph_info.h -include/caffe2/onnx/onnxifi_init.h -include/caffe2/onnx/torch_ops/constants.h -include/caffe2/onnx/torch_ops/operator_sets.h -include/caffe2/onnx/torch_ops/schema.h -include/caffe2/operators/abs_op.h -include/caffe2/operators/accumulate_op.h -include/caffe2/operators/accuracy_op.h -include/caffe2/operators/acos_op.h -include/caffe2/operators/activation_ops_cudnn.h -include/caffe2/operators/affine_channel_op.h -include/caffe2/operators/alias_with_name.h -include/caffe2/operators/apmeter_op.h -include/caffe2/operators/arg_ops.h -include/caffe2/operators/asin_op.h -include/caffe2/operators/assert_op.h -include/caffe2/operators/async_net_barrier_op.h -include/caffe2/operators/atan_op.h -include/caffe2/operators/batch_box_cox_op.h -include/caffe2/operators/batch_bucketize_op.h -include/caffe2/operators/batch_gather_ops.h -include/caffe2/operators/batch_matmul_op.h -include/caffe2/operators/batch_moments_op.h -include/caffe2/operators/batch_permutation_op.h -include/caffe2/operators/batch_sparse_to_dense_op.h -include/caffe2/operators/bbox_transform_op.h -include/caffe2/operators/bisect_percentile_op.h -include/caffe2/operators/boolean_mask_ops.h -include/caffe2/operators/boolean_unmask_ops.h -include/caffe2/operators/box_with_nms_limit_op.h -include/caffe2/operators/bucketize_op.h -include/caffe2/operators/byte_weight_dequant_op.h -include/caffe2/operators/cast_op.h -include/caffe2/operators/cbrt_op.h -include/caffe2/operators/cc_bmm_bg_op.h -include/caffe2/operators/ceil_op.h -include/caffe2/operators/channel_backprop_stats_op.h -include/caffe2/operators/channel_shuffle_op.h -include/caffe2/operators/channel_stats_op.h -include/caffe2/operators/clip_op.h -include/caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.h -include/caffe2/operators/concat_split_op.h -include/caffe2/operators/conditional_op.h -include/caffe2/operators/conv_op.h -include/caffe2/operators/conv_op_cache_cudnn.h -include/caffe2/operators/conv_op_impl.h -include/caffe2/operators/conv_op_shared.h -include/caffe2/operators/conv_pool_op_base.h -include/caffe2/operators/conv_transpose_op.h -include/caffe2/operators/conv_transpose_op_impl.h -include/caffe2/operators/conv_transpose_op_mobile.h -include/caffe2/operators/conv_transpose_op_mobile_impl.h -include/caffe2/operators/conv_transpose_unpool_op_base.h -include/caffe2/operators/copy_op.h -include/caffe2/operators/copy_rows_to_tensor_op.h -include/caffe2/operators/cos_op.h -include/caffe2/operators/cosh_op.h -include/caffe2/operators/cosine_embedding_criterion_op.h -include/caffe2/operators/counter_ops.h -include/caffe2/operators/create_scope_op.h -include/caffe2/operators/cross_entropy_op.h -include/caffe2/operators/ctc_beam_search_decoder_op.h -include/caffe2/operators/ctc_greedy_decoder_op.h -include/caffe2/operators/cube_op.h -include/caffe2/operators/data_couple.h -include/caffe2/operators/dataset_ops.h -include/caffe2/operators/deform_conv_op.h -include/caffe2/operators/deform_conv_op_impl.h -include/caffe2/operators/dense_vector_to_id_list_op.h -include/caffe2/operators/distance_op.h -include/caffe2/operators/do_op.h -include/caffe2/operators/dropout_op.h -include/caffe2/operators/elementwise_add_op.h -include/caffe2/operators/elementwise_div_op.h -include/caffe2/operators/elementwise_linear_op.h -include/caffe2/operators/elementwise_logical_ops.h -include/caffe2/operators/elementwise_mul_op.h -include/caffe2/operators/elementwise_op_test.h -include/caffe2/operators/elementwise_ops.h -include/caffe2/operators/elementwise_ops_utils.h -include/caffe2/operators/elementwise_sub_op.h -include/caffe2/operators/elu_op.h -include/caffe2/operators/enforce_finite_op.h -include/caffe2/operators/ensure_clipped_op.h -include/caffe2/operators/ensure_cpu_output_op.h -include/caffe2/operators/erf_op.h -include/caffe2/operators/exp_op.h -include/caffe2/operators/expand_op.h -include/caffe2/operators/expand_squeeze_dims_op.h -include/caffe2/operators/fc_inference.h -include/caffe2/operators/feature_maps_ops.h -include/caffe2/operators/feed_blob_op.h -include/caffe2/operators/filler_op.h -include/caffe2/operators/find_duplicate_elements_op.h -include/caffe2/operators/find_op.h -include/caffe2/operators/flatten_op.h -include/caffe2/operators/flexible_top_k.h -include/caffe2/operators/floor_op.h -include/caffe2/operators/free_op.h -include/caffe2/operators/fully_connected_op.h -include/caffe2/operators/fused_rowwise_8bit_conversion_ops.h -include/caffe2/operators/fused_rowwise_nbit_conversion_ops.h -include/caffe2/operators/fused_rowwise_nbitfake_conversion_ops.h -include/caffe2/operators/fused_rowwise_random_quantization_ops.h -include/caffe2/operators/gather_fused_8bit_rowwise_op.h -include/caffe2/operators/gather_op.h -include/caffe2/operators/gather_ranges_to_dense_op.h -include/caffe2/operators/gelu_op.h -include/caffe2/operators/generate_proposals_op.h -include/caffe2/operators/generate_proposals_op_util_boxes.h -include/caffe2/operators/generate_proposals_op_util_nms.h -include/caffe2/operators/generate_proposals_op_util_nms_gpu.h -include/caffe2/operators/given_tensor_byte_string_to_uint8_fill_op.h -include/caffe2/operators/given_tensor_fill_op.h -include/caffe2/operators/glu_op.h -include/caffe2/operators/group_norm_op.h -include/caffe2/operators/gru_unit_op.h -include/caffe2/operators/h_softmax_op.h -include/caffe2/operators/half_float_ops.h -include/caffe2/operators/hard_sigmoid_op.h -include/caffe2/operators/heatmap_max_keypoint_op.h -include/caffe2/operators/hip/activation_ops_miopen.h -include/caffe2/operators/histogram_op.h -include/caffe2/operators/if_op.h -include/caffe2/operators/im2col_op.h -include/caffe2/operators/index_hash_ops.h -include/caffe2/operators/index_ops.h -include/caffe2/operators/inference_lstm_op.h -include/caffe2/operators/instance_norm_op.h -include/caffe2/operators/integral_image_op.h -include/caffe2/operators/is_empty_op.h -include/caffe2/operators/jsd_op.h -include/caffe2/operators/key_split_ops.h -include/caffe2/operators/layer_norm_op.h -include/caffe2/operators/leaky_relu_op.h -include/caffe2/operators/length_split_op.h -include/caffe2/operators/lengths_pad_op.h -include/caffe2/operators/lengths_reducer_fused_8bit_rowwise_ops.h -include/caffe2/operators/lengths_reducer_fused_nbit_rowwise_ops.h -include/caffe2/operators/lengths_reducer_ops.h -include/caffe2/operators/lengths_reducer_rowwise_8bit_ops.h -include/caffe2/operators/lengths_tile_op.h -include/caffe2/operators/lengths_top_k_op.h -include/caffe2/operators/listwise_l2r_op.h -include/caffe2/operators/load_save_op.h -include/caffe2/operators/load_save_op_util.h -include/caffe2/operators/local_response_normalization_op.h -include/caffe2/operators/locally_connected_op.h -include/caffe2/operators/locally_connected_op_impl.h -include/caffe2/operators/locally_connected_op_util.h -include/caffe2/operators/log1p_op.h -include/caffe2/operators/log_op.h -include/caffe2/operators/logit_op.h -include/caffe2/operators/loss_op.h -include/caffe2/operators/lpnorm_op.h -include/caffe2/operators/lstm_unit_op.h -include/caffe2/operators/lstm_utils.h -include/caffe2/operators/map_ops.h -include/caffe2/operators/margin_loss_l2r_op.h -include/caffe2/operators/margin_ranking_criterion_op.h -include/caffe2/operators/matmul_op.h -include/caffe2/operators/max_pool_with_index_gpu.h -include/caffe2/operators/mean_op.h -include/caffe2/operators/merge_id_lists_op.h -include/caffe2/operators/minmax_ops.h -include/caffe2/operators/mish_op.h -include/caffe2/operators/mod_op.h -include/caffe2/operators/moments_op.h -include/caffe2/operators/multi_class_accuracy_op.h -include/caffe2/operators/negate_gradient_op.h -include/caffe2/operators/negative_op.h -include/caffe2/operators/ngram_ops.h -include/caffe2/operators/no_default_engine_op.h -include/caffe2/operators/normalize_l1_op.h -include/caffe2/operators/normalize_op.h -include/caffe2/operators/numpy_tile_op.h -include/caffe2/operators/one_hot_ops.h -include/caffe2/operators/onnx_while_op.h -include/caffe2/operators/op_utils_cudnn.h -include/caffe2/operators/operator_fallback_gpu.h -include/caffe2/operators/order_switch_ops.h -include/caffe2/operators/pack_rnn_sequence_op.h -include/caffe2/operators/pack_segments.h -include/caffe2/operators/pad_op.h -include/caffe2/operators/partition_ops.h -include/caffe2/operators/percentile_op.h -include/caffe2/operators/perplexity_op.h -include/caffe2/operators/piecewise_linear_transform_op.h -include/caffe2/operators/pool_op.h -include/caffe2/operators/pool_op_util.h -include/caffe2/operators/pow_op.h -include/caffe2/operators/prefetch_op.h -include/caffe2/operators/prelu_op.h -include/caffe2/operators/prepend_dim_op.h -include/caffe2/operators/quant_decode_op.h -include/caffe2/operators/quantile_op.h -include/caffe2/operators/quantized/int8_add_op.h -include/caffe2/operators/quantized/int8_average_pool_op.h -include/caffe2/operators/quantized/int8_channel_shuffle_op.h -include/caffe2/operators/quantized/int8_concat_op.h -include/caffe2/operators/quantized/int8_conv_op.h -include/caffe2/operators/quantized/int8_conv_transpose_op.h -include/caffe2/operators/quantized/int8_dequantize_op.h -include/caffe2/operators/quantized/int8_fc_op.h -include/caffe2/operators/quantized/int8_flatten_op.h -include/caffe2/operators/quantized/int8_given_tensor_fill_op.h -include/caffe2/operators/quantized/int8_leaky_relu_op.h -include/caffe2/operators/quantized/int8_max_pool_op.h -include/caffe2/operators/quantized/int8_quantize_op.h -include/caffe2/operators/quantized/int8_relu_op.h -include/caffe2/operators/quantized/int8_reshape_op.h -include/caffe2/operators/quantized/int8_resize_nearest_op.h -include/caffe2/operators/quantized/int8_roi_align_op.h -include/caffe2/operators/quantized/int8_sigmoid_op.h -include/caffe2/operators/quantized/int8_simd.h -include/caffe2/operators/quantized/int8_slice_op.h -include/caffe2/operators/quantized/int8_softmax_op.h -include/caffe2/operators/quantized/int8_test_utils.h -include/caffe2/operators/quantized/int8_transpose_op.h -include/caffe2/operators/quantized/int8_utils.h -include/caffe2/operators/rank_loss_op.h -include/caffe2/operators/reciprocal_op.h -include/caffe2/operators/reduce_front_back_max_ops.h -include/caffe2/operators/reduce_front_back_sum_mean_ops.h -include/caffe2/operators/reduce_ops.h -include/caffe2/operators/reducer_functors.h -include/caffe2/operators/reduction_ops.h -include/caffe2/operators/relu_n_op.h -include/caffe2/operators/relu_op.h -include/caffe2/operators/remove_data_blocks_op.h -include/caffe2/operators/replace_nan_op.h -include/caffe2/operators/reshape_op.h -include/caffe2/operators/resize_3d_op.h -include/caffe2/operators/resize_op.h -include/caffe2/operators/reverse_packed_segs_op.h -include/caffe2/operators/rmac_regions_op.h -include/caffe2/operators/rms_norm_op.h -include/caffe2/operators/rnn/hip/recurrent_op_miopen.h -include/caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h -include/caffe2/operators/rnn/recurrent_network_executor.h -include/caffe2/operators/rnn/recurrent_network_executor_gpu.h -include/caffe2/operators/rnn/recurrent_network_executor_incl.h -include/caffe2/operators/rnn/recurrent_network_op.h -include/caffe2/operators/rnn/recurrent_op_cudnn.h -include/caffe2/operators/roi_align_gradient_op.h -include/caffe2/operators/roi_align_op.h -include/caffe2/operators/roi_align_rotated_gradient_op.h -include/caffe2/operators/roi_align_rotated_op.h -include/caffe2/operators/roi_pool_op.h -include/caffe2/operators/rowmul_op.h -include/caffe2/operators/rsqrt_op.h -include/caffe2/operators/scale_blobs_op.h -include/caffe2/operators/scale_op.h -include/caffe2/operators/segment_reduction_op.h -include/caffe2/operators/self_binning_histogram_op.h -include/caffe2/operators/selu_op.h -include/caffe2/operators/sequence_ops.h -include/caffe2/operators/shape_op.h -include/caffe2/operators/sigmoid_op.h -include/caffe2/operators/sin_op.h -include/caffe2/operators/sinh_op.h -include/caffe2/operators/sinusoid_position_encoding_op.h -include/caffe2/operators/slice_op.h -include/caffe2/operators/softmax_op.h -include/caffe2/operators/softmax_utils.h -include/caffe2/operators/softmax_with_loss_op.h -include/caffe2/operators/softplus_op.h -include/caffe2/operators/softsign_op.h -include/caffe2/operators/space_batch_op.h -include/caffe2/operators/sparse_dropout_with_replacement_op.h -include/caffe2/operators/sparse_itemwise_dropout_with_replacement_op.h -include/caffe2/operators/sparse_lp_regularizer_op.h -include/caffe2/operators/sparse_normalize_op.h -include/caffe2/operators/sparse_to_dense_mask_op.h -include/caffe2/operators/sparse_to_dense_op.h -include/caffe2/operators/spatial_batch_norm_op.h -include/caffe2/operators/spatial_softmax_with_loss_op.h -include/caffe2/operators/sqr_op.h -include/caffe2/operators/sqrt_op.h -include/caffe2/operators/square_root_divide_op.h -include/caffe2/operators/stats_put_ops.h -include/caffe2/operators/stop_gradient.h -include/caffe2/operators/string_ops.h -include/caffe2/operators/stump_func_op.h -include/caffe2/operators/summarize_op.h -include/caffe2/operators/swish_op.h -include/caffe2/operators/tan_op.h -include/caffe2/operators/tanh_op.h -include/caffe2/operators/tensor_protos_db_input.h -include/caffe2/operators/text_file_reader_utils.h -include/caffe2/operators/thresholded_relu_op.h -include/caffe2/operators/tile_op.h -include/caffe2/operators/top_k.h -include/caffe2/operators/transpose_op.h -include/caffe2/operators/tt_linear_op.h -include/caffe2/operators/unique_ops.h -include/caffe2/operators/unsafe_coalesce.h -include/caffe2/operators/upsample_op.h -include/caffe2/operators/utility_ops.h -include/caffe2/operators/variable_length_sequence_padding.h -include/caffe2/operators/weighted_multi_sampling_op.h -include/caffe2/operators/weighted_sample_op.h -include/caffe2/operators/while_op.h -include/caffe2/operators/zero_gradient_op.h -include/caffe2/opt/annotations.h -include/caffe2/opt/backend_cutting.h -include/caffe2/opt/backend_transformer_base.h -include/caffe2/opt/bound_shape_inferencer.h -include/caffe2/opt/converter.h -include/caffe2/opt/custom/cc_amrc.h -include/caffe2/opt/custom/concat_elim.h -include/caffe2/opt/custom/freeze_quantization_params.h -include/caffe2/opt/custom/in_batch_broadcast.h -include/caffe2/opt/custom/pointwise_elim.h -include/caffe2/opt/device.h -include/caffe2/opt/distributed.h -include/caffe2/opt/fakefp16_transform.h -include/caffe2/opt/fusion.h -include/caffe2/opt/glow_net_transform.h -include/caffe2/opt/mobile.h -include/caffe2/opt/nql/ast.h -include/caffe2/opt/nql/graphmatcher.h -include/caffe2/opt/onnx_convert.h -include/caffe2/opt/onnxifi_op.h -include/caffe2/opt/onnxifi_transformer.h -include/caffe2/opt/optimize_ideep.h -include/caffe2/opt/optimizer.h -include/caffe2/opt/passes.h -include/caffe2/opt/shape_info.h -include/caffe2/opt/tvm_transformer.h include/caffe2/perfkernels/adagrad.h include/caffe2/perfkernels/batch_box_cox.h include/caffe2/perfkernels/common.h @@ -8161,81 +7752,6 @@ include/caffe2/perfkernels/lstm_unit_cpu_common.h include/caffe2/perfkernels/math.h include/caffe2/perfkernels/typed_axpy.h include/caffe2/perfkernels/vectorizer.h -include/caffe2/predictor/InferenceGraph.h -include/caffe2/predictor/ThreadLocalPtr.h -include/caffe2/predictor/emulator/benchmark.h -include/caffe2/predictor/emulator/data_filler.h -include/caffe2/predictor/emulator/emulator.h -include/caffe2/predictor/emulator/net_supplier.h -include/caffe2/predictor/emulator/output_formatter.h -include/caffe2/predictor/emulator/profiler.h -include/caffe2/predictor/emulator/std_output_formatter.h -include/caffe2/predictor/emulator/time_profiler.h -include/caffe2/predictor/emulator/utils.h -include/caffe2/predictor/predictor.h -include/caffe2/predictor/predictor_config.h -include/caffe2/predictor/predictor_utils.h -include/caffe2/predictor/transforms.h -include/caffe2/proto/caffe2.pb.h -include/caffe2/proto/caffe2_pb.h -include/caffe2/proto/torch.pb.h -include/caffe2/python/dlpack.h -include/caffe2/python/pybind_state.h -include/caffe2/python/pybind_state_dlpack.h -include/caffe2/python/pybind_state_registry.h -include/caffe2/python/pybind_workspace.h -include/caffe2/quantization/server/activation_distribution_observer.h -include/caffe2/quantization/server/batch_matmul_dnnlowp_op.h -include/caffe2/quantization/server/batch_permutation_dnnlowp_op.h -include/caffe2/quantization/server/caffe2_dnnlowp_utils.h -include/caffe2/quantization/server/channel_shuffle_dnnlowp_op.h -include/caffe2/quantization/server/compute_equalization_scale.h -include/caffe2/quantization/server/concat_dnnlowp_op.h -include/caffe2/quantization/server/conv_dnnlowp_acc16_op.h -include/caffe2/quantization/server/conv_dnnlowp_op.h -include/caffe2/quantization/server/conv_pool_dnnlowp_op_base.h -include/caffe2/quantization/server/conv_relu_op.h -include/caffe2/quantization/server/dequantize_dnnlowp_op.h -include/caffe2/quantization/server/dnnlowp.h -include/caffe2/quantization/server/dnnlowp_op.h -include/caffe2/quantization/server/dnnlowp_partition.h -include/caffe2/quantization/server/dynamic_histogram.h -include/caffe2/quantization/server/elementwise_dnnlowp_op.h -include/caffe2/quantization/server/elementwise_linear_dnnlowp_op.h -include/caffe2/quantization/server/fb_fc_packed_op.h -include/caffe2/quantization/server/fbgemm_fp16_pack_op.h -include/caffe2/quantization/server/fbgemm_pack_blob.h -include/caffe2/quantization/server/fbgemm_pack_matrix_cache.h -include/caffe2/quantization/server/fbgemm_pack_op.h -include/caffe2/quantization/server/fully_connected_dnnlowp_acc16_op.h -include/caffe2/quantization/server/fully_connected_dnnlowp_op.h -include/caffe2/quantization/server/fully_connected_fake_lowp_op.h -include/caffe2/quantization/server/group_norm_dnnlowp_op.h -include/caffe2/quantization/server/im2col_dnnlowp.h -include/caffe2/quantization/server/int8_gen_quant_params.h -include/caffe2/quantization/server/int8_gen_quant_params_min_max.h -include/caffe2/quantization/server/int8_quant_scheme_blob_fill.h -include/caffe2/quantization/server/kl_minimization.h -include/caffe2/quantization/server/l2_minimization.h -include/caffe2/quantization/server/lstm_unit_dnnlowp_op.h -include/caffe2/quantization/server/mmio.h -include/caffe2/quantization/server/op_wrapper.h -include/caffe2/quantization/server/pool_dnnlowp_op_avx2.h -include/caffe2/quantization/server/quantization_error_minimization.h -include/caffe2/quantization/server/quantize_dnnlowp_op.h -include/caffe2/quantization/server/relu_dnnlowp_op.h -include/caffe2/quantization/server/resize_nearest_3d_dnnlowp_op.h -include/caffe2/quantization/server/resize_nearest_dnnlowp_op.h -include/caffe2/quantization/server/sigmoid.h -include/caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.h -include/caffe2/quantization/server/tanh.h -include/caffe2/quantization/server/transpose.h -include/caffe2/quantization/server/utility_dnnlowp_ops.h -include/caffe2/queue/blobs_queue.h -include/caffe2/queue/blobs_queue_db.h -include/caffe2/queue/queue_ops.h -include/caffe2/queue/rebatching_queue.h -include/caffe2/queue/rebatching_queue_ops.h include/caffe2/serialize/crc_alt.h include/caffe2/serialize/file_adapter.h include/caffe2/serialize/in_memory_adapter.h @@ -8243,61 +7759,9 @@ include/caffe2/serialize/inline_container.h include/caffe2/serialize/istream_adapter.h include/caffe2/serialize/read_adapter_interface.h include/caffe2/serialize/versions.h -include/caffe2/sgd/adadelta_op.h -include/caffe2/sgd/adagrad_fused.h -include/caffe2/sgd/adagrad_op.h -include/caffe2/sgd/adam_op.h -include/caffe2/sgd/clip_tensor_op.h -include/caffe2/sgd/decay_adagrad_op.h -include/caffe2/sgd/fp16_momentum_sgd_op.h -include/caffe2/sgd/fp32_momentum_sgd_op.h -include/caffe2/sgd/ftrl_op.h -include/caffe2/sgd/gftrl_op.h -include/caffe2/sgd/iter_op.h -include/caffe2/sgd/lars_op.h -include/caffe2/sgd/learning_rate_adaption_op.h -include/caffe2/sgd/learning_rate_functors.h -include/caffe2/sgd/learning_rate_op.h -include/caffe2/sgd/math_lp.h -include/caffe2/sgd/momentum_sgd_op.h -include/caffe2/sgd/rmsprop_op.h -include/caffe2/sgd/rowwise_adagrad_fused.h -include/caffe2/sgd/rowwise_counter.h -include/caffe2/sgd/storm_op.h -include/caffe2/sgd/weight_scale_op.h -include/caffe2/sgd/wngrad_op.h -include/caffe2/sgd/yellowfin_op.h -include/caffe2/share/contrib/zstd/quant_decomp_zstd_op.h -include/caffe2/transforms/common_subexpression_elimination.h -include/caffe2/transforms/conv_to_nnpack_transform.h -include/caffe2/transforms/pattern_net_transform.h -include/caffe2/transforms/single_op_transform.h -include/caffe2/utils/bench_utils.h -include/caffe2/utils/cast.h -include/caffe2/utils/cblas.h include/caffe2/utils/conversions.h -include/caffe2/utils/cpu_neon.h -include/caffe2/utils/cpuid.h -include/caffe2/utils/eigen_utils.h -include/caffe2/utils/filler.h include/caffe2/utils/fixed_divisor.h -include/caffe2/utils/knob_patcher.h -include/caffe2/utils/knobs.h -include/caffe2/utils/map_utils.h -include/caffe2/utils/math-detail.h -include/caffe2/utils/math.h -include/caffe2/utils/math/broadcast.h -include/caffe2/utils/math/elementwise.h -include/caffe2/utils/math/half_utils.h -include/caffe2/utils/math/reduce.h -include/caffe2/utils/math/transpose.h -include/caffe2/utils/math/utils.h -include/caffe2/utils/murmur_hash3.h -include/caffe2/utils/proto_utils.h include/caffe2/utils/proto_wrap.h -include/caffe2/utils/signal_handler.h -include/caffe2/utils/simple_queue.h -include/caffe2/utils/smart_tensor_printer.h include/caffe2/utils/string_utils.h include/caffe2/utils/threadpool/ThreadPool.h include/caffe2/utils/threadpool/ThreadPoolCommon.h @@ -8305,11 +7769,6 @@ include/caffe2/utils/threadpool/WorkersPool.h include/caffe2/utils/threadpool/pthreadpool-cpp.h include/caffe2/utils/threadpool/pthreadpool.h include/caffe2/utils/threadpool/thread_pool_guard.h -include/caffe2/utils/zmq_helper.h -include/caffe2/video/optical_flow.h -include/caffe2/video/video_decoder.h -include/caffe2/video/video_input_op.h -include/caffe2/video/video_io.h include/cpuinfo.h %%AMD64%%include/dnnl.h %%AMD64%%include/dnnl.hpp @@ -8346,6 +7805,7 @@ include/kineto/LoggingAPI.h include/kineto/ThreadUtil.h include/kineto/TraceSpan.h include/kineto/libkineto.h +include/kineto/output_base.h include/kineto/time_since_epoch.h include/libshm.h %%AMD64%%include/oneapi/dnnl/dnnl.h @@ -8377,6 +7837,7 @@ include/torch/csrc/DataLoader.h include/torch/csrc/Device.h include/torch/csrc/Dtype.h include/torch/csrc/DynamicTypes.h +include/torch/csrc/Event.h include/torch/csrc/Exceptions.h include/torch/csrc/Export.h include/torch/csrc/Generator.h @@ -8658,6 +8119,7 @@ include/torch/csrc/distributed/c10d/NCCLUtils.hpp include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp include/torch/csrc/distributed/c10d/PrefixStore.hpp include/torch/csrc/distributed/c10d/ProcessGroup.hpp +include/torch/csrc/distributed/c10d/ProcessGroupCudaP2P.hpp include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp @@ -8679,6 +8141,10 @@ include/torch/csrc/distributed/c10d/WinSockUtils.hpp include/torch/csrc/distributed/c10d/Work.hpp include/torch/csrc/distributed/c10d/c10d.h include/torch/csrc/distributed/c10d/comm.hpp +include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp +include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp +include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp +include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp include/torch/csrc/distributed/c10d/debug.h include/torch/csrc/distributed/c10d/default_comm_hooks.hpp include/torch/csrc/distributed/c10d/error.h @@ -8738,6 +8204,8 @@ include/torch/csrc/dynamo/init.h include/torch/csrc/dynamo/python_compiled_autograd.h include/torch/csrc/dynamo/utils.h include/torch/csrc/functorch/init.h +include/torch/csrc/inductor/aoti_eager/kernel_holder.h +include/torch/csrc/inductor/aoti_eager/kernel_meta_info.h include/torch/csrc/inductor/aoti_runner/model_container_runner.h include/torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h include/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h @@ -8754,6 +8222,7 @@ include/torch/csrc/inductor/aoti_runtime/utils_cuda.h include/torch/csrc/inductor/aoti_torch/c/shim.h include/torch/csrc/inductor/aoti_torch/generated/c_shim_cpu.h include/torch/csrc/inductor/aoti_torch/generated/c_shim_cuda.h +include/torch/csrc/inductor/aoti_torch/mkldnn_tensor.h include/torch/csrc/inductor/aoti_torch/proxy_executor.h include/torch/csrc/inductor/aoti_torch/tensor_converter.h include/torch/csrc/inductor/aoti_torch/utils.h @@ -9097,7 +8566,6 @@ include/torch/csrc/jit/serialization/import.h include/torch/csrc/jit/serialization/import_export_constants.h include/torch/csrc/jit/serialization/import_export_functions.h include/torch/csrc/jit/serialization/import_export_helpers.h -include/torch/csrc/jit/serialization/import_legacy.h include/torch/csrc/jit/serialization/import_read.h include/torch/csrc/jit/serialization/import_source.h include/torch/csrc/jit/serialization/mobile_bytecode_generated.h @@ -9217,6 +8685,7 @@ include/torch/csrc/monitor/counters.h include/torch/csrc/monitor/events.h include/torch/csrc/monitor/python_init.h include/torch/csrc/mps/Module.h +include/torch/csrc/mtia/Module.h include/torch/csrc/multiprocessing/init.h include/torch/csrc/onnx/back_compat.h include/torch/csrc/onnx/diagnostics/diagnostics.h @@ -9241,13 +8710,21 @@ include/torch/csrc/profiler/python/pybind.h include/torch/csrc/profiler/standalone/execution_trace_observer.h include/torch/csrc/profiler/standalone/itt_observer.h include/torch/csrc/profiler/standalone/nvtx_observer.h +include/torch/csrc/profiler/standalone/privateuse1_observer.h include/torch/csrc/profiler/stubs/base.h include/torch/csrc/profiler/unwind/action.h include/torch/csrc/profiler/unwind/communicate.h +include/torch/csrc/profiler/unwind/debug_info.h include/torch/csrc/profiler/unwind/dwarf_enums.h +include/torch/csrc/profiler/unwind/dwarf_symbolize_enums.h include/torch/csrc/profiler/unwind/eh_frame_hdr.h +include/torch/csrc/profiler/unwind/fast_symbolizer.h include/torch/csrc/profiler/unwind/fde.h include/torch/csrc/profiler/unwind/lexer.h +include/torch/csrc/profiler/unwind/line_number_program.h +include/torch/csrc/profiler/unwind/mem_file.h +include/torch/csrc/profiler/unwind/range_table.h +include/torch/csrc/profiler/unwind/sections.h include/torch/csrc/profiler/unwind/unwind.h include/torch/csrc/profiler/unwind/unwind_error.h include/torch/csrc/profiler/unwind/unwinder.h @@ -9309,13 +8786,13 @@ include/torch/custom_class_detail.h include/torch/extension.h include/torch/library.h include/torch/script.h -%%AMD64%%lib/cmake/dnnl/dnnl-config-version.cmake -%%AMD64%%lib/cmake/dnnl/dnnl-config.cmake -%%AMD64%%lib/cmake/dnnl/dnnl-targets-%%CMAKE_BUILD_TYPE%%.cmake -%%AMD64%%lib/cmake/dnnl/dnnl-targets.cmake -%%AMD64%%lib/libCaffe2_perfkernels_avx.a -%%AMD64%%lib/libCaffe2_perfkernels_avx2.a -%%AMD64%%lib/libCaffe2_perfkernels_avx512.a +lib/cmake/dnnl/dnnl-config-version.cmake +lib/cmake/dnnl/dnnl-config.cmake +lib/cmake/dnnl/dnnl-targets-%%CMAKE_BUILD_TYPE%%.cmake +lib/cmake/dnnl/dnnl-targets.cmake +lib/libCaffe2_perfkernels_avx.a +lib/libCaffe2_perfkernels_avx2.a +lib/libCaffe2_perfkernels_avx512.a lib/libc10.so lib/libcpuinfo.a %%AMD64%%lib/libdnnl.a @@ -9325,8 +8802,6 @@ lib/libtorch.so lib/libtorch_cpu.so lib/libtorch_global_deps.so lib/libtorch_python.so -%%PYTHON_SITELIBDIR%%/caffe2/proto/caffe2_pb2.py -%%PYTHON_SITELIBDIR%%/caffe2/proto/torch_pb2.py libdata/pkgconfig/libcpuinfo.pc share/ATen/Declarations.yaml share/cmake/ATen/ATenConfig.cmake