codegen/native_functions.yaml in torch-rb-0.9.2 vs codegen/native_functions.yaml in torch-rb-0.10.0
- old
+ new
@@ -98,14 +98,53 @@
dispatch:
CompositeExplicitAutograd: _fw_primal
- func: _make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
variants: function
+ dispatch:
+ CompositeExplicitAutograd: _make_dual
- func: _unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
variants: function
+# NOTE: [_new_zeros_with_same_feature_meta]
+# This function creates a new tensor with the layout and TensorOptions
+# of `other` but also takes into account the batch dimensions of `self`
+#
+# This function has a couple extra constraints because it is also used for `jvp`
+# in functorch.
+# - is used for forward AD because there is the restriction
+# that the primal and tangent must have the same layout
+# - We cannot assume that `self` and `other` have the same sizes or even dim
+# because in the inplace over view case, `other` is the base tensor, and
+# `self` is the forward grad with respect to the view, which can have an
+# entirely different shape
+# - takes the number of batch dims for `self` because we also handle
+# some batching logic. We handle that here instead of a batching rule because
+# we'd like to avoid calling as_strided in the batching rule (so as to enable
+# nested vmap in functorch).
+# - needs to be CompositeExplicitAutograd for jvp support in functorch.
+# functorch currently relies on TensorWrapper, which does not have storage.
+# CompositeExplicitAutograd makes sure the TensorWrapper is unwrapped.
+# - this function may eventually take on another int argument to store
+# the number of batch dims for other once we support that use case
+- func: _new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _new_zeros_with_same_feature_meta
+
+# This function compares the storage numel of self with that of other, where
+# storage numel is computed as: `other.storage().nbytes() / other.itemsize()`.
+# We create this function for composite compliance purposes. The batching rule
+# always returns true because vmapped as_strided does not support accessing
+# storage locations not indexable by the input tensor.
+# See the note above for more information.
+- func: _has_same_storage_numel(Tensor self, Tensor other) -> bool
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _has_same_storage_numel
+
- func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
variants: method
- func: rename(Tensor(a) self, Dimname[]? names) -> Tensor(a)
variants: method
@@ -174,10 +213,21 @@
- func: _masked_scale(Tensor self, Tensor mask, float scale) -> Tensor
variants: function
dispatch:
CUDA: masked_scale_cuda
+- func: native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)
+ variants: function
+ dispatch:
+ CPU: native_dropout_cpu
+ CUDA: native_dropout_cuda
+
+- func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
+ dispatch:
+ CPU: native_dropout_backward_cpu
+ CUDA: native_dropout_backward_cuda
+
- func: _sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)
- func: _sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)
- func: _sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)
@@ -207,21 +257,27 @@
- func: abs(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: abs
+ SparseCPU, SparseCUDA: abs_sparse
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
- func: abs_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: abs_
+ SparseCPU, SparseCUDA: abs_sparse_
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
- func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: abs_out
+ SparseCPU, SparseCUDA: abs_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
# Note [Adding an alias]
# To add an alias do the following:
#
# 1) Copy the original functions native_functions.yaml entry, but replace the
@@ -229,22 +285,19 @@
# keys for the aliases. Specifying a dispatch key will prevent
# autograd from recording the operations the alias performs, which
# will stop it from "inheriting" the original operation's autograd behavior.
# 2) Implement the corresponding functions and have them redispatch to the
# original function.
-# 3) Add entries for the alias (and original function, if needed) to
-# aten/src/ATen/core/interned_strings.h
-# (This may require removing an entry from ATen/core/aten_interned_strings.h.)
-# 4) Add docstrings to the new function that reference the original function,
+# 3) Add docstrings to the new function that reference the original function,
# and document the method as usual (if it exists.)
# (See torch/_torch_docs.py and docs/source/torch.rst if adding a function,
# torch/_tensor_docs.py and docs/source/tensors.rst if adding a method,
# or module-specific doc bindings (like torch/linalg/__init__.py) if
# adding an alias in a namespace.)
-# 5) Update torch/overrides.py consistent with the original function.
-# 6) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
-# 7) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
+# 4) Update torch/overrides.py consistent with the original function.
+# 5) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
+# 6) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
# in op_db list in torch/testing/_internal/common_methods_invocations.py
#
# See torch.absolute, an alias for torch.abs, as an example.
# Absolute, alias for abs
@@ -262,15 +315,17 @@
- func: angle(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CPU, CUDA: angle
+ SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
- func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: angle_out
+ SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
- func: view_as_real(Tensor(a) self) -> Tensor(a)
variants: function
dispatch:
CPU, CUDA: view_as_real
@@ -281,20 +336,28 @@
CPU, CUDA: view_as_complex
- func: sgn(Tensor self) -> Tensor
variants: function, method
structured_delegate: sgn.out
+ dispatch:
+ SparseCPU, SparseCUDA: sgn_sparse
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
- func: sgn_(Tensor(a!) self) -> Tensor(a!)
variants: method
structured_delegate: sgn.out
+ dispatch:
+ SparseCPU, SparseCUDA: sgn_sparse_
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
- func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sgn_out
+ SparseCPU, SparseCUDA: sgn_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
- func: real(Tensor(a) self) -> Tensor(a)
device_check: NoCheck # TensorIterator
variants: function
@@ -313,23 +376,26 @@
- func: _conj_physical(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: _conj_physical
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
- func: conj_physical(Tensor self) -> Tensor
variants: function, method
- func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, CUDA: conj_physical_out
SparseCPU, SparseCUDA: conj_physical_out_sparse
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
- func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
dispatch:
CompositeExplicitAutograd: conj_physical_
+ SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
- func: resolve_conj(Tensor(a) self) -> Tensor(a)
variants: function, method
- func: resolve_neg(Tensor(a) self) -> Tensor(a)
@@ -379,10 +445,11 @@
variants: function, method
dispatch:
SparseCPU, SparseCUDA: add_sparse
SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
MkldnnCPU: mkldnn_add
+ ZeroTensor: add_zerotensor
- func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
structured_delegate: add.out
@@ -452,10 +519,12 @@
- func: addmv.out(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
CPU: addmv_out_cpu
CUDA: addmv_out_cuda
+ SparseCsrCPU: addmv_out_sparse_csr
+ SparseCsrCUDA: addmv_out_sparse_csr_cuda
- func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
variants: function, method
dispatch:
CPU, CUDA: addr
@@ -530,11 +599,11 @@
- func: arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
- func: arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: arange_cpu_out
+ CPU, Meta: arange_out
CUDA: arange_cuda_out
# This function is a temporary hack to allow tracing of arange like constructs with dynamic
# bounds on arange. Normal arange is not traceable because it does not take any tensor inputs;
# if the range you need is based on another tensor, calling this function directly will
@@ -586,20 +655,28 @@
- func: arccosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- func: asinh(Tensor self) -> Tensor
variants: function, method
structured_delegate: asinh.out
+ dispatch:
+ SparseCPU, SparseCUDA: asinh_sparse
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
- func: asinh_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
structured_delegate: asinh.out
+ dispatch:
+ SparseCPU, SparseCUDA: asinh_sparse_
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
- func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: asinh_out
+ SparseCPU, SparseCUDA: asinh_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
# arcsinh, alias for asinh
- func: arcsinh(Tensor self) -> Tensor
variants: function, method
@@ -609,20 +686,29 @@
- func: arcsinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- func: atanh(Tensor self) -> Tensor
structured_delegate: atanh.out
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: atanh
+ SparseCPU, SparseCUDA: atanh_sparse
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
- func: atanh_(Tensor(a!) self) -> Tensor(a!)
structured_delegate: atanh.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: atanh_sparse_
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
- func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: atanh_out
+ SparseCPU, SparseCUDA: atanh_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
# arctanh, alias for atanh
- func: arctanh(Tensor self) -> Tensor
variants: function, method
@@ -632,44 +718,48 @@
- func: arctanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
variants: function, method
dispatch:
- CPU, CUDA, Meta: as_strided_tensorimpl
+ ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
device_check: NoCheck
device_guard: False
- func: as_strided_(Tensor(a!) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a!)
use_const_ref_for_mutable_tensors: True
variants: function, method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: as_strided_
- func: asin(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: asin.out
dispatch:
SparseCPU, SparseCUDA: asin_sparse
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
- func: asin_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: asin.out
dispatch:
SparseCPU, SparseCUDA: asin_sparse_
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
- func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: asin_out
- SparseCPU, SparseCUDA: asin_out_sparse
+ SparseCPU, SparseCUDA: asin_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
# arcsin, alias of asin
- func: arcsin(Tensor self) -> Tensor
variants: function, method
@@ -680,22 +770,30 @@
- func: atan(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: atan.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: atan_sparse
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
- func: atan_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: atan.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: atan_sparse_
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
- func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: atan_out
+ SparseCPU, SparseCUDA: atan_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
# arctan, alias of atan
- func: arctan(Tensor self) -> Tensor
variants: function, method
@@ -721,28 +819,23 @@
- func: atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]
variants: function
- func: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
variants: function, method
- dispatch:
- CPU: baddbmm_cpu
- CUDA: baddbmm_cuda
+ structured_delegate: baddbmm.out
- func: baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
variants: method
- dispatch:
- CPU: baddbmm__cpu
- CUDA: baddbmm__cuda
+ structured_delegate: baddbmm.out
-- func: _baddbmm_mkl_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
- variants: function
-
- func: baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ structured: True
variants: function
dispatch:
CPU: baddbmm_out_cpu
CUDA: baddbmm_out_cuda
+ SparseCsrCUDA: baddbmm_out_sparse_csr_cuda
- func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: bartlett_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -786,11 +879,11 @@
# with `bernoulli(Tensor self, *, Generator? generator=None)` declaration.
- func: bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
-- func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias) -> Tensor
+- func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor
- func: binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
device_check: NoCheck # TensorIterator
python_module: nn
variants: function
@@ -884,53 +977,69 @@
CompositeExplicitAutograd: copysign_out
- func: logical_not(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_not
- func: logical_not_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_not_
- func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_not_out
- func: logical_xor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_xor
- func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_xor_
- func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_xor_out
- func: logical_and(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_and
- func: logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_and_
- func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_and_out
- func: logical_or(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: logical_or
- func: logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: logical_or_
- func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_or_out
@@ -938,32 +1047,38 @@
- func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: bmm(Tensor self, Tensor mat2) -> Tensor
+ structured_delegate: bmm.out
variants: function, method
dispatch:
- CPU: bmm_cpu
- CUDA: bmm_cuda
SparseCPU: bmm_sparse_cpu
SparseCUDA: bmm_sparse_cuda
- func: bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
+ structured: True
variants: function
dispatch:
CPU: bmm_out_cpu
CUDA: bmm_out_cuda
SparseCPU: bmm_out_sparse_cpu
SparseCUDA: bmm_out_sparse_cuda
+ SparseCsrCUDA: bmm_out_sparse_csr_cuda
- func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
device_check: NoCheck
device_guard: False
- func: broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
variants: function, method
+- func: _sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
+ variants: function
+ dispatch:
+ SparseCPU, SparseCUDA: sparse_broadcast_to
+
- func: cat(Tensor[] tensors, int dim=0) -> Tensor
dispatch:
CompositeExplicitAutograd: cat
- func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -990,24 +1105,30 @@
device_check: NoCheck # TensorIterator
structured_delegate: ceil.out
variants: function, method
dispatch:
CompositeExplicitAutograd: ceil
+ SparseCPU, SparseCUDA: ceil_sparse
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
- func: ceil_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: ceil.out
variants: function, method
dispatch:
CompositeExplicitAutograd: ceil_
+ SparseCPU, SparseCUDA: ceil_sparse_
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
- func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: ceil_out
+ SparseCPU, SparseCUDA: ceil_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
# alias for torch.linalg.multi_dot
- func: chain_matmul(Tensor[] matrices) -> Tensor
variants: function
@@ -1017,22 +1138,22 @@
- func: unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[]
variants: function, method
device_check: NoCheck
device_guard: False
-- func: chunk(Tensor(a) self, int chunks, int dim=0) -> Tensor(a)[]
+- func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
variants: function, method
device_check: NoCheck
device_guard: False
-- func: tensor_split.sections(Tensor(a) self, int sections, int dim=0) -> Tensor(a)[]
+- func: tensor_split.sections(Tensor(a -> *) self, int sections, int dim=0) -> Tensor(a)[]
variants: function, method
-- func: tensor_split.indices(Tensor(a) self, int[] indices, int dim=0) -> Tensor(a)[]
+- func: tensor_split.indices(Tensor(a -> *) self, int[] indices, int dim=0) -> Tensor(a)[]
variants: function, method
-- func: tensor_split.tensor_indices_or_sections(Tensor(a) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
+- func: tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
variants: function, method
- func: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
@@ -1184,29 +1305,35 @@
- func: contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)
variants: method
manual_cpp_binding: True
- func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: convolution
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, int[]? bias_sizes, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ dispatch:
+ CompositeExplicitAutograd, CUDA: convolution_backward
+
- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
dispatch:
CompositeExplicitAutograd: convolution_overrideable
- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
dispatch:
CompositeExplicitAutograd: convolution_backward_overrideable
- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _convolution
- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
-- func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-
- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
@@ -1237,11 +1364,13 @@
variants: method
device_check: NoCheck
device_guard: False
dispatch:
MkldnnCPU: copy_mkldnn_
+ SparseCPU, SparseCUDA, SparseHIP: copy_sparse_wrapper_
CompositeExplicitAutograd: copy_
+ SparseCsrCPU, SparseCsrCUDA: copy_sparse_csr_
- func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
dispatch: {}
# We need this to be able to properly copy from a CPU to an XLA tensor with different sizes.
@@ -1318,60 +1447,18 @@
# NB: You can only use this if you used cudnn_batch_norm training=True
- func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
dispatch:
CUDA: cudnn_batch_norm_backward
-- func: cudnn_convolution.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_deprecated
-
-- func: cudnn_convolution.deprecated2(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_deprecated2
-
- func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
dispatch:
CUDA: cudnn_convolution
-- func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_backward_input
-
-- func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
- dispatch:
- CUDA: cudnn_convolution_backward
-
-- func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_backward_weight
-
-- func: cudnn_convolution_transpose.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_deprecated
-
-- func: cudnn_convolution_transpose.deprecated2(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_deprecated2
-
- func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
dispatch:
CUDA: cudnn_convolution_transpose
-# NB: output_padding not strictly needed here, but it's helpful for the float
-# backwards
-- func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
- dispatch:
- CUDA: cudnn_convolution_transpose_backward
-
-- func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_backward_input
-
-- func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
- dispatch:
- CUDA: cudnn_convolution_transpose_backward_weight
-
- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
dispatch:
CUDA: cudnn_convolution_relu
- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
@@ -1514,19 +1601,25 @@
CPU: ctc_loss_backward_cpu
CUDA: ctc_loss_backward_gpu
- func: diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: diag_embed
- func: diagflat(Tensor self, int offset=0) -> Tensor
variants: function, method
- func: diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)
variants: function, method
dispatch:
CompositeExplicitAutograd: diagonal
+- func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
+ python_module: linalg
+ variants: function
+
- func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
variants: function, method
- func: diagonal_backward(Tensor grad_output, int[] input_sizes, int offset, int dim1, int dim2) -> Tensor
variants: function
@@ -1569,10 +1662,11 @@
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: div.out
dispatch:
SparseCPU, SparseCUDA: div_sparse
+ ZeroTensor: div_zerotensor
- func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
structured_delegate: div.out
@@ -1779,16 +1873,23 @@
CPU: empty_cpu
CUDA: empty_cuda
Meta: empty_meta
MkldnnCPU: empty_mkldnn
SparseCPU, SparseCUDA: empty_sparse
+ SparseCsrCPU, SparseCsrCUDA: empty_sparse_csr
+# We do not make new_empty a composite that calls into new_empty_strided, as the strided version
+# is significantly more difficult for different backends to implement
- func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
variants: method
+ dispatch:
+ CompositeExplicitAutograd: new_empty
- func: new_empty_strided(Tensor self, int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
variants: method
+ dispatch:
+ CompositeExplicitAutograd: new_empty_strided
- func: new_full(Tensor self, int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
variants: method
- func: new_zeros(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -1818,10 +1919,11 @@
device_guard: False
dispatch:
CPU, Meta: resize_
CUDA: resize_cuda_
QuantizedCPU: quantized_resize_cpu_
+ SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
- func: empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
category_override: factory
variants: function
dispatch:
@@ -1832,10 +1934,14 @@
device_guard: False
- func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
device_check: NoCheck
device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: empty_like
+ SparseCPU, SparseCUDA: empty_like_sparse_coo
+ SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr
- func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CPU: empty_strided_cpu
CUDA: empty_strided_cuda
@@ -1843,22 +1949,30 @@
- func: erf(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: erf.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: erf_sparse
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
- func: erf_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: erf.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: erf_sparse_
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
- func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: erf_out
+ SparseCPU, SparseCUDA: erf_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
- func: erfc(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: erfc.out
variants: function, method
@@ -1908,22 +2022,30 @@
- func: expm1(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: expm1.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: expm1_sparse
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
- func: expm1_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: expm1.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: expm1_sparse_
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
- func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: expm1_out
+ SparseCPU, SparseCUDA: expm1_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
- func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
device_check: NoCheck
device_guard: False
@@ -1969,40 +2091,48 @@
- func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
+ CPU, CUDA: fill_
+ QuantizedCPU, QuantizedCUDA: fill_quantized_
Meta: fill_meta_
- func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
+ CPU, CUDA: fill_
+ QuantizedCPU, QuantizedCUDA: fill_quantized_
Meta: fill_meta_
- func: floor(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: floor.out
variants: function, method
dispatch:
CompositeExplicitAutograd: floor
+ SparseCPU, SparseCUDA: floor_sparse
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
- func: floor_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: floor.out
variants: function, method
dispatch:
CompositeExplicitAutograd: floor_
+ SparseCPU, SparseCUDA: floor_sparse_
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
- func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: floor_out
+ SparseCPU, SparseCUDA: floor_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
- func: floor_divide(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
@@ -2106,14 +2236,17 @@
# `align_corners = True`.
- func: grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
- func: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
dispatch:
- CPU: grid_sampler_2d_cpu
+ CPU, QuantizedCPU: grid_sampler_2d_cpu
CUDA: grid_sampler_2d_cuda
-- func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+# `grid_sampler_2d_backward` takes in `output_mask` to optimize performance for
+# the case where `input` doesn't require gradient. Gradient for `grid` is always
+# computed (only `output_mask[0]` is checked by the implementations).
+- func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)
dispatch:
CPU: grid_sampler_2d_backward_cpu
CUDA: grid_sampler_2d_backward_cuda
# See NOTE [ grid_sample CPU fallback ]
@@ -2227,10 +2360,12 @@
dispatch:
CompositeExplicitAutograd: index_copy_
- func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: index_copy
- func: index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)
variants: method
- func: index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor
@@ -2248,10 +2383,12 @@
# - Tensor & Tensor::index_put_(std::initializer_list<TensorIndex> indices, Scalar v)
- func: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
device_check: NoCheck # delegate to _index_put_impl_ after clone, which leverages TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: index_put
- func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function
dispatch:
@@ -2267,16 +2404,10 @@
- func: inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CompositeExplicitAutograd: inverse_out
-- func: _inverse_helper(Tensor self) -> Tensor
- variants: function
- dispatch:
- CPU: _inverse_helper_cpu
- CUDA: _inverse_helper_cuda
-
- func: isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor
variants: function, method
- func: isin.Tensor_Tensor_out(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)
variants: function
@@ -2313,10 +2444,11 @@
device_check: NoCheck
device_guard: False
dispatch:
CPU, CUDA: isnan
SparseCPU, SparseCUDA: isnan_sparse
+ SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
- func: is_distributed(Tensor self) -> bool
variants: function, method
device_check: NoCheck
device_guard: False
@@ -2336,10 +2468,15 @@
- func: is_conj(Tensor self) -> bool
variants: function, method
device_guard: False
manual_cpp_binding: True
+- func: _is_zerotensor(Tensor self) -> bool
+ variants: function, method
+ device_guard: False
+ manual_cpp_binding: True
+
- func: is_neg(Tensor self) -> bool
variants: function, method
device_guard: False
manual_cpp_binding: True
@@ -2403,28 +2540,36 @@
dispatch:
CPU: layer_norm_cpu
CUDA: layer_norm_cuda
CompositeImplicitAutograd: math_native_layer_norm
+- func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
+ dispatch:
+ CPU: multi_head_self_attention_cpu
+ CUDA: multi_head_self_attention_cuda
+
- func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
dispatch:
CPU: layer_norm_backward_cpu
CUDA: layer_norm_backward_cuda
- func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: nan_to_num
+ SparseCPU, SparseCUDA: nan_to_num_sparse
- func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
variants: function, method
dispatch:
CompositeExplicitAutograd: nan_to_num_
+ SparseCPU, SparseCUDA: nan_to_num_sparse_
- func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, CUDA: nan_to_num_out
+ SparseCPU, SparseCUDA: nan_to_num_sparse_out
- func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
python_module: nn
- func: linear.out(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)
@@ -2469,15 +2614,15 @@
- func: ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!)
variants: function, method
- func: ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
-- func: linspace(Scalar start, Scalar end, int? steps=None, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
-- func: linspace.out(Scalar start, Scalar end, int? steps=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: linspace_cpu_out
+ CPU, Meta: linspace_out
CUDA: linspace_cuda_out
- func: log(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log.out
@@ -2497,10 +2642,12 @@
- func: log10(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log10.out
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: log10
- func: log10_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: log10.out
variants: function, method
@@ -2516,25 +2663,28 @@
device_check: NoCheck # TensorIterator
structured_delegate: log1p.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: log1p_sparse
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
- func: log1p_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: log1p.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: log1p_sparse_
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
- func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: log1p_out
- SparseCPU, SparseCUDA: log1p_out_sparse
+ SparseCPU, SparseCUDA: log1p_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
- func: log2(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log2.out
variants: function, method
@@ -2628,15 +2778,15 @@
- func: logdet(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: logdet
-- func: logspace(Scalar start, Scalar end, int? steps=None, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
-- func: logspace.out(Scalar start, Scalar end, int? steps=None, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+- func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: logspace_cpu_out
+ CPU, Meta: logspace_out
CUDA: logspace_cuda_out
# log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
- func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
variants: function, method
@@ -2651,14 +2801,14 @@
structured: True
dispatch:
CPU: log_softmax_cpu_out
CUDA: log_softmax_cuda_out
-- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
structured_delegate: _log_softmax_backward_data.out
-- func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+- func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
CPU: log_softmax_backward_cpu_out
CUDA: log_softmax_backward_cuda_out
@@ -2720,15 +2870,15 @@
variants: function, method
# Alias to linalg.matrix_power
- func: matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
+# Alias to linalg.matrix_exp
- func: matrix_exp(Tensor self) -> Tensor
variants: function, method
- dispatch:
- CPU, CUDA: matrix_exp
+# This function should be deprecated in favor of differential_analytic_matrix_function in FunctionsManual.cpp
- func: matrix_exp_backward(Tensor self, Tensor grad) -> Tensor
# DEPRECATED: Use torch.aminmax instead
- func: _aminmax(Tensor self) -> (Tensor, Tensor)
dispatch:
@@ -2758,16 +2908,20 @@
dispatch:
CPU, CUDA: _compute_linear_combination_out
- func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
device_check: NoCheck # TensorIterator
+ structured_delegate: max.dim_max
variants: function, method
dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: max
+ QuantizedCPU, QuantizedCUDA: qmax
- func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
device_check: NoCheck # TensorIterator
+ structured: True
+ precomputed:
+ - dim -> int dim
dispatch:
CPU, CUDA: max_out
- func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
device_check: NoCheck # TensorIterator
@@ -2901,16 +3055,20 @@
- func: nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
device_check: NoCheck # TensorIterator
+ structured_delegate: min.dim_min
variants: function, method
dispatch:
- CPU, CUDA, QuantizedCPU, QuantizedCUDA: min
+ QuantizedCPU, QuantizedCUDA: qmin
- func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
device_check: NoCheck # TensorIterator
+ structured: True
+ precomputed:
+ - dim -> int dim
dispatch:
CPU, CUDA: min_out
- func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
device_check: NoCheck # TensorIterator
@@ -2930,18 +3088,10 @@
- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
dispatch:
CompositeExplicitAutograd: mkldnn_convolution
-- func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
-
-- func: mkldnn_convolution_backward_weights(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> (Tensor, Tensor)
-
-- func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CompositeExplicitAutograd: mkldnn_convolution_backward
-
- func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
dispatch:
CUDA: miopen_batch_norm
- func: miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)
@@ -2950,60 +3100,18 @@
- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
dispatch:
CUDA: miopen_convolution
-- func: miopen_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_backward_input
-
-- func: miopen_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CUDA: miopen_convolution_backward
-
-- func: miopen_convolution_backward_bias(Tensor grad_output) -> Tensor
- dispatch:
- CUDA: miopen_convolution_backward_bias
-
-- func: miopen_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_backward_weight
-
- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
dispatch:
CUDA: miopen_convolution_transpose
-# NB: output_padding not strictly needed here, but it's helpful for the float
-# backwards
-- func: miopen_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CUDA: miopen_convolution_transpose_backward
-
-- func: miopen_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_transpose_backward_input
-
-- func: miopen_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_convolution_transpose_backward_weight
-
- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
dispatch:
CUDA: miopen_depthwise_convolution
-- func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_depthwise_convolution_backward_input
-
-- func: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- dispatch:
- CUDA: miopen_depthwise_convolution_backward
-
-- func: miopen_depthwise_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
- dispatch:
- CUDA: miopen_depthwise_convolution_backward_weight
-
- func: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
dispatch:
CUDA: miopen_rnn
- func: miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
@@ -3012,11 +3120,12 @@
- func: mm(Tensor self, Tensor mat2) -> Tensor
structured_delegate: mm.out
variants: function, method
dispatch:
- SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: _sparse_mm
+ SparseCPU, SparseCUDA: _sparse_mm
+ SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
- func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
CPU: mm_out_cpu
@@ -3055,10 +3164,11 @@
structured_delegate: mul.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: mul_sparse
MkldnnCPU: mkldnn_mul
+ ZeroTensor: mul_zerotensor
- func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: mul.out
variants: method
@@ -3105,12 +3215,12 @@
variants: method
- func: mv(Tensor self, Tensor vec) -> Tensor
variants: function, method
dispatch:
- CPU, CUDA: mv
- SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: mv_sparse
+ CompositeExplicitAutograd: mv
+ SparseCPU, SparseCUDA: mv_sparse
- func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CompositeExplicitAutograd: mv_out
@@ -3208,19 +3318,10 @@
- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor
variants: function
dispatch:
CompositeExplicitAutograd: _nnpack_spatial_convolution
-- func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
- variants: function
-
-- func: _nnpack_spatial_convolution_backward_input(Tensor input, Tensor grad_output, Tensor weight, int[2] padding) -> Tensor
- variants: function
-
-- func: _nnpack_spatial_convolution_backward_weight(Tensor input, int[] weightsize, Tensor grad_output, int[2] padding) -> Tensor
- variants: function
-
- func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_check: NoCheck
device_guard: False
- func: ones(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -3284,19 +3385,39 @@
# behavior on Windows, for reasons I don't understand
# (maybe related to capital letter collation somehow...)
- func: numpy_T(Tensor(a) self) -> Tensor(a)
variants: method
+# Exposed on Python as an attribute 'H'
+- func: matrix_H(Tensor(a) self) -> Tensor(a)
+ variants: method
+
+# Exposed on Python as an attribute 'mT'
+- func: mT(Tensor(a) self) -> Tensor(a)
+ variants: method
+
+# Exposed on Python as an attribute 'mH'
+- func: mH(Tensor(a) self) -> Tensor(a)
+ variants: method
+
+- func: adjoint(Tensor(a) self) -> Tensor(a)
+ variants: function, method
+
- func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
- func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
- func: channel_shuffle(Tensor self, int groups) -> Tensor
dispatch:
CPU: channel_shuffle
QuantizedCPU: channel_shuffle_quantized_cpu
+- func: native_channel_shuffle(Tensor self, int groups) -> Tensor
+ dispatch:
+ CPU: channel_shuffle_cpu
+ CompositeImplicitAutograd: math_channel_shuffle
+
- func: is_pinned(Tensor self, Device? device=None) -> bool
variants: method
dispatch:
CUDA: is_pinned_cuda
CompositeExplicitAutograd: is_pinned_default
@@ -3319,19 +3440,22 @@
- func: rad2deg(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: rad2deg
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
- func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
dispatch:
CompositeExplicitAutograd: rad2deg_
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
- func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CompositeExplicitAutograd: rad2deg_out
+ SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
- func: deg2rad(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: deg2rad
@@ -3418,11 +3542,11 @@
- func: range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: range_cpu_out
+ CPU, Meta: range_out
CUDA: range_cuda_out
- func: ravel(Tensor(a) self) -> Tensor(a)
variants: function, method
@@ -3447,25 +3571,28 @@
device_check: NoCheck # TensorIterator
structured_delegate: neg.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: neg_sparse
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
- func: neg_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: neg.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: neg_sparse_
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
- func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: neg_out
SparseCPU, SparseCUDA: neg_out_sparse
+ SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
# Alias for neg
- func: negative(Tensor self) -> Tensor
variants: function, method
@@ -3502,11 +3629,11 @@
- func: _reshape_alias(Tensor(a) self, int[] size, int[] stride) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: _reshape_alias
+ CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias
# We don't need to support mkldnn since this is handled explicitly by the reshape operator.
- func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
device_check: NoCheck
device_guard: False
@@ -3520,24 +3647,50 @@
- func: round(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: round.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: round_sparse
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
- func: round_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: round.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: round_sparse_
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
- func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU: round_out
CUDA: round_out
+ SparseCPU, SparseCUDA: round_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
+- func: round.decimals(Tensor self, *, int decimals) -> Tensor
+ device_check: NoCheck # TensorIterator
+ structured_delegate: round.decimals_out
+ variants: function, method
+
+- func: round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ structured_delegate: round.decimals_out
+ variants: function, method
+
+- func: round.decimals_out(Tensor self, *, int decimals, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ structured: True
+ structured_inherits: TensorIteratorBase
+ dispatch:
+ CPU: round_decimals_out
+ CUDA: round_decimals_out
+
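The new round.decimals overloads add a keyword argument for rounding to a given number of decimal places. A minimal sketch (values chosen for illustration):

```python
import torch

x = torch.tensor([0.1234, 5.6789])
print(torch.round(x, decimals=2))   # tensor([0.1200, 5.6800])

# the in-place variant delegates to the same round.decimals_out kernel
x.round_(decimals=1)
print(x)                            # tensor([0.1000, 5.7000])
```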
- func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
device_check: NoCheck # TensorIterator
- func: rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -3589,10 +3742,11 @@
structured_delegate: gelu.out
device_check: NoCheck # TensorIterator
python_module: nn
dispatch:
MkldnnCPU: mkldnn_gelu
+ QuantizedCPU: gelu_quantized_cpu
- func: gelu_backward.grad_input(Tensor grad, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
python_module: nn
@@ -3781,22 +3935,30 @@
- func: sin(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: sin.out
variants: function, method
+ dispatch:
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
+ SparseCPU, SparseCUDA: sin_sparse
- func: sin_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sin.out
variants: function, method
+ dispatch:
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
+ SparseCPU, SparseCUDA: sin_sparse_
- func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sin_out
+ SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
+ SparseCPU, SparseCUDA: sin_sparse_out
- func: sinc(Tensor self) -> Tensor
structured_delegate: sinc.out
variants: function, method
@@ -3812,22 +3974,30 @@
- func: sinh(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: sinh.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: sinh_sparse
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
- func: sinh_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sinh.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: sinh_sparse_
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
- func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sinh_out
+ SparseCPU, SparseCUDA: sinh_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
# Returns a copy of this `Variable` that is detached from its autograd graph.
# This method is OK to call if the `Variable` is a view.
#
# NOTE: Previously, if we change the tensor metadata (e.g. sizes / strides /
@@ -3846,10 +4016,11 @@
# Like `detach()`, but modifies this `Variable` in-place. This method may
# only be called on non-view `Variable`s. You can use `is_view()` to check
# this. If this `Variable` is a view, throws an `std::runtime_error()`.
- func: detach_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: detach_
- func: size.int(Tensor self, int dim) -> int
variants: function
@@ -3874,10 +4045,31 @@
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: slice_backward
+- func: slice_scatter(Tensor self, Tensor src, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor
+ variants: function, method
+ device_check: NoCheck
+ device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: slice_scatter
+
+- func: select_scatter(Tensor self, Tensor src, int dim, int index) -> Tensor
+ variants: function, method
+ device_check: NoCheck
+ device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: select_scatter
+
+- func: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
+ variants: function, method
+ device_check: NoCheck
+ device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: diagonal_scatter
+
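slice_scatter, select_scatter and diagonal_scatter are out-of-place counterparts of writing into a slice, a selected index or a diagonal view. A minimal sketch (shapes chosen for illustration):

```python
import torch

base = torch.zeros(4, 4)

# embed a 2x4 block into rows 1..2 of `base`, returning a new tensor
out = torch.slice_scatter(base, torch.ones(2, 4), dim=0, start=1, end=3)

# write a 1-D row into index 0 along dim 0
out2 = torch.select_scatter(base, torch.full((4,), 2.0), dim=0, index=0)

# write values onto the main diagonal
out3 = torch.diagonal_scatter(base, torch.arange(4.0), offset=0)
print(out, out2, out3, sep="\n")
```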
- func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
variants: function, method
dispatch:
CompositeExplicitAutograd: slogdet
@@ -3900,14 +4092,14 @@
structured: True
dispatch:
CPU: softmax_cpu_out
CUDA: softmax_cuda_out
-- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
structured_delegate: _softmax_backward_data.out
-- func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
+- func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
structured: True
dispatch:
CPU: softmax_backward_cpu_out
CUDA: softmax_backward_cuda_out
@@ -3916,11 +4108,11 @@
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: unsafe_split
-- func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
+- func: split.Tensor(Tensor(a -> *) self, int split_size, int dim=0) -> Tensor(a)[]
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: split
@@ -3930,72 +4122,77 @@
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: unsafe_split_with_sizes
-- func: split_with_sizes(Tensor(a) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
+- func: split_with_sizes(Tensor(a -> *) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: split_with_sizes
-- func: hsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
+- func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
variants: function, method
-- func: hsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
+- func: hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
variants: function, method
-- func: vsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
+- func: vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
variants: function, method
-- func: vsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
+- func: vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
variants: function, method
-- func: dsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
+- func: dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
variants: function, method
-- func: dsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
+- func: dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
variants: function, method
- func: squeeze(Tensor(a) self) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
- CompositeExplicitAutograd: squeeze
+ CPU, CUDA: squeeze
+ QuantizedCPU, QuantizedCUDA: squeeze_quantized
- func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
- CompositeExplicitAutograd: squeeze
+ CPU, CUDA: squeeze
+ QuantizedCPU, QuantizedCUDA: squeeze_quantized
- func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
- func: squeeze_(Tensor(a!) self) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: squeeze_
- func: squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: squeeze_
- func: squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
- func: sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
variants: function, method
- func: sspaddmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -4103,23 +4300,28 @@
device_check: NoCheck # TensorIterator
structured_delegate: sqrt.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: sqrt_sparse
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
- func: sqrt_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sqrt.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: sqrt_sparse_
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
- func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sqrt_out
- SparseCPU, SparseCUDA: sqrt_out_sparse
+ SparseCPU, SparseCUDA: sqrt_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
- func: square(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
@@ -4223,50 +4425,66 @@
- func: t_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck
device_guard: False
variants: method
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: t_
- func: tan(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: tan.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: tan_sparse
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
- func: tan_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: tan.out
variants: function, method
+ dispatch:
+ SparseCPU, SparseCUDA: tan_sparse_
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
- func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: tan_out
+ SparseCPU, SparseCUDA: tan_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
- func: tanh(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: tanh.out
variants: function, method
dispatch:
QuantizedCPU: tanh_quantized_cpu
MkldnnCPU: mkldnn_tanh
+ SparseCPU, SparseCUDA: tanh_sparse
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
- func: tanh_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: tanh.out
variants: function, method
dispatch:
MkldnnCPU: mkldnn_tanh_
+ SparseCPU, SparseCUDA: tanh_sparse_
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
+
- func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: tanh_out
+ SparseCPU, SparseCUDA: tanh_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
- func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
variants: function
- func: tensordot.out(Tensor self, Tensor other, int[] dims_self, int[] dims_other, *, Tensor(a!) out) -> Tensor(a!)
@@ -4329,10 +4547,11 @@
- func: transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: transpose_
- func: _mkldnn_transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
device_check: NoCheck
@@ -4386,24 +4605,30 @@
structured_delegate: trunc.out
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: trunc
+ SparseCPU, SparseCUDA: trunc_sparse
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
- func: trunc_(Tensor(a!) self) -> Tensor(a!)
structured_delegate: trunc.out
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: trunc_
+ SparseCPU, SparseCUDA: trunc_sparse_
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
- func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: trunc_out
+ SparseCPU, SparseCUDA: trunc_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
# Alias for trunc
- func: fix(Tensor self) -> Tensor
variants: function, method
@@ -4459,16 +4684,19 @@
- func: unsqueeze(Tensor(a) self, int dim) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
- CompositeExplicitAutograd: unsqueeze
+ CPU, CUDA: unsqueeze
+ SparseCPU, SparseCUDA: unsqueeze_sparse
+ QuantizedCPU, QuantizedCUDA: unsqueeze_quantized
- func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
dispatch:
CompositeExplicitAutograd: unsqueeze_
- func: vander(Tensor x, int? N=None, bool increasing=False) -> Tensor
@@ -4584,10 +4812,15 @@
- func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_check: NoCheck
device_guard: False
+- func: _efficientzerotensor(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ dispatch:
+ CPU: _efficientzerotensor
+ CUDA: _efficientzerotensor_cuda
+
- func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
- func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
@@ -4653,32 +4886,38 @@
dispatch:
SparseCPU: _sparse_sum_backward_cpu
SparseCUDA: _sparse_sum_backward_cuda
- func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ python_module: sparse
variants: function
- func: _sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
+ python_module: sparse
variants: function
- func: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+ python_module: sparse
dispatch:
SparseCPU: softmax_sparse_cpu
SparseCUDA: softmax_sparse_cuda
- func: _sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
dispatch:
SparseCPU: softmax_backward_sparse_cpu
SparseCUDA: softmax_backward_sparse_cuda
- func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ python_module: sparse
variants: function
- func: _sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
+ python_module: sparse
variants: function
- func: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+ python_module: sparse
dispatch:
SparseCPU: log_softmax_sparse_cpu
SparseCUDA: log_softmax_sparse_cuda
- func: _sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
@@ -4772,10 +5011,11 @@
- func: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: clone
SparseCPU, SparseCUDA: clone_sparse
+ SparseCsrCPU, SparseCsrCUDA: clone_sparse_csr
MkldnnCPU: mkldnn_clone
QuantizedCPU, QuantizedCUDA: quantized_clone
- func: positive(Tensor(a) self) -> Tensor(a)
variants: function, method
@@ -4884,22 +5124,33 @@
CompositeExplicitAutograd: rsub
# Functionally the same as addmm, but we give it a different derivative formula
# that doesn't propagate gradients to non-present entries on sparse.
- func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+ python_module: sparse
dispatch:
CompositeExplicitAutograd: _sparse_addmm
+- func: sparse_sampled_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ python_module: sparse
+ dispatch:
+ SparseCsrCUDA: sparse_sampled_addmm_out_sparse_csr_cuda
+
+- func: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+ python_module: sparse
+ dispatch:
+ SparseCsrCUDA: sparse_sampled_addmm_sparse_csr_cuda
+
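sparse_sampled_addmm computes beta*input + alpha*(mat1 @ mat2) evaluated only at the non-zero locations of a sparse CSR `input`; the schema registers only a SparseCsrCUDA kernel. A minimal sketch, assuming the op is reachable as torch.sparse.sampled_addmm per the python_module tag and guarding on CUDA availability:

```python
import torch

if torch.cuda.is_available():
    # a 2x2 CSR tensor with one non-zero per row: positions (0,0) and (1,1)
    csr = torch.sparse_csr_tensor(
        torch.tensor([0, 1, 2], device="cuda"),   # crow_indices
        torch.tensor([0, 1], device="cuda"),      # col_indices
        torch.tensor([1.0, 2.0], device="cuda"),  # values
        size=(2, 2),
    )
    mat1 = torch.randn(2, 3, device="cuda")
    mat2 = torch.randn(3, 2, device="cuda")
    out = torch.sparse.sampled_addmm(csr, mat1, mat2, beta=0.5, alpha=2.0)
    print(out)
```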
- func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
CPU: addmm_out_cpu
CUDA: addmm_out_cuda
SparseCPU: addmm_out_sparse_dense_cpu
SparseCUDA: addmm_out_sparse_dense_cuda
- SparseCsrCPU: addmm_out_sparse_csr_dense_cpu
- SparseCsrCUDA: addmm_out_sparse_csr_dense_cuda
+ SparseCsrCPU: addmm_out_sparse_csr_cpu
+ SparseCsrCUDA: addmm_out_sparse_csr_cuda
- func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
structured_delegate: addmm.out
variants: function, method
dispatch:
@@ -5207,16 +5458,16 @@
device_check: NoCheck # Allows copy into different device
variants: function
dispatch:
SparseCPU, SparseCUDA: copy_sparse_
-- func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]
+- func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
variants: function, method
dispatch:
CompositeExplicitAutograd: unbind
-- func: unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[]
+- func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
variants: function, method
- func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
variants: method
dispatch:
@@ -5244,10 +5495,15 @@
dispatch:
MkldnnCPU: mkldnn_reorder_conv3d_weight
- func: to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor
+- func: quantize_per_tensor_dynamic(Tensor self, ScalarType dtype, bool reduce_range) -> Tensor
+ variants: function
+ dispatch:
+ CPU, CUDA: quantize_per_tensor_dynamic
+
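quantize_per_tensor_dynamic picks the scale and zero point from the input's observed min/max at call time, instead of taking them as arguments like quantize_per_tensor below. A minimal sketch, assuming the op is exposed at the top level as suggested by `variants: function`:

```python
import torch

x = torch.randn(4)
# scale/zero_point are derived from x itself; reduce_range trades range for accuracy on some backends
xq = torch.quantize_per_tensor_dynamic(x, torch.quint8, reduce_range=False)
print(xq.q_scale(), xq.q_zero_point())
print(xq.dequantize())
```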
- func: quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor
variants: function
dispatch:
CPU, CUDA: quantize_per_tensor
@@ -5267,11 +5523,11 @@
CPU, CUDA: quantize_per_channel
- func: dequantize.self(Tensor self) -> Tensor
variants: function, method
dispatch:
- CPU: dequantize_cpu
+ CPU, CUDA: dequantize_cpu_or_cuda
QuantizedCPU, QuantizedCUDA: dequantize_quantized
- func: dequantize.tensors(Tensor[] tensors) -> Tensor[]
variants: function
dispatch:
@@ -5389,10 +5645,18 @@
variants: function
- func: choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor)
variants: function
+- func: _autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a)
+ variants: method
+ device_guard: False
+
+- func: _autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a)
+ variants: method
+ device_guard: False
+
- func: _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: _to_copy
@@ -5587,10 +5851,12 @@
CUDA: masked_fill__cuda
- func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: masked_fill
- func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
@@ -5598,26 +5864,35 @@
CUDA: masked_fill__cuda
- func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: masked_fill
- func: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)
variants: method
dispatch:
CPU: masked_scatter__cpu
CUDA: masked_scatter__cuda
- func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: masked_scatter
+- func: _masked_softmax(Tensor self, Tensor mask) -> Tensor
+ dispatch:
+ CUDA: masked_softmax_cuda
+ CPU: masked_softmax_cpu
+
- func: view(Tensor(a) self, int[] size) -> Tensor(a)
variants: method
device_check: NoCheck
device_guard: False
dispatch:
- CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
+ ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view
MkldnnCPU: mkldnn_view
# Warning: If you want to change the name or overload name of this
# operator, you might also want to change the `isBlockListedSchema`
# function in `torch/csrc/jit/frontend/schema_catching.cpp`.
@@ -5637,25 +5912,27 @@
CPU, CUDA: put_
- func: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor
variants: function, method
-- func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
- variants: method
+- func: index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ structured: True
+ variants: function
+ precomputed:
+ - dim -> int dim
+ dispatch:
+ CPU: index_add_cpu_out
+ CUDA: index_add_cuda_out
-- func: index_add_.alpha(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor(a!)
+- func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!)
+ structured_delegate: index_add.out
variants: method
- dispatch:
- CPU: index_add_cpu_
- CUDA: index_add_cuda_
-- func: index_add(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
+- func: index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
+ structured_delegate: index_add.out
variants: function, method
-- func: index_add.alpha(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor
- variants: function, method
-
- func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
variants: function, method
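The rewrite above folds the old index_add_.alpha overload into the main index_add signatures, with alpha defaulting to 1 and the kernels moving behind a structured index_add.out. A minimal sketch of the merged signature (values chosen for illustration):

```python
import torch

index = torch.tensor([0, 2, 4])
source = torch.ones(3, 3)

x = torch.zeros(5, 3)
x.index_add_(0, index, source)                                 # same behaviour as before
y = torch.zeros(5, 3).index_add(0, index, source, alpha=2.0)   # source scaled by alpha before accumulation
print(x)
print(y)
```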
- func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -5665,20 +5942,24 @@
CUDA: index_fill_
- func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: index_fill
- func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
CPU, CUDA: index_fill_
- func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: index_fill
- func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
@@ -5771,10 +6052,15 @@
CPU, CUDA: scatter_add
- func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
variants: function, method
+- func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor
+ variants: function, method
+ dispatch:
+ CPU: scatter_reduce_two_cpu
+
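scatter_reduce.two reduces the values of the input into buckets selected by `index`; this prototype takes no separate source tensor, per the schema. A minimal sketch, assuming the Python exposure mirrors the schema's argument names:

```python
import torch

src = torch.tensor([1.0, 2.0, 3.0, 4.0])
index = torch.tensor([0, 1, 0, 1])

# elements sharing an index are combined with the chosen reduction
out = torch.scatter_reduce(src, 0, index, reduce="sum", output_size=2)
print(out)   # tensor([4., 6.])
```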
- func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
structured_delegate: eq.Scalar_out
device_check: NoCheck # TensorIterator
variants: method
dispatch:
@@ -6062,39 +6348,31 @@
variants: function
dispatch:
CPU, CUDA: bitwise_right_shift
- func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
+ structured_delegate: tril.out
variants: method
- dispatch:
- CPU: tril_cpu_
- CUDA: tril_cuda_
- func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
+ structured_delegate: triu.out
variants: method
- dispatch:
- CPU: triu_cpu_
- CUDA: triu_cuda_
- func: digamma_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: digamma.out
variants: method
- func: lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
- dispatch:
- CPU: lerp_cpu_scalar_
- CUDA: lerp_cuda_scalar_
+ structured_delegate: lerp.Scalar_out
- func: lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
- dispatch:
- CPU: lerp_cpu_tensor_
- CUDA: lerp_cuda_tensor_
+ structured_delegate: lerp.Tensor_out
- func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
variants: method
dispatch:
CPU, CUDA: addbmm_
@@ -6176,37 +6454,33 @@
variants: function
device_check: NoCheck
device_guard: False
- func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU, CUDA: cross_out
- func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor
variants: method, function
- dispatch:
- CPU, CUDA: cross
- func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
+ structured: True
dispatch:
- CPU: triu_cpu_out
- CUDA: triu_cuda_out
+ CPU: triu_cpu
+ CUDA: triu_cuda
- func: triu(Tensor self, int diagonal=0) -> Tensor
+ structured_delegate: triu.out
variants: method, function
- dispatch:
- CompositeExplicitAutograd: triu
- func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
+ structured: True
dispatch:
- CPU: tril_cpu_out
- CUDA: tril_cuda_out
+ CPU: tril_cpu
+ CUDA: tril_cuda
- func: tril(Tensor self, int diagonal=0) -> Tensor
+ structured_delegate: tril.out
variants: method, function
- dispatch:
- CompositeExplicitAutograd: tril
- func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CPU: tril_indices_cpu
CUDA: tril_indices_cuda
@@ -6582,11 +6856,12 @@
CUDA, QuantizedCUDA: index_select_out_cuda
- func: index_select(Tensor self, int dim, Tensor index) -> Tensor
variants: method, function
dispatch:
- CPU, QuantizedCPU: index_select_cpu_
+ CPU: index_select_cpu_
+ QuantizedCPU: index_select_quantized_cpu_
CUDA, QuantizedCUDA: index_select_cuda
SparseCPU: index_select_sparse
SparseCUDA: index_select_sparse
- func: index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
@@ -6627,10 +6902,13 @@
CUDA: nonzero_cuda
- func: nonzero_numpy(Tensor self) -> Tensor[]
variants: method, function
+- func: argwhere(Tensor self) -> Tensor
+ variants: method, function
+
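argwhere is the NumPy-style companion to nonzero: it returns one row of indices per non-zero element. A minimal sketch:

```python
import torch

t = torch.tensor([[0, 1],
                  [2, 0]])
print(torch.argwhere(t))
# tensor([[0, 1],
#         [1, 0]])
```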
- func: gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
CPU, CUDA: gather_out
@@ -6697,18 +6975,35 @@
dispatch:
CPU: legacy_lstsq
CUDA: legacy_lstsq_cuda
- func: triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient)
+ structured: True
dispatch:
CPU, CUDA: triangular_solve_out
+ SparseCsrCPU: triangular_solve_out_sparse_csr_cpu
+ SparseCsrCUDA: triangular_solve_out_sparse_csr_cuda
- func: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)
+ structured_delegate: triangular_solve.X
variants: method, function
+
+- func: _linalg_check_errors(Tensor info, str api_name, *, bool is_matrix) -> ()
dispatch:
- CPU, CUDA: triangular_solve
+ CompositeExplicitAutograd: _linalg_check_errors
+- func: linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ dispatch:
+ CPU, CUDA: linalg_solve_triangular_out
+
+- func: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor
+ python_module: linalg
+ variants: method, function
+ dispatch:
+ CPU, CUDA: linalg_solve_triangular
+
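linalg_solve_triangular solves A X = B reading only the requested triangle of A; `upper` is keyword-only in the schema. A minimal sketch (the matrix is made diagonally dominant just to keep the example well conditioned):

```python
import torch

A = torch.triu(torch.rand(3, 3)) + torch.eye(3)   # upper-triangular, well conditioned
B = torch.randn(3, 2)

X = torch.linalg.solve_triangular(A, B, upper=True)
print(torch.allclose(A @ X, B, atol=1e-5))        # True
```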
- func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
dispatch:
CompositeExplicitAutograd: symeig_out
- func: symeig(Tensor self, bool eigenvectors=False, bool upper=True) -> (Tensor eigenvalues, Tensor eigenvectors)
@@ -6734,26 +7029,21 @@
- func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)
- func: svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V)
variants: method, function
-- func: _svd_helper(Tensor self, bool some, bool compute_uv) -> (Tensor U, Tensor S, Tensor V)
- variants: function
- dispatch:
- CPU: _svd_helper_cpu
- CUDA: _svd_helper_cuda
-
# swapaxes, alias for transpose
- func: swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
- func: swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
# swapdims, alias for transpose
- func: swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
variants: function, method
device_check: NoCheck
@@ -6761,10 +7051,11 @@
- func: swapdims_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
+ tags: inplace_view
- func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, CUDA: cholesky_out
@@ -6841,12 +7132,10 @@
dispatch:
CPU, CUDA: ormqr
- func: _lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor LU, Tensor pivots, Tensor info)
variants: function
- dispatch:
- CPU, CUDA: _lu_with_info
- func: lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, CUDA: lu_solve_out
@@ -6924,22 +7213,30 @@
- func: erfinv(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: erfinv.out
variants: method, function
+ dispatch:
+ SparseCPU, SparseCUDA: erfinv_sparse
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
- func: erfinv_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: erfinv.out
variants: method
+ dispatch:
+ SparseCPU, SparseCUDA: erfinv_sparse_
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
- func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: erfinv_out
+ SparseCPU, SparseCUDA: erfinv_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
- func: i0(Tensor self) -> Tensor
structured_delegate: i0.out
variants: function, method
@@ -6957,35 +7254,46 @@
device_check: NoCheck # TensorIterator
structured_delegate: sign.out
variants: function, method
dispatch:
CompositeExplicitAutograd: sign
+ SparseCPU, SparseCUDA: sign_sparse
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
- func: sign_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sign.out
variants: method
dispatch:
CompositeExplicitAutograd: sign_
+ SparseCPU, SparseCUDA: sign_sparse_
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
- func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sign_out
+ SparseCPU, SparseCUDA: sign_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
- func: signbit(Tensor self) -> Tensor
variants: function, method
structured_delegate: signbit.out
+ dispatch:
+ SparseCPU, SparseCUDA: signbit_sparse
+ SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
- func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU: signbit_out
CUDA: signbit_out
+ SparseCPU, SparseCUDA: signbit_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
- func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
@@ -7006,35 +7314,43 @@
- func: atan2(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: atan2.out
variants: method, function
+# arctan2, alias of atan2
+- func: arctan2(Tensor self, Tensor other) -> Tensor
+ variants: method, function
+
+- func: arctan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+
+- func: arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ variants: method
+
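arctan2 is registered purely as an alias of atan2, matching the NumPy spelling. A minimal sketch:

```python
import torch

y = torch.tensor([1.0, -1.0])
x = torch.tensor([1.0, 1.0])

print(torch.equal(torch.arctan2(y, x), torch.atan2(y, x)))   # True
y.arctan2_(x)   # in-place method variant from the same block
```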
- func: lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
+ structured: True
+ structured_inherits: TensorIteratorBase
dispatch:
- CPU: lerp_cpu_scalar_out
- CUDA: lerp_cuda_scalar_out
+ CPU, CUDA: lerp_Scalar
- func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
+ structured: True
+ structured_inherits: TensorIteratorBase
dispatch:
- CPU: lerp_cpu_tensor_out
- CUDA: lerp_cuda_tensor_out
+ CPU, CUDA: lerp_Tensor
- func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
- dispatch:
- CPU: lerp_cpu_scalar
- CUDA: lerp_cuda_scalar
+ structured_delegate: lerp.Scalar_out
- func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
- dispatch:
- CPU: lerp_cpu_tensor
- CUDA: lerp_cuda_tensor
+ structured_delegate: lerp.Tensor_out
- func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: histogram_histc_cpu_out
CUDA: _histc_out_cuda
@@ -7061,10 +7377,22 @@
- func: histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges)
variants: method, function
dispatch:
CPU: histogram_cpu
+- func: _histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[]
+ dispatch:
+ CPU: histogramdd_bin_edges_cpu
+
+- func: _histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor
+ dispatch:
+ CPU: histogramdd_cpu
+
+- func: _histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor
+ dispatch:
+ CPU: histogramdd_cpu
+
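The three _histogramdd_* helpers are internal building blocks for multidimensional histograms; the public entry point is presumably the composite torch.histogramdd wrapper built on top of them. A minimal sketch under that assumption:

```python
import torch

points = torch.rand(100, 2)              # 100 samples in 2-D

hist, bin_edges = torch.histogramdd(points, bins=[4, 4])
print(hist.shape)                        # torch.Size([4, 4])
print(len(bin_edges))                    # one edge tensor per dimension
```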
- func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CompositeExplicitAutograd: fmod_out
@@ -7273,53 +7601,30 @@
- func: min.other(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
-# The following quantile signatures are DEPRECATED in favor of the new ones with the interpolation kwarg.
-- func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
-
-- func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
+- func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
variants: method, function
-- func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
-- func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
+- func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
variants: method, function
-- func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
-- func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
+- func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
variants: method, function
-- func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
-- func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
+- func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor
variants: method, function
-# To keep backward and forward compatibility, and to avoid ambiguity with the original signatures, dim, keepdim and interpolation
-# parameters are required for now. Once the deprecated signatures are removed they will be made optional.
-- func: quantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
+- func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!)
-- func: quantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor
- variants: method, function
-
-- func: quantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
-
-- func: quantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor
- variants: method, function
-
-- func: nanquantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
-
-- func: nanquantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor
- variants: method, function
-
-- func: nanquantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!)
-
-- func: nanquantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor
- variants: method, function
-
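The deprecated quantile/nanquantile overloads are removed and `interpolation` becomes an ordinary keyword with a 'linear' default on the remaining signatures. A minimal sketch (values chosen for illustration):

```python
import torch

x = torch.tensor([0.0, 1.0, 2.0, 3.0])

print(torch.quantile(x, 0.4))                             # 1.2000, default 'linear'
print(torch.quantile(x, 0.4, interpolation="lower"))      # 1.0
print(torch.nanquantile(x, 0.4, interpolation="nearest"))
```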
- func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
device_check: NoCheck # TensorIterator
dispatch:
CPU: sort_out_cpu
CUDA: sort_out_cuda
@@ -7509,10 +7814,11 @@
device_check: NoCheck # TensorIterator
variants: method
dispatch:
CPU, CUDA: normal_
Meta: normal_meta_
+ SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
- func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, CUDA: normal_out
@@ -8207,10 +8513,17 @@
variants: function
dispatch:
CPU: foreach_tensor_minimum_slow
CUDA: foreach_tensor_minimum_cuda
+- func: _foreach_norm.Scalar(Tensor[] tensors, Scalar ord=2) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_norm_slow
+ CUDA: foreach_tensor_norm_cuda
+
- func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
dispatch:
CPU: bucketize_cpu
CUDA: bucketize_cuda
@@ -8222,21 +8535,31 @@
- func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
dispatch:
CPU: bucketize_cpu
CUDA: bucketize_cuda
-- func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False) -> Tensor
+- func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
dispatch:
CPU: searchsorted_cpu
CUDA: searchsorted_cuda
-- func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
+# [Note about _torch_cuda_cu_linker_symbol_op and torch_cuda_cu]
+# This is a DUMMY function to force the linking against torch_cuda_cu on Windows.
+# Otherwise, the Windows linker will optimize and not include torch_cuda_cu even when we
+# want it to be included. This is similar to what we do with warp_size for torch_cuda_cpp,
+# described as the solution to this issue: https://github.com/pytorch/pytorch/issues/31611
+# This op should NOT be used or exposed or edited or else Windows builds (with BUILD_SPLIT_CUDA) will break.
+- func: _torch_cuda_cu_linker_symbol_op(Tensor self) -> Tensor
dispatch:
+ CUDA: _torch_cuda_cu_linker_symbol_op_cuda
+
+- func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
CPU: searchsorted_out_cpu
CUDA: searchsorted_out_cuda
-- func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False) -> Tensor
+- func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
dispatch:
CPU: searchsorted_cpu
CUDA: searchsorted_cuda
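searchsorted gains a NumPy-style `side` string and an optional `sorter` of indices, so an unsorted sequence can be queried without materialising a sorted copy. A minimal sketch (values chosen for illustration):

```python
import torch

seq = torch.tensor([3, 1, 2])            # not sorted
order = torch.argsort(seq)               # indices that would sort it
values = torch.tensor([2, 3])

# side='right' mirrors numpy.searchsorted; sorter supplies the sort order
print(torch.searchsorted(seq, values, side="right", sorter=order))
```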
- func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
@@ -8246,10 +8569,19 @@
structured: True
dispatch:
CPU: _convert_indices_from_coo_to_csr_structured_cpu
CUDA: _convert_indices_from_coo_to_csr_structured_cuda
+- func: _convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor
+ structured_delegate: _convert_indices_from_csr_to_coo.out
+
+- func: _convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!)
+ structured: True
+ dispatch:
+ CPU: _convert_indices_from_csr_to_coo_structured_cpu
+ CUDA: _convert_indices_from_csr_to_coo_structured_cuda
+
## NN wrappers
- func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
python_module: nn
@@ -8407,20 +8739,20 @@
CPU: nll_loss2d_backward_cpu
CUDA: nll_loss2d_backward_cuda
- func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
+ structured: True
+ structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
- CPU: smooth_l1_loss_out
- CUDA: smooth_l1_loss_out
+ CPU, CUDA: smooth_l1_loss_out
- func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
device_check: NoCheck # TensorIterator
+ structured_delegate: smooth_l1_loss.out
python_module: nn
- dispatch:
- CPU, CUDA: smooth_l1_loss
- func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: smooth_l1_loss_backward_out
@@ -8531,10 +8863,11 @@
structured_inherits: TensorIteratorBase
device_check: NoCheck # TensorIterator
python_module: nn
dispatch:
CPU, CUDA: hardsigmoid_out
+ QuantizedCPU: hardsigmoid_out_quantized_cpu
- func: hardsigmoid(Tensor self) -> Tensor
structured_delegate: hardsigmoid.out
device_check: NoCheck # TensorIterator
python_module: nn
@@ -8713,18 +9046,18 @@
- func: softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor
structured_delegate: softplus.out
device_check: NoCheck # TensorIterator
python_module: nn
-- func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
+- func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: softplus_backward_out
-- func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output) -> Tensor
+- func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor
structured_delegate: softplus_backward.grad_input
python_module: nn
- func: softshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) out) -> Tensor(a!)
structured: True
@@ -8931,33 +9264,34 @@
python_module: nn
structured_delegate: fractional_max_pool2d.output
- func: fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
+ structured: True
dispatch:
- CPU: fractional_max_pool2d_backward_out_cpu
- CUDA: fractional_max_pool2d_backward_out_cuda
+ CPU: fractional_max_pool2d_backward_cpu
+ CUDA: fractional_max_pool2d_backward_cuda
- func: fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
python_module: nn
- dispatch:
- CPU: fractional_max_pool2d_backward_cpu
- CUDA: fractional_max_pool2d_backward_cuda
+ structured_delegate: fractional_max_pool2d_backward.grad_input
# Return: (Tensor output, Tensor indices)
- func: fractional_max_pool3d.output(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))
python_module: nn
+ structured: True
+ precomputed:
+ - kernel_size -> int poolSizeT, int poolSizeH, int poolSizeW
+ - output_size -> int outputT, int outputH, int outputW
dispatch:
CPU: fractional_max_pool3d_out_cpu
CUDA: fractional_max_pool3d_out_cuda
# Return: (Tensor output, Tensor indices)
- func: fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)
python_module: nn
- dispatch:
- CPU: fractional_max_pool3d_cpu
- CUDA: fractional_max_pool3d_cuda
+ structured_delegate: fractional_max_pool3d.output
- func: fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: fractional_max_pool3d_backward_out_cpu
@@ -9223,10 +9557,20 @@
- func: upsample_bilinear2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_bilinear2d_backward
+- func: _upsample_bilinear2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_bilinear2d_aa
+
+- func: _upsample_bilinear2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_bilinear2d_aa_backward
+
- func: upsample_trilinear3d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_trilinear3d
@@ -9243,43 +9587,86 @@
- func: upsample_bicubic2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_bicubic2d_backward
+- func: _upsample_bicubic2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_bicubic2d_aa
+
+- func: _upsample_bicubic2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_bicubic2d_aa_backward
+
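The _upsample_bilinear2d_aa / _upsample_bicubic2d_aa kernels provide antialiased resampling; the usual way to reach them is presumed to be the antialias flag on torch.nn.functional.interpolate. A minimal sketch under that assumption:

```python
import torch
import torch.nn.functional as F

img = torch.rand(1, 3, 64, 64)
# antialias only applies to the bilinear/bicubic modes
small = F.interpolate(img, size=(16, 16), mode="bilinear",
                      align_corners=False, antialias=True)
print(small.shape)   # torch.Size([1, 3, 16, 16])
```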
- func: upsample_nearest1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_nearest1d
+- func: _upsample_nearest_exact1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_nearest_exact1d
+
- func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_nearest1d_backward
+- func: _upsample_nearest_exact1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_nearest_exact1d_backward
+
- func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_nearest2d
+- func: _upsample_nearest_exact2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_nearest_exact2d
+
- func: upsample_nearest2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CompositeExplicitAutograd: upsample_nearest2d_backward
+- func: _upsample_nearest_exact2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _upsample_nearest_exact2d_backward
+
- func: upsample_nearest3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CPU: upsample_nearest3d_cpu
CUDA: upsample_nearest3d_cuda
QuantizedCPU: upsample_nearest3d_quantized_cpu
+- func: _upsample_nearest_exact3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CPU: _upsample_nearest_exact3d_cpu
+ CUDA: _upsample_nearest_exact3d_cuda
+ QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu
+
- func: upsample_nearest3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
python_module: nn
dispatch:
CPU: upsample_nearest3d_backward_cpu
CUDA: upsample_nearest3d_backward_cuda
+- func: _upsample_nearest_exact3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
+ python_module: nn
+ dispatch:
+ CPU: _upsample_nearest_exact3d_backward_cpu
+ CUDA: _upsample_nearest_exact3d_backward_cuda
+
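The _upsample_nearest_exact* family mirrors the plain nearest kernels but with the corrected pixel-center convention; it is presumed to back the 'nearest-exact' mode of torch.nn.functional.interpolate. A minimal sketch under that assumption:

```python
import torch
import torch.nn.functional as F

x = torch.rand(1, 1, 5, 5)
y = F.interpolate(x, scale_factor=2, mode="nearest-exact")
print(y.shape)   # torch.Size([1, 1, 10, 10])
```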
# NOTE: all of the non-"vec" upsample overloads are only kept for backward compatibility.
- func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
@@ -9323,10 +9710,32 @@
- func: upsample_bilinear2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
python_module: nn
structured_delegate: upsample_bilinear2d_backward.grad_input
+- func: _upsample_bilinear2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_bilinear2d_aa_out_cpu
+ CUDA: _upsample_bilinear2d_aa_out_cuda
+
+- func: _upsample_bilinear2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_bilinear2d_aa.out
+
+- func: _upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_bilinear2d_aa_backward_out_cpu
+ CUDA: _upsample_bilinear2d_aa_backward_out_cuda
+
+- func: _upsample_bilinear2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_bilinear2d_aa_backward.grad_input
+
- func: upsample_bicubic2d.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_bicubic2d_out_cpu
@@ -9345,10 +9754,32 @@
- func: upsample_bicubic2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
python_module: nn
structured_delegate: upsample_bicubic2d_backward.grad_input
+- func: _upsample_bicubic2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_bicubic2d_aa_out_cpu
+ CUDA: _upsample_bicubic2d_aa_out_cuda
+
+- func: _upsample_bicubic2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_bicubic2d_aa.out
+
+- func: _upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_bicubic2d_aa_backward_out_cpu
+ CUDA: _upsample_bicubic2d_aa_backward_out_cuda
+
+- func: _upsample_bicubic2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_bicubic2d_aa_backward.grad_input
+
- func: upsample_trilinear3d.out(Tensor self, int[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_trilinear3d_out_cpu
@@ -9374,73 +9805,143 @@
structured: True
dispatch:
CPU: upsample_nearest1d_out_cpu
CUDA: upsample_nearest1d_out_cuda
+- func: _upsample_nearest_exact1d.out(Tensor self, int[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_nearest_exact1d_out_cpu
+ CUDA: _upsample_nearest_exact1d_out_cuda
+
- func: upsample_nearest1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
python_module: nn
structured_delegate: upsample_nearest1d.out
+- func: _upsample_nearest_exact1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_nearest_exact1d.out
+
- func: upsample_nearest1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest1d_backward_out_cpu
CUDA: upsample_nearest1d_backward_out_cuda
+- func: _upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_nearest_exact1d_backward_out_cpu
+ CUDA: _upsample_nearest_exact1d_backward_out_cuda
+
- func: upsample_nearest1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
python_module: nn
structured_delegate: upsample_nearest1d_backward.grad_input
+- func: _upsample_nearest_exact1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_nearest_exact1d_backward.grad_input
+
- func: upsample_nearest2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest2d_out_cpu
CUDA: upsample_nearest2d_out_cuda
+- func: _upsample_nearest_exact2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_nearest_exact2d_out_cpu
+ CUDA: _upsample_nearest_exact2d_out_cuda
+
- func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
python_module: nn
structured_delegate: upsample_nearest2d.out
dispatch:
QuantizedCPU: upsample_nearest2d_quantized_cpu
+- func: _upsample_nearest_exact2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_nearest_exact2d.out
+ dispatch:
+ QuantizedCPU: _upsample_nearest_exact2d_quantized_cpu
+
- func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest2d_backward_out_cpu
CUDA: upsample_nearest2d_backward_out_cuda
+- func: _upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_nearest_exact2d_backward_out_cpu
+ CUDA: _upsample_nearest_exact2d_backward_out_cuda
+
- func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
python_module: nn
structured_delegate: upsample_nearest2d_backward.grad_input
+- func: _upsample_nearest_exact2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_nearest_exact2d_backward.grad_input
+
- func: upsample_nearest3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest3d_out_cpu
CUDA: upsample_nearest3d_out_cuda
+- func: _upsample_nearest_exact3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_nearest_exact3d_out_cpu
+ CUDA: _upsample_nearest_exact3d_out_cuda
+
- func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
python_module: nn
structured_delegate: upsample_nearest3d.out
dispatch:
QuantizedCPU: upsample_nearest3d_quantized_cpu
+- func: _upsample_nearest_exact3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_nearest_exact3d.out
+ dispatch:
+ QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu
+
- func: upsample_nearest3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: upsample_nearest3d_backward_out_cpu
CUDA: upsample_nearest3d_backward_out_cuda
+- func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+ python_module: nn
+ structured: True
+ dispatch:
+ CPU: _upsample_nearest_exact3d_backward_out_cpu
+ CUDA: _upsample_nearest_exact3d_backward_out_cuda
+
- func: upsample_nearest3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
python_module: nn
structured_delegate: upsample_nearest3d_backward.grad_input
+- func: _upsample_nearest_exact3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+ python_module: nn
+ structured_delegate: _upsample_nearest_exact3d_backward.grad_input
+
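The `_upsample_nearest_exact{1,2,3}d` family added above corresponds to the "nearest-exact" interpolation mode, which uses a corrected pixel-center convention compared with the legacy "nearest" mode. A hedged sketch, assuming a PyTorch build whose `interpolate` accepts this mode string:

import torch
import torch.nn.functional as F

x = torch.arange(16.0).reshape(1, 1, 4, 4)
legacy = F.interpolate(x, scale_factor=2, mode="nearest")        # upsample_nearest2d
exact  = F.interpolate(x, scale_factor=2, mode="nearest-exact")  # _upsample_nearest_exact2d
# The two modes can select different source pixels near cell boundaries.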
- func: sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
structured: True
structured_inherits: TensorIteratorBase
dispatch:
@@ -9499,22 +10000,10 @@
- func: slow_conv_transpose2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int[2] dilation=1) -> Tensor
python_module: nn
structured_delegate: slow_conv_transpose2d.out
-- func: slow_conv_transpose2d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
- python_module: nn
- dispatch:
- CPU: slow_conv_transpose2d_backward_out_cpu
- CUDA: slow_conv_transpose2d_backward_out_cuda
-
-- func: slow_conv_transpose2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
- python_module: nn
- dispatch:
- CPU: slow_conv_transpose2d_backward_cpu
- CUDA: slow_conv_transpose2d_backward_cuda
-
- func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: slow_conv_transpose3d_out_cpu
CUDA: slow_conv_transpose3d_out_cuda
@@ -9523,47 +10012,35 @@
python_module: nn
dispatch:
CPU: slow_conv_transpose3d_cpu
CUDA: slow_conv_transpose3d_cuda
-- func: slow_conv_transpose3d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
- python_module: nn
- dispatch:
- CPU: slow_conv_transpose3d_backward_out_cpu
- CUDA: slow_conv_transpose3d_backward_out_cuda
-
-- func: slow_conv_transpose3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
- python_module: nn
- dispatch:
- CPU: slow_conv_transpose3d_backward_cpu
- CUDA: slow_conv_transpose3d_backward_cuda
-
- func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
python_module: nn
-- func: thnn_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+- func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!)
python_module: nn
dispatch:
CPU: slow_conv2d_forward_out_cpu
CUDA: slow_conv2d_forward_out_cuda
-- func: thnn_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
+- func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv2d_forward_cpu
CUDA: slow_conv2d_forward_cuda
-- func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
python_module: nn
dispatch:
CPU: slow_conv2d_backward_out_cpu
CUDA: slow_conv2d_backward_out_cuda
-- func: thnn_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
python_module: nn
dispatch:
CPU: slow_conv2d_backward_cpu
CUDA: slow_conv2d_backward_cuda
@@ -9576,85 +10053,43 @@
- func: _conv_depthwise2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, int[2] dilation) -> Tensor
python_module: nn
dispatch:
CUDA: conv_depthwise2d_cuda
-- func: _conv_depthwise2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight) -> (Tensor(a!), Tensor(b!))
- python_module: nn
- dispatch:
- CUDA: conv_depthwise2d_backward_cuda_out
-
-- func: _conv_depthwise2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[2] output_mask) -> (Tensor grad_input, Tensor grad_weight)
- python_module: nn
- dispatch:
- CUDA: conv_depthwise2d_backward_cuda
-
- func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, int[3] dilation) -> Tensor
python_module: nn
dispatch:
CUDA: conv_depthwise3d_cuda
-- func: conv_depthwise3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
- python_module: nn
- dispatch:
- CUDA: conv_depthwise3d_backward_cuda_out
-
-- func: conv_depthwise3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
- python_module: nn
- dispatch:
- CUDA: conv_depthwise3d_backward_cuda
-
- func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0) -> Tensor
python_module: nn
-- func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+- func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output) -> Tensor(a!)
python_module: nn
dispatch:
CPU: slow_conv3d_forward_out_cpu
-- func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
+- func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv3d_forward_cpu
-- func: slow_conv3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
- python_module: nn
- dispatch:
- CPU: slow_conv3d_backward_out_cpu
-
-- func: slow_conv3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
- python_module: nn
- dispatch:
- CPU: slow_conv3d_backward_cpu
-
- func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv_dilated2d_cpu
CUDA: slow_conv_dilated2d_cuda
-- func: slow_conv_dilated2d_backward(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
- python_module: nn
- dispatch:
- CPU: slow_conv_dilated2d_backward_cpu
- CUDA: slow_conv_dilated2d_backward_cuda
-
- func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv_dilated3d_cpu
CUDA: slow_conv_dilated3d_cuda
-- func: slow_conv_dilated3d_backward(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
- python_module: nn
- dispatch:
- CPU: slow_conv_dilated3d_backward_cpu
- CUDA: slow_conv_dilated3d_backward_cuda
-
- func: col2im.out(Tensor self, int[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: col2im_out_cpu
CUDA: col2im_out_cuda
@@ -9712,35 +10147,49 @@
- func: isinf(Tensor self) -> Tensor
variants: function, method
device_check: NoCheck
device_guard: False
+ dispatch:
+ CompositeExplicitAutograd: isinf
+ SparseCPU, SparseCUDA: isinf_sparse
+ SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
- func: record_stream(Tensor(a!) self, Stream s) -> ()
variants: method
dispatch:
CUDA: record_stream_cuda
- func: isposinf(Tensor self) -> Tensor
variants: function, method
structured_delegate: isposinf.out
+ dispatch:
+ SparseCPU, SparseCUDA: isposinf_sparse
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
- func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: isposinf_out
+ SparseCPU, SparseCUDA: isposinf_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
- func: isneginf(Tensor self) -> Tensor
variants: function, method
structured_delegate: isneginf.out
+ dispatch:
+ SparseCPU, SparseCUDA: isneginf_sparse
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
- func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: isneginf_out
+ SparseCPU, SparseCUDA: isneginf_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
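The added SparseCPU/SparseCUDA and SparseCsr dispatch entries mean the inf-checking predicates can run directly on sparse inputs. A small illustrative example, assuming the sparse kernels behave as these dispatch entries declare:

import torch

dense = torch.tensor([1.0, float("inf"), float("-inf"), 0.0])
print(torch.isposinf(dense))               # tensor([False,  True, False, False])

sparse = dense.to_sparse()
result = torch.isneginf(sparse)            # computed on the sparse values
print(result.to_dense())                   # tensor([False, False,  True, False])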
# NOTE [_add_batch_dim and _remove_batch_dim]
# _add_batch_dim and _remove_batch_dim are meant to be used in the implementation
# of the vmap frontend API (see torch/_vmap_internals.py). They are not
# user-facing, hence the leading underscore. Please don't use them anywhere else.
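For context, these batching helpers are consumed by the prototype vmap frontend rather than called directly. A hedged sketch of that entry point, assuming the prototype API in torch/_vmap_internals.py is importable as shown:

import torch
from torch._vmap_internals import vmap  # prototype frontend built on _add_batch_dim/_remove_batch_dim

def dot(a, b):
    return (a * b).sum()

x = torch.randn(8, 3)
y = torch.randn(8, 3)
out = vmap(dot)(x, y)  # shape (8,): dot applied over the leading batch dimension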
@@ -10063,15 +10512,15 @@
- func: special_sinc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
python_module: special
variants: function
-- func: special_round(Tensor self) -> Tensor
+- func: special_round(Tensor self, *, int decimals=0) -> Tensor
python_module: special
variants: function
-- func: special_round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+- func: special_round.out(Tensor self, *, int decimals=0, Tensor(a!) out) -> Tensor(a!)
python_module: special
variants: function
- func: special_log1p(Tensor self) -> Tensor
python_module: special
@@ -10107,10 +10556,14 @@
- func: special_multigammaln.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)
python_module: special
variants: function
+- func: special_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ python_module: special
+ variants: function
+
## Functions related to the fast Fourier transform and the torch.fft namespace
# Note [FFT namespace binding]
# Functions in the fft python module should have their names start with the
# "fft_" prefix and be bound to the desired Python name in
# torch/fft/__init__.py, and the desired C++ name in torch/csrc/api/include/torch/fft.h.
@@ -10198,10 +10651,30 @@
- func: fft_irfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: fft
variants: function
+- func: fft_hfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_hfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
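`fft_hfft2`/`fft_ihfft2` (and the n-dimensional variants further down) add two-dimensional Hermitian FFTs. A brief sketch of the intended round trip, assuming the torch.fft bindings follow the naming note above:

import torch

x = torch.randn(8, 8, dtype=torch.complex64)
real_signal = torch.fft.hfft2(x)       # Hermitian-symmetric complex input -> real output, shape (8, 14)
half_spec = torch.fft.ihfft2(real_signal)  # real input -> complex half-spectrum, shape (8, 8)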
- func: fft_fftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
python_module: fft
variants: function
- func: fft_fftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
@@ -10230,10 +10703,30 @@
- func: fft_irfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: fft
variants: function
+- func: fft_hfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_hfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
- func: fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
python_module: fft
variants: function
- func: fft_fftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)
@@ -10284,10 +10777,42 @@
- func: linalg_cholesky.out(Tensor self, *, bool upper=False, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
variants: function
+- func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
+ python_module: linalg
+ variants: function
+ dispatch:
+ CPU, CUDA: linalg_cross
+
+- func: linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ dispatch:
+ CPU, CUDA: linalg_cross_out
+
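`linalg_cross` is the schema behind `torch.linalg.cross`, a batched cross product taken along a dimension of size 3. A small illustrative example:

import torch

a = torch.randn(4, 3)
b = torch.randn(4, 3)
c = torch.linalg.cross(a, b, dim=-1)  # per-row 3D cross product, shape (4, 3)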
+# linalg.lu_factor
+- func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
+ python_module: linalg
+ variants: function
+
+- func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
+ python_module: linalg
+ variants: function
+
+- func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
+ python_module: linalg
+ structured_delegate: linalg_lu_factor_ex.out
+ variants: function
+
+- func: linalg_lu_factor_ex.out(Tensor A, *, bool pivot=True, bool check_errors=False, Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info)
+ python_module: linalg
+ variants: function
+ structured: True
+ dispatch:
+ CPU, CUDA: linalg_lu_factor_ex_out
+
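`linalg_lu_factor` returns the packed LU factorization plus pivots, while `linalg_lu_factor_ex` additionally returns an `info` tensor instead of raising on problematic inputs. A hedged sketch of the Python-level calls these schemas back:

import torch

A = torch.randn(5, 5)
LU, pivots = torch.linalg.lu_factor(A)           # errors out if A cannot be factorized
LU, pivots, info = torch.linalg.lu_factor_ex(A)  # info != 0 flags problems instead of raising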
- func: linalg_det(Tensor self) -> Tensor
python_module: linalg
variants: function
- func: linalg_det.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -10325,10 +10850,16 @@
variants: function
- func: linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
+- func: linalg_matrix_exp(Tensor self) -> Tensor
+ python_module: linalg
+ variants: function
+ dispatch:
+ CPU, CUDA: linalg_matrix_exp
+
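`linalg_matrix_exp` backs `torch.linalg.matrix_exp`, the matrix exponential of a (batched) square matrix. A tiny sanity-check example:

import torch

A = torch.zeros(2, 2)
print(torch.linalg.matrix_exp(A))  # exp(0) is the identity matrix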
- func: linalg_slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
python_module: linalg
variants: function
dispatch:
CPU, CUDA: linalg_slogdet
@@ -10465,22 +10996,34 @@
python_module: linalg
- func: linalg_matrix_norm.str_ord_out(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
-- func: linalg_svd.U(Tensor self, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
+# This function exposes the `compute_uv` flag, which is then used to implement `linalg.svd` and
+# `linalg.svdvals` as composite functions that call this one
+- func: _linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor Vh)
+ variants: function
+ structured_delegate: _linalg_svd.U
+
+- func: _linalg_svd.U(Tensor A, bool full_matrices=False, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
+ structured: True
+ dispatch:
+ CPU, CUDA: _linalg_svd_out
+
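As the comment above says, `linalg.svd` and `linalg.svdvals` become thin composites over `_linalg_svd`. The real implementations live in ATen's C++ code; the following is only a hedged Python illustration of that relationship, assuming `torch._linalg_svd` is bound with the schema shown:

import torch

def svd_like(A, full_matrices=True):
    # compute_uv=True: U and Vh are materialized
    return torch._linalg_svd(A, full_matrices=full_matrices, compute_uv=True)

def svdvals_like(A):
    # compute_uv=False: only singular values are needed; U/Vh come back empty
    _, S, _ = torch._linalg_svd(A, full_matrices=False, compute_uv=False)
    return S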
+- func: linalg_svd(Tensor A, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
python_module: linalg
+ variants: function
-- func: linalg_svd(Tensor self, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
+- func: linalg_svd.U(Tensor A, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
python_module: linalg
variants: function
-- func: linalg_svdvals(Tensor input) -> Tensor
+- func: linalg_svdvals(Tensor A) -> Tensor
python_module: linalg
variants: function
-- func: linalg_svdvals.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)
+- func: linalg_svdvals.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
variants: function
- func: linalg_cond(Tensor self, Scalar? p=None) -> Tensor
python_module: linalg
@@ -10496,19 +11039,41 @@
- func: linalg_cond.p_str_out(Tensor self, str p, *, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
variants: function
-- func: linalg_pinv(Tensor self, float rcond=1e-15, bool hermitian=False) -> Tensor
+- func: linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
python_module: linalg
variants: function
+ dispatch:
+ CompositeExplicitAutograd: linalg_pinv
+- func: linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: linalg_pinv_out
+
+- func: linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor
+ python_module: linalg
+ variants: function
+
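The `atol`/`rtol` overloads of `linalg_pinv` (and, further down, of `linalg_matrix_rank`) replace the single `rcond` cutoff with separate absolute and relative tolerances on the singular values. A short hedged example:

import torch

A = torch.randn(6, 4)
P1 = torch.linalg.pinv(A)                        # default tolerances
P2 = torch.linalg.pinv(A, atol=1e-6, rtol=1e-5)  # explicit singular-value cutoff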
- func: linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor
python_module: linalg
variants: function
-- func: linalg_pinv.out(Tensor self, float rcond=1e-15, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
variants: function
- func: linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
@@ -10563,18 +11128,36 @@
python_module: linalg
- func: linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
-- func: linalg_matrix_rank(Tensor self, float? tol=None, bool hermitian=False) -> Tensor
+- func: linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
python_module: linalg
variants: function
-- func: linalg_matrix_rank.out(Tensor self, float? tol=None, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
variants: function
+- func: linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor
+ python_module: linalg
+ variants: function
+
+- func: linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ variants: function
+
- func: linalg_matrix_rank.tol_tensor(Tensor input, Tensor tol, bool hermitian=False) -> Tensor
python_module: linalg
variants: function
- func: linalg_matrix_rank.out_tol_tensor(Tensor input, Tensor tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -10619,9 +11202,15 @@
# Note: this function is only for testing.
- func: _test_ambiguous_defaults.b(Tensor dummy, int a=2, str b="2") -> Tensor
cpp_no_default_args: ['a', 'b']
python_module: nn
+
+# Note: this function is only for testing.
+- func: _test_warn_in_autograd(Tensor self) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _test_warn_in_autograd
- func: segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor
variants: function
dispatch:
CPU, CUDA: segment_reduce_kernel