codegen/native_functions.yaml in torch-rb-0.4.2 vs codegen/native_functions.yaml in torch-rb-0.5.0
- old
+ new
@@ -45,10 +45,11 @@
use_c10_dispatcher: full
variants: function
# Computes the gradient of current tensor w.r.t. graph leaves.
- func: backward(Tensor self, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> ()
+ use_c10_dispatcher: full
manual_kernel_registration: True
variants: method
# DEPRECATED. Sets the tensor data held by this `Variable` to be the same as
# `new_data`. It requires that `new_data` and `Variable` have compatible tensor
@@ -92,10 +93,11 @@
use_c10_dispatcher: full
manual_kernel_registration: True
variants: method
- func: requires_grad_(Tensor(a!) self, bool requires_grad=True) -> Tensor(a!)
+ use_c10_dispatcher: full
manual_kernel_registration: True
variants: method
# Enables .grad attribute for non-leaf Tensors.
- func: retain_grad(Tensor(a!) self) -> ()
@@ -123,16 +125,10 @@
use_c10_dispatcher: full
- func: refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a)
variants: method
-- func: unflatten.Dimname(Tensor self, Dimname dim, int[] sizes, Dimname[] names) -> Tensor
- variants: method
-
-- func: unflatten.int(Tensor self, int dim, int[] sizes, Dimname[] names) -> Tensor
- variants: method
-
- func: _use_cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank) -> bool
use_c10_dispatcher: full
dispatch:
CUDA: _use_cudnn_ctc_loss
@@ -148,43 +144,50 @@
use_c10_dispatcher: full
dispatch:
CUDA: _cudnn_rnn_flatten_weight
- func: _cudnn_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor? weight_buf, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: _cudnn_rnn
- func: _cudnn_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
+ use_c10_dispatcher: full
dispatch:
CUDA: _cudnn_rnn_backward
-- func: _cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor
+- func: _cudnn_init_dropout_state(float dropout, bool train, int dropout_seed, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: _cudnn_init_dropout_state
- func: _debug_has_internal_overlap(Tensor self) -> int
use_c10_dispatcher: full
variants: function
- func: _fused_dropout(Tensor self, float p, Generator? generator=None) -> (Tensor, Tensor)
variants: function
dispatch:
- CUDA: fused_dropout_cuda
+ CUDA: fused_dropout_cuda
- func: _masked_scale(Tensor self, Tensor mask, float scale) -> Tensor
use_c10_dispatcher: full
variants: function
dispatch:
- CUDA: masked_scale_cuda
+ CUDA: masked_scale_cuda
- func: _sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
- func: _sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: _sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: _sobol_engine_initialize_state_(Tensor(a!) self, int dimension) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: _reshape_from_tensor(Tensor self, Tensor shape) -> Tensor
use_c10_dispatcher: full
- func: _shape_as_tensor(Tensor self) -> Tensor
@@ -192,67 +195,105 @@
- func: dropout(Tensor input, float p, bool train) -> Tensor
use_c10_dispatcher: full
- func: dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: feature_dropout(Tensor input, float p, bool train) -> Tensor
use_c10_dispatcher: full
- func: feature_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: alpha_dropout(Tensor input, float p, bool train) -> Tensor
use_c10_dispatcher: full
- func: alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor
use_c10_dispatcher: full
- func: feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: abs(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: abs_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: abs_out
+# Note [Adding an alias]
+# To add an alias do the following:
+#
+# 1) Copy the original function's native_functions.yaml entry, but replace the
+# original function's name with the alias's name and delete any dispatch
+# keys for the alias. Specifying a dispatch key will prevent
+# autograd from recording the operations the alias performs, which
+# will stop it from "inheriting" the original operation's autograd behavior.
+# 2) Implement the corresponding functions and have them redispatch to the
+# original function.
+# 3) Add entries for the alias (and original function, if needed) to
+# aten/src/ATen/core/interned_strings.h
+# (This may require removing an entry from ATen/core/aten_interned_strings.h.)
+# 4) Add docstrings to the new function that reference the original function,
+# and document the method as usual (if it exists.)
+# (See torch/_torch_docs.py and docs/source/torch.rst if adding a function,
+# torch/_tensor_docs.py and docs/source/tensors.rst if adding a method,
+# or module-specific doc bindings (like torch/linalg/__init__.py) if
+# adding an alias in a namespace.)
+# 5) Update torch/overrides.py consistent with the original function.
+# 6) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
+# 7) Add entries to test/test_op_aliases.py's "alias_infos"
+#
+# See torch.absolute, an alias for torch.abs, as an example.
+
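
[Editor's illustration, not part of either file: step 2 of the note above says the alias should simply redispatch to the original operation. For the torch.absolute example the note cites, the native kernels would look roughly like the C++ sketch below; the exact signatures (notably the out= argument order) are an assumption based on the ATen conventions of this release.]

    #include <ATen/ATen.h>

    namespace at { namespace native {

    // Alias entries carry no dispatch keys in native_functions.yaml, so autograd
    // records the abs ops these functions redispatch to.
    Tensor absolute(const Tensor& self) {
      return self.abs();
    }

    Tensor& absolute_(Tensor& self) {
      return self.abs_();
    }

    Tensor& absolute_out(Tensor& result, const Tensor& self) {
      return at::abs_out(result, self);
    }

    }} // namespace at::native
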
+# Absolute, alias for abs
- func: absolute(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- dispatch:
- CPU: abs
- CUDA: abs
- func: absolute_(Tensor(a!) self) -> Tensor(a!)
- variants: function, method
- dispatch:
- CPU: abs_
- CUDA: abs_
+ use_c10_dispatcher: full
+ variants: method
- func: absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: abs_out
- CUDA: abs_out
- func: angle(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: angle_out
- func: view_as_real(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: function
- func: view_as_complex(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: function
+- func: sgn(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: sgn_(Tensor(a!) self) -> Tensor(a!)
+ variants: method
+
+- func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: sgn_out
+
- func: real(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: function
- func: imag(Tensor(a) self) -> Tensor(a)
@@ -262,20 +303,40 @@
- func: conj(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: conj.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: conj_out
+- func: _conj(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
- func: acos(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: acos_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: acos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: acos_out
+# arccos, alias of acos
+- func: arccos(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arccos_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arccos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
- func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
use_c10_dispatcher: full
- func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
use_c10_dispatcher: full
@@ -286,61 +347,83 @@
- func: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: add
- CUDA: add
- SparseCPU: add_sparse
- SparseCUDA: add_sparse
+ CPU, CUDA: add
+ SparseCPU, SparseCUDA: add_sparse
MkldnnCPU: mkldnn_add
- Vulkan: vulkan_add
- func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: add_
- CUDA: add_
- SparseCPU: add_sparse_
- SparseCUDA: add_sparse_
+ CPU, CUDA: add_
+ SparseCPU, SparseCUDA: add_sparse_
MkldnnCPU: mkldnn_add_
- func: add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: add_out
- CUDA: add_out
+ CPU, CUDA: add_out
SparseCPU: add_out_sparse_cpu
SparseCUDA: add_out_sparse_cuda
MkldnnCPU: mkldnn_add_out
+- func: _add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ CPU: add_relu
+
+- func: _add_relu_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ CPU: add_relu_
+
+- func: _add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ variants: function
+ dispatch:
+ CPU: add_relu_out
+
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: addmv
- func: addmv_(Tensor(a!) self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: addmv_
- func: addmv.out(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: addmv_out
- func: _addmv_impl_(Tensor(a!) self, Tensor self2, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
dispatch:
CPU: addmv_impl_cpu
CUDA: addmv_impl_cuda
- func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
- func: affine_grid_generator(Tensor theta, int[] size, bool align_corners) -> Tensor
@@ -352,12 +435,16 @@
variants: function
- func: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: all
- func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: all_out
- func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
variants: function, method
- func: all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -367,23 +454,30 @@
variants: function, method
- func: any.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: any
- func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: any_out
- func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
variants: function, method
- func: any.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- func: arange(Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: arange.start(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: arange.start_step(Scalar start, Scalar end, Scalar step, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)
- func: arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
@@ -400,64 +494,93 @@
- func: argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: argmax
- CUDA: argmax
+ CPU, CUDA: argmax
- func: argmin(Tensor self, int? dim=None, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: argmin
- CUDA: argmin
+ CPU, CUDA: argmin
- func: acosh(Tensor self) -> Tensor
use_c10_dispatcher: full
- supports_named_tensor: True
variants: function, method
- func: acosh_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
+ use_c10_dispatcher: full
variants: function, method
- func: acosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
+ dispatch:
+ CPU, CUDA: acosh_out
+# arccosh, alias for acosh
+- func: arccosh(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arccosh_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arccosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
- func: asinh(Tensor self) -> Tensor
use_c10_dispatcher: full
- supports_named_tensor: True
variants: function, method
- func: asinh_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
+ use_c10_dispatcher: full
variants: function, method
- func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
+ dispatch:
+ CPU, CUDA: asinh_out
+# arcsinh, alias for asinh
+- func: arcsinh(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arcsinh_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arcsinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
- func: atanh(Tensor self) -> Tensor
use_c10_dispatcher: full
- supports_named_tensor: True
variants: function, method
- func: atanh_(Tensor(a!) self) -> Tensor(a!)
- supports_named_tensor: True
+ use_c10_dispatcher: full
variants: function, method
- func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
+ dispatch:
+ CPU, CUDA: atanh_out
+# arctanh, alias for atanh
+- func: arctanh(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arctanh_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arctanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
- func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: as_strided_tensorimpl
- CUDA: as_strided_tensorimpl
- QuantizedCPU: as_strided_qtensorimpl
- QuantizedCUDA: as_strided_qtensorimpl
+ CPU, CUDA: as_strided_tensorimpl
+ QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
device_guard: False
- func: as_strided_(Tensor(a!) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a!)
variants: function, method
device_guard: False
@@ -465,82 +588,152 @@
- func: asin(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: asin_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: asin_
+ SparseCPU, SparseCUDA: asin_sparse_
- func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: asin_out
+ SparseCPU, SparseCUDA: asin_out_sparse
+# arcsin, alias of asin
+- func: arcsin(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arcsin_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arcsin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
- func: atan(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: atan_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: atan_out
+# arctan, alias of atan
+- func: arctan(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arctan_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: arctan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: atleast_1d(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: atleast_1d.Sequence(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
+
+- func: atleast_2d(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: atleast_2d.Sequence(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
+ variants: function
+
+- func: atleast_3d(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
+ variants: function
+
- func: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: baddbmm_cpu
CUDA: baddbmm_cuda
- func: baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: baddbmm__cpu
CUDA: baddbmm__cuda
- func: _baddbmm_mkl_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function
- func: baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CPU: baddbmm_out_cpu
CUDA: baddbmm_out_cuda
- func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: bartlett_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
+ use_c10_dispatcher: full
- func: quantized_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor var, float eps, float output_scale, int output_zero_point) -> Tensor
- requires_tensor: True
+ use_c10_dispatcher: full
dispatch:
QuantizedCPU: quantized_batch_norm
- func: _batch_norm_impl_index(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> (Tensor, Tensor, Tensor, Tensor, int)
+ use_c10_dispatcher: full
- func: _batch_norm_impl_index_backward(int impl_index, Tensor input, Tensor grad_output, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var_transform, bool train, float eps, bool[3] output_mask, Tensor reservedSpace) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
# Sample bernoulli with values in `self` as probability.
- func: bernoulli(Tensor self, *, Generator? generator=None) -> Tensor
variants: function, method
- func: bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
variants: function
+ dispatch:
+ CPU, CUDA: bernoulli_out
- func: bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: bernoulli_
- func: bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: bernoulli_
# This out-of-place version isn't used explicitly, but needed by jit.
# There is no default valid on `p` here because it would introduce ambiguity
# with `bernoulli(Tensor self, *, Generator? generator=None)` declaration.
- func: bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor
variants: function, method
- func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias) -> Tensor
+ use_c10_dispatcher: full
- func: binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
variants: function
dispatch:
CPU: binary_cross_entropy_cpu
CUDA: binary_cross_entropy_cuda
@@ -551,10 +744,11 @@
dispatch:
CPU: binary_cross_entropy_out_cpu
CUDA: binary_cross_entropy_out_cuda
- func: binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
variants: function
dispatch:
CPU: binary_cross_entropy_backward_cpu
CUDA: binary_cross_entropy_backward_cuda
@@ -565,84 +759,89 @@
dispatch:
CPU: binary_cross_entropy_backward_out_cpu
CUDA: binary_cross_entropy_backward_out_cuda
- func: binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
variants: function
- func: binary_cross_entropy_with_logits_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
variants: function
- func: bincount(Tensor self, Tensor? weights=None, int minlength=0) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: _bincount_cpu
CUDA: _bincount_cuda
- func: bitwise_not(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: bitwise_not_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: bitwise_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: bitwise_not_out
- CUDA: bitwise_not_out
+ CPU, CUDA: bitwise_not_out
- func: logical_not(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: logical_not_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: logical_not_out
- CUDA: logical_not_out
+ CPU, CUDA: logical_not_out
- func: logical_xor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: logical_xor_out
- CUDA: logical_xor_out
+ CPU, CUDA: logical_xor_out
- func: logical_and(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: logical_and_out
- CUDA: logical_and_out
+ CPU, CUDA: logical_and_out
- func: logical_or(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: logical_or_out
- CUDA: logical_or_out
+ CPU, CUDA: logical_or_out
- func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: bmm(Tensor self, Tensor mat2) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
@@ -690,103 +889,156 @@
- func: ceil(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: ceil_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: ceil_out
- CUDA: ceil_out
+ CPU, CUDA: ceil_out
- func: chain_matmul(Tensor[] matrices) -> Tensor
use_c10_dispatcher: full
variants: function
+- func: unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[]
+ use_c10_dispatcher: full
+ variants: function, method
+ device_guard: False
+
- func: chunk(Tensor(a) self, int chunks, int dim=0) -> Tensor(a)[]
use_c10_dispatcher: full
variants: function, method
device_guard: False
- func: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: clamp
- CUDA: clamp
- QuantizedCPU: quantized_clamp
- Vulkan: vulkan_clamp
+ CPU, CUDA: clamp
+ QuantizedCPU: clamp_quantized_cpu
- func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: clamp_out
- func: clamp_max(Tensor self, Scalar max) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: clamp_max_out
- func: clamp_min(Tensor self, Scalar min) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: clamp_min_out
+# clip is an alias for clamp
+- func: clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: clip_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
+ variants: function, method
+
+- func: clip.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
+
- func: cudnn_is_acceptable(Tensor self) -> bool
use_c10_dispatcher: full
device_guard: False
+- func: complex(Tensor real, Tensor imag) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: complex_out
+
+- func: polar(Tensor abs, Tensor angle) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: polar_out
+
- func: constant_pad_nd(Tensor self, int[] pad, Scalar value=0) -> Tensor
use_c10_dispatcher: full
variants: function
-- func: contiguous(Tensor self, *, MemoryFormat memory_format=contiguous_format) -> Tensor
+- func: contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)
+ use_c10_dispatcher: full
variants: method
- func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+ use_c10_dispatcher: full
- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+ use_c10_dispatcher: full
- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
use_c10_dispatcher: full
-- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+ use_c10_dispatcher: full
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
+ use_c10_dispatcher: full
+
- func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
+ use_c10_dispatcher: full
-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
+ use_c10_dispatcher: full
- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
+ use_c10_dispatcher: full
- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
+ use_c10_dispatcher: full
- func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
use_c10_dispatcher: full
- func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
use_c10_dispatcher: full
# NB: we inherit the goofy argument order from PyTorch torch.nn.functional
- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
+ use_c10_dispatcher: full
- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
+ use_c10_dispatcher: full
- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
+ use_c10_dispatcher: full
- func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
- manual_kernel_registration: True
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
use_c10_dispatcher: full
@@ -795,26 +1047,42 @@
- func: cos(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: cos_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: cos_out
- func: cosh(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: cosh_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: cosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: cosh_out
- func: cosine_embedding_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
+- func: count_nonzero.dim_IntList(Tensor self, int[] dim) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU, CUDA: count_nonzero
+
+- func: count_nonzero(Tensor self, int? dim=None) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
- func: cudnn_affine_grid_generator(Tensor theta, int N, int C, int H, int W) -> Tensor grid
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_affine_grid_generator_forward
@@ -823,64 +1091,78 @@
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_affine_grid_generator_backward
- func: cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: cudnn_batch_norm
# NB: You can only use this if you used cudnn_batch_norm training=True
- func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: cudnn_batch_norm_backward
- func: cudnn_convolution.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_deprecated
-- func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: cudnn_convolution.deprecated2(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
use_c10_dispatcher: full
dispatch:
+ CUDA: cudnn_convolution_deprecated2
+
+- func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
CUDA: cudnn_convolution
-- func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_backward_input
-- func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[2] output_mask) -> (Tensor, Tensor)
+- func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_backward
-- func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_backward_weight
- func: cudnn_convolution_transpose.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_transpose_deprecated
-- func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: cudnn_convolution_transpose.deprecated2(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
use_c10_dispatcher: full
dispatch:
+ CUDA: cudnn_convolution_transpose_deprecated2
+
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
CUDA: cudnn_convolution_transpose
# NB: output_padding not strictly needed here, but it's helpful for the float
# backwards
-- func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[2] output_mask) -> (Tensor, Tensor)
+- func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_transpose_backward
-- func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_transpose_backward_input
-- func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
use_c10_dispatcher: full
dispatch:
CUDA: cudnn_convolution_transpose_backward_weight
# NB: input is special cased in a way I don't quite understand
@@ -926,21 +1208,33 @@
variants: function
dispatch:
CPU: cummin_helper_cpu
CUDA: cummin_helper_cuda
+- func: cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: cumprod.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- func: cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function, method
- func: cumprod.dimname_out(Tensor self, Dimname dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: cumprod_backward(Tensor grad, Tensor input, int dim) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: cumsum.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
- func: cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
@@ -956,23 +1250,19 @@
use_c10_dispatcher: full
- func: _ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
- CPU: ctc_loss_cpu
+ CPU: ctc_loss_cpu
CUDA: ctc_loss_gpu
- func: _ctc_loss_backward(Tensor grad, Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, Tensor neg_log_likelihood, Tensor log_alpha, int blank, bool zero_infinity=False) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: ctc_loss_backward_cpu
CUDA: ctc_loss_backward_gpu
-- func: det(Tensor self) -> Tensor
- use_c10_dispatcher: full
- variants: function, method
-
- func: diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: diagflat(Tensor self, int offset=0) -> Tensor
@@ -984,54 +1274,103 @@
variants: function, method
- func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
variants: function, method
+- func: diagonal_backward(Tensor grad, int[] input_sizes, int offset, int dim1, int dim2) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: div.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: div
- CUDA: div
- SparseCPU: div_sparse
- SparseCUDA: div_sparse
+ CPU, CUDA: div
+ SparseCPU, SparseCUDA: div_sparse
- func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: div_
- CUDA: div_
- SparseCPU: div_sparse_
- SparseCUDA: div_sparse_
+ CPU, CUDA: div_
+ SparseCPU, SparseCUDA: div_sparse_
- func: div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: div_out
- CUDA: div_out
- SparseCPU: div_out_sparse_zerodim
- SparseCUDA: div_out_sparse_zerodim
+ CPU, CUDA: div_out
+ SparseCPU, SparseCUDA: div_out_sparse_zerodim
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: div.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
+# divide, alias for div
+- func: divide.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: divide.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+ # true_divide, an alias for div
+- func: true_divide.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: true_divide.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
- func: dot(Tensor self, Tensor tensor) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: legacy::cpu::_th_dot
- CUDA: legacy::cuda::_th_dot
+ CPU: dot
+ CUDA: dot_cuda
- func: dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)
+- func: vdot(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU: vdot
+ CUDA: vdot_cuda
+
+- func: vdot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
- func: einsum(str equation, Tensor[] tensors) -> Tensor
use_c10_dispatcher: full
- func: embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
use_c10_dispatcher: full
@@ -1044,10 +1383,11 @@
dispatch:
CPU: embedding_dense_backward_cpu
CUDA: embedding_dense_backward_cuda
- func: embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)
+ use_c10_dispatcher: full
dispatch:
CPU: embedding_renorm_cpu_
CUDA: embedding_renorm_cuda_
- func: embedding_sparse_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq) -> Tensor
@@ -1060,22 +1400,34 @@
# the backward inputs are the same as forward ones.
# The above `embedding_bag` wrapper is created to achieve this, e.g.,
# applying indices = indices.contiguous().
# The backward functions apply a check that these input tensors are contiguous.
+
+- func: _embedding_bag_forward_only(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False) -> (Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
+ dispatch:
+ CPU: _embedding_bag_forward_only_cpu
+ CUDA: _embedding_bag_forward_only_cuda
+
- func: embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False) -> (Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
- func: _embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False) -> (Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CPU: _embedding_bag_cpu
CUDA: _embedding_bag_cuda
- func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, int num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights) -> Tensor
+ use_c10_dispatcher: full
- func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, int num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights) -> Tensor
+ use_c10_dispatcher: full
- func: _embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, int num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CPU: _embedding_bag_dense_backward_cpu
CUDA: _embedding_bag_dense_backward_cuda
- func: _embedding_bag_per_sample_weights_backward(Tensor grad, Tensor weight, Tensor indices, Tensor offsets, Tensor offset2bag, int mode) -> Tensor
@@ -1083,122 +1435,154 @@
dispatch:
CPU: _embedding_bag_per_sample_weights_backward_cpu
CUDA: _embedding_bag_per_sample_weights_backward_cuda
- func: empty_meta(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ #use_c10_dispatcher: full
- func: empty.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
device_guard: False
- func: empty.memory_format(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ #use_c10_dispatcher: full
dispatch:
CPU: empty_cpu
CUDA: empty_cuda
MkldnnCPU: empty_mkldnn
- SparseCPU: empty_sparse
- SparseCUDA: empty_sparse
- Vulkan: empty_vulkan
+ SparseCPU, SparseCUDA: empty_sparse
- func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ #use_c10_dispatcher: full
variants: method
- func: new_full(Tensor self, int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
variants: method
- func: new_zeros(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
variants: method
# other overrides are to provide a more helpful error message that dtype is required
- func: _empty_affine_quantized(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, float scale=1, int zero_point=0, MemoryFormat? memory_format=contiguous_format) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CPU: empty_affine_quantized_other_backends_stub
- QuantizedCPU: empty_affine_quantized
- QuantizedCUDA: empty_affine_quantized
+ QuantizedCPU, QuantizedCUDA: empty_affine_quantized
# it's a factory function receiving a tensor argument, thus overriding explicitly
# other overrides are to provide a more helpful error message that dtype is required
- func: _empty_per_channel_affine_quantized(int[] size, *, Tensor scales, Tensor zero_points, int axis, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=contiguous_format) -> Tensor
+ use_c10_dispatcher: full
category_override: factory
dispatch:
CPU: empty_per_channel_affine_quantized_other_backends_stub
- QuantizedCPU: empty_per_channel_affine_quantized_cpu
+ QuantizedCPU, QuantizedCUDA: empty_per_channel_affine_quantized
- func: resize_(Tensor(a!) self, int[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!)
- manual_kernel_registration: True
+ use_c10_dispatcher: full
variants: method
device_guard: False
+ dispatch:
+ CPU: resize_
+ CUDA: resize_cuda_
+ QuantizedCPU: quantized_resize_cpu_
- func: empty_quantized(int[] size, Tensor qtensor) -> Tensor
+ use_c10_dispatcher: full
variants: function
dispatch:
- QuantizedCPU: empty_quantized
- QuantizedCUDA: empty_quantized
+ QuantizedCPU, QuantizedCUDA: empty_quantized
- func: empty.out(int[] size, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)
device_guard: False
- func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
device_guard: False
- func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CPU: empty_strided_cpu
CUDA: empty_strided_cuda
- Vulkan: empty_strided_vulkan
- func: erf(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: erf_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: erf_out
- func: erfc(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: erfc_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: erfc_out
- func: exp(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: exp_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: exp_out
+- func: exp2(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: exp2_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: exp2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: exp2_out
+
- func: expm1(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: expm1_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: expm1_out
- CUDA: expm1_out
+ CPU, CUDA: expm1_out
- func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
use_c10_dispatcher: full
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
device_guard: False
-- func: expand_as(Tensor self, Tensor other) -> Tensor
+- func: expand_as(Tensor(a) self, Tensor other) -> Tensor(a)
use_c10_dispatcher: full
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
device_guard: False
- func: eye(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: eye.m(int n, int m, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: eye.out(int n, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: eye_out_cpu
CUDA: eye_out_cuda
@@ -1206,100 +1590,138 @@
- func: eye.m_out(int n, int m, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: eye_out_cpu
CUDA: eye_out_cuda
-- func: flatten.using_ints(Tensor self, int start_dim=0, int end_dim=-1) -> Tensor
+- func: flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
-- func: flatten.named_out_dim(Tensor self, int start_dim, int end_dim, Dimname out_dim) -> Tensor
+- func: flatten.named_out_dim(Tensor(a) self, int start_dim, int end_dim, Dimname out_dim) -> Tensor(a)
variants: function, method
-- func: flatten.using_names(Tensor self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor
+- func: flatten.using_names(Tensor(a) self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor(a)
variants: function, method
-- func: flatten.DimnameList(Tensor self, Dimname[] dims, Dimname out_dim) -> Tensor
+- func: flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a)
variants: function, method
+- func: unflatten.int(Tensor(a) self, int dim, int[] sizes, Dimname[]? names=None) -> Tensor(a)
+ variants: method
+
+- func: unflatten.Dimname(Tensor(a) self, Dimname dim, int[] sizes, Dimname[] names) -> Tensor(a)
+ variants: method
+
- func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: floor(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: floor_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: floor_out
- CUDA: floor_out
+ CPU, CUDA: floor_out
- func: floor_divide(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: floor_divide
- CUDA: floor_divide
- SparseCPU: floor_divide_sparse
- SparseCUDA: floor_divide_sparse
+ CPU, CUDA: floor_divide
+ SparseCPU, SparseCUDA: floor_divide_sparse
- func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: floor_divide_
- CUDA: floor_divide_
- SparseCPU: floor_divide_sparse_
- SparseCUDA: floor_divide_sparse_
+ CPU, CUDA: floor_divide_
+ SparseCPU, SparseCUDA: floor_divide_sparse_
- func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: floor_divide_out
- CUDA: floor_divide_out
- SparseCPU: floor_divide_out_sparse_zerodim
- SparseCUDA: floor_divide_out_sparse_zerodim
+ CPU, CUDA: floor_divide_out
+ SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim
- func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: frac(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: frac_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: frac_out
- func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
- func: full(int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: full.out(int[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)
- func: full_like(Tensor self, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: from_file(str filename, bool? shared=None, int? size=0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CPU: from_file
+- func: gcd.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: gcd_out
+
+- func: gcd(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: lcm.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: lcm_out
+
+- func: lcm(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: lcm_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
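gcd and lcm are new elementwise integer ops in this release, declared with the usual out / functional / in-place triple. Roughly, through the Python front end:

    import torch

    a = torch.tensor([4, 6, 9])
    b = torch.tensor([2, 4, 3])
    torch.gcd(a, b)   # tensor([2, 2, 3])
    torch.lcm(a, b)   # tensor([ 4, 12,  9])
    a.gcd_(b)         # in-place method variant, writes the result into `a`
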
# NOTE [ grid_sampler Native Functions ]
# `grid_sampler` does all the shape checking and then dispatches to one of
# `cudnn_grid_sampler`, `grid_sampler_2d`, or `grid_sampler_3d`, each of which
# has the corresponding backward defined as native functions as well. Therefore,
# in these functions and their backwards, no more shape checking is done.
#
+# There is also _grid_sampler_2d_cpu_fallback which is an
+# implementation detail of grid_sampler_2d and is only exposed here for testing
+# purposes.
+#
# Additionally, arguments `padding_mode` and `interpolation_mode` are cast to
# enums defined in `native/GridSampler.h`. `cudnn_grid_sampler` doesn't take in
# `interpolation_mode` because it only supports Bilinear interpolation mode.
# Nor does it take in `align_corners` because it only supports the mode
# `align_corners = True`.
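In user code all of this sits behind a single entry point: the string modes are converted to the integer enums mentioned above before dispatch to the cudnn / 2d / 3d kernels. A rough Python-side sketch:

    import torch
    import torch.nn.functional as F

    inp  = torch.randn(1, 3, 8, 8)           # (N, C, H_in, W_in)
    grid = torch.rand(1, 16, 16, 2) * 2 - 1   # sampling locations in [-1, 1]
    out = F.grid_sample(inp, grid,
                        mode='bilinear',       # -> interpolation_mode enum
                        padding_mode='zeros',  # -> padding_mode enum
                        align_corners=False)
    out.shape   # torch.Size([1, 3, 16, 16])
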
@@ -1316,10 +1738,17 @@
use_c10_dispatcher: full
dispatch:
CPU: grid_sampler_2d_backward_cpu
CUDA: grid_sampler_2d_backward_cuda
+# See NOTE [ grid_sample CPU fallback ]
+- func: _grid_sampler_2d_cpu_fallback(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
+ use_c10_dispatcher: full
+
+- func: _grid_sampler_2d_cpu_fallback_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
+
- func: grid_sampler_3d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: grid_sampler_3d_cpu
CUDA: grid_sampler_3d_cuda
@@ -1329,48 +1758,53 @@
dispatch:
CPU: grid_sampler_3d_backward_cpu
CUDA: grid_sampler_3d_backward_cuda
- func: hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: hamming_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: hamming_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: hamming_window.periodic_alpha(int window_length, bool periodic, float alpha, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: hamming_window.periodic_alpha_beta(int window_length, bool periodic, float alpha, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
-- func: hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor
+- func: kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
use_c10_dispatcher: full
-- func: ger(Tensor self, Tensor vec2) -> Tensor
+- func: kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
use_c10_dispatcher: full
- variants: function, method
-- func: ger.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
+- func: kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
+- func: hinge_embedding_loss(Tensor self, Tensor target, float margin=1.0, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
+
- func: group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor
+ use_c10_dispatcher: full
- func: native_group_norm(Tensor input, Tensor? weight, Tensor? bias, int N, int C, int HxW, int group, float eps) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
- CPU: native_group_norm
- CUDA: native_group_norm
+ CPU, CUDA: native_group_norm
+ Math: math_group_norm
- func: native_group_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int N, int C, int HxW, int group, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
- CPU: native_group_norm_backward
- CUDA: native_group_norm_backward
+ CPU, CUDA: native_group_norm_backward
-# FFT
-
-- func: fft(Tensor self, int signal_ndim, bool normalized=False) -> Tensor
- use_c10_dispatcher: full
- variants: function, method
-
- func: ifft(Tensor self, int signal_ndim, bool normalized=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: rfft(Tensor self, int signal_ndim, bool normalized=False, bool onesided=True) -> Tensor
@@ -1382,10 +1816,14 @@
variants: function, method
- func: _fft_with_size(Tensor self, int signal_ndim, bool complex_input, bool complex_output, bool inverse, int[] checked_signal_sizes, bool normalized, bool onesided, int[] output_sizes) -> Tensor
use_c10_dispatcher: full
variants: function
+
+- func: _fft_with_size.norm_modes(Tensor self, int signal_ndim, bool complex_input, bool complex_output, bool inverse, int[] checked_signal_sizes, int normalization, bool onesided, int[] output_sizes) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
dispatch:
CPU: _fft_mkl
CUDA: _fft_cufft
- func: _cufft_get_plan_cache_size(int device_index) -> int
@@ -1400,16 +1838,19 @@
- func: _cufft_clear_plan_cache(int device_index) -> ()
use_c10_dispatcher: full
- func: index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
variants: function, method
+ dispatch:
+ CPU, CUDA: index
# NB: This function is special-cased in tools/autograd/gen_variable_type.py
# NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
# - Tensor Tensor::index(ArrayRef<TensorIndex> indices)
# - Tensor Tensor::index(std::initializer_list<TensorIndex> indices)
- func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
use_c10_dispatcher: full
variants: function, method
@@ -1431,12 +1872,15 @@
- func: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
variants: function, method
- func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
variants: function
+ dispatch:
+ CPU, CUDA: _index_put_impl_
- func: instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor
+ use_c10_dispatcher: full
variants: function
- func: inverse(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
@@ -1457,14 +1901,12 @@
- func: isnan(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
device_guard: False
dispatch:
- CPU: isnan
- CUDA: isnan
- SparseCPU: isnan_sparse
- SparseCUDA: isnan_sparse
+ CPU, CUDA: isnan
+ SparseCPU, SparseCUDA: isnan_sparse
- func: is_distributed(Tensor self) -> bool
use_c10_dispatcher: full
variants: function, method
device_guard: False
@@ -1477,10 +1919,14 @@
- func: is_complex(Tensor self) -> bool
use_c10_dispatcher: full
variants: function, method
device_guard: False
+- func: isreal(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
- func: is_nonzero(Tensor self) -> bool
use_c10_dispatcher: full
variants: function, method
device_guard: False
@@ -1516,25 +1962,30 @@
variants: function, method
- func: kthvalue.dimname_out(Tensor self, int k, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- func: layer_norm(Tensor input, int[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor
+ use_c10_dispatcher: full
- func: native_layer_norm(Tensor input, Tensor? weight, Tensor? bias, int M, int N, float eps) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CPU: layer_norm_cpu
CUDA: layer_norm_cuda
- func: native_layer_norm_backward(Tensor grad_out, Tensor input, Tensor mean, Tensor rstd, Tensor? weight, int M, int N, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CPU: layer_norm_backward_cpu
CUDA: layer_norm_backward_cuda
- func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
- func: mkldnn_linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
MkldnnCPU: mkldnn_linear
- func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
@@ -1559,97 +2010,101 @@
use_c10_dispatcher: full
- func: fbgemm_pack_quantized_matrix.KN(Tensor input, int K, int N) -> Tensor
use_c10_dispatcher: full
-- func: linspace(Scalar start, Scalar end, int steps=100, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: linspace(Scalar start, Scalar end, int? steps=None, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
-- func: linspace.out(Scalar start, Scalar end, int steps=100, *, Tensor(a!) out) -> Tensor(a!)
+- func: linspace.out(Scalar start, Scalar end, int? steps=None, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: linspace_cpu_out
CUDA: linspace_cuda_out
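The steps argument loses its implicit default of 100 here (it becomes an optional int?), so callers are expected to pass it explicitly. For example:

    import torch

    torch.linspace(0, 1, steps=5)
    # tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])
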
- func: log(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: log_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: log_out
- CUDA: log_out
+ CPU, CUDA: log_out
- func: log10(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: log10_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: log10.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: log10_out
- CUDA: log10_out
+ CPU, CUDA: log10_out
- func: log1p(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: log1p_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: log1p_
- CUDA: log1p_
- SparseCPU: log1p_sparse_
- SparseCUDA: log1p_sparse_
+ CPU, CUDA: log1p_
+ SparseCPU, SparseCUDA: log1p_sparse_
- func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: log1p_out
- CUDA: log1p_out
- SparseCPU: log1p_out_sparse
- SparseCUDA: log1p_out_sparse
+ CPU, CUDA: log1p_out
+ SparseCPU, SparseCUDA: log1p_out_sparse
- func: log2(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: log2_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: log2_out
- CUDA: log2_out
+ CPU, CUDA: log2_out
- func: logaddexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: logaddexp_out
- func: logaddexp(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: logaddexp2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: logaddexp2_out
- func: logaddexp2(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: logdet(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
-- func: logspace(Scalar start, Scalar end, int steps=100, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: logspace(Scalar start, Scalar end, int? steps=None, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
-- func: logspace.out(Scalar start, Scalar end, int steps=100, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+- func: logspace.out(Scalar start, Scalar end, int? steps=None, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: logspace_cpu_out
CUDA: logspace_cuda_out
# log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
- func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function, method
@@ -1675,10 +2130,11 @@
dispatch:
CPU: _logcumsumexp_out_cpu
CUDA: _logcumsumexp_out_cuda
- func: logcumsumexp(Tensor self, int dim) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
- func: logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor
@@ -1714,28 +2170,65 @@
- func: matrix_power(Tensor self, int n) -> Tensor
use_c10_dispatcher: full
variants: function, method
-- func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+- func: matrix_exp(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: matrix_exp
-- func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
+- func: matrix_exp_backward(Tensor self, Tensor grad) -> Tensor
+ use_c10_dispatcher: full
-- func: max_values(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
+- func: _aminmax(Tensor self) -> (Tensor, Tensor)
use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ CPU, CUDA: _aminmax_all
+
+- func: _aminmax.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ CPU, CUDA: _aminmax
+
+- func: _compute_linear_combination(Tensor input, Tensor coefficients) -> Tensor
+ dispatch:
+ CPU, CUDA: _compute_linear_combination
+
+- func: _compute_linear_combination.out(Tensor input, Tensor coefficients, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: _compute_linear_combination_out
+
+- func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+ use_c10_dispatcher: full
variants: function, method
+- func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
+ dispatch:
+ CPU, CUDA: max_out
+
- func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
- func: max.names_dim_max(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
-- func: max_values.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
+- func: value_selecting_reduction_backward(Tensor grad, int dim, Tensor indices, int[] sizes, bool keepdim) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
+- func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
+- func: amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: amax_out
+
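amax (and amin further down) replaces the max_values / min_values entries removed in this diff: it returns only the reduced values, whereas max.dim also returns indices. A small comparison, assuming the usual front-end names:

    import torch

    x = torch.tensor([[1, 5], [4, 2]])
    torch.amax(x, dim=0)                   # tensor([4, 5])  -- values only
    values, indices = torch.max(x, dim=0)  # (tensor([4, 5]), tensor([1, 0]))
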
# Return: (Tensor output, Tensor indices)
- func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
use_c10_dispatcher: full
- func: max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor
@@ -1744,45 +2237,51 @@
- func: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
use_c10_dispatcher: full
- func: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
use_c10_dispatcher: full
- requires_tensor: True
dispatch:
MkldnnCPU: mkldnn_max_pool2d
+- func: mkldnn_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ MkldnnCPU: mkldnn_max_pool3d
+
+- func: quantized_max_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ QuantizedCPU: quantized_max_pool1d
+
- func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
use_c10_dispatcher: full
- requires_tensor: True
dispatch:
QuantizedCPU: quantized_max_pool2d
- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
use_c10_dispatcher: full
# The CPU and GPU dispatch variants are named weirdly here because otherwise there
# are namespacing issues in C++
- func: mean(Tensor self, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: mean_cpu_gpu
- CUDA: mean_cpu_gpu
- QuantizedCPU: quantized_mean_cpu
+ CPU, CUDA: mean_cpu_gpu
+ QuantizedCPU: mean_quantized_cpu
- func: mean.dim(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: mean_cpu_gpu
- CUDA: mean_cpu_gpu
- QuantizedCPU: quantized_mean_cpu
- Vulkan: mean_vulkan
+ CPU, CUDA: mean_cpu_gpu
+ QuantizedCPU: mean_quantized_cpu
- func: mean.out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: mean_out_cpu_gpu
- CUDA: mean_out_cpu_gpu
- QuantizedCPU: quantized_mean_out_cpu
+ CPU, CUDA: mean_out_cpu_gpu
+ QuantizedCPU: mean_out_quantized_cpu
- func: mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
- func: mean.names_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
@@ -1801,24 +2300,28 @@
- func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
use_c10_dispatcher: full
variants: function, method
- func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
+ dispatch:
+ CPU, CUDA: min_out
-- func: min_values(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
- use_c10_dispatcher: full
- variants: function, method
-
- func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
- func: min.names_dim_min(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
-- func: min_values.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
+- func: amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
+- func: amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: amin_out
+
- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+ use_c10_dispatcher: full
- func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
use_c10_dispatcher: full
- func: mkldnn_convolution_backward_weights(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> (Tensor, Tensor)
@@ -1826,18 +2329,21 @@
- func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
use_c10_dispatcher: full
- func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_batch_norm
- func: miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_batch_norm_backward
- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_convolution
- func: miopen_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
use_c10_dispatcher: full
@@ -1858,10 +2364,11 @@
use_c10_dispatcher: full
dispatch:
CUDA: miopen_convolution_backward_weight
- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_convolution_transpose
# NB: output_padding not strictly needed here, but it's helpful for the float
# backwards
@@ -1879,10 +2386,11 @@
use_c10_dispatcher: full
dispatch:
CUDA: miopen_convolution_transpose_backward_weight
- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_depthwise_convolution
- func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
use_c10_dispatcher: full
@@ -1898,39 +2406,41 @@
use_c10_dispatcher: full
dispatch:
CUDA: miopen_depthwise_convolution_backward_weight
- func: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_rnn
- func: miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
+ use_c10_dispatcher: full
dispatch:
CUDA: miopen_rnn_backward
- func: mm(Tensor self, Tensor mat2) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
CPU: mm_cpu
CUDA: mm_cuda
- SparseCPU: _sparse_mm
- SparseCUDA: _sparse_mm
+ SparseCPU, SparseCUDA: _sparse_mm
- func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: mm_cpu_out
CUDA: mm_out_cuda
- SparseCPU: _sparse_mm_out
- SparseCUDA: _sparse_mm_out
+ SparseCPU, SparseCUDA: _sparse_mm_out
- func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
use_c10_dispatcher: full
- func: mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: mode
- func: mode.values(Tensor self, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- func: mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
variants: function, method
@@ -1939,67 +2449,80 @@
- func: mul.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: mul
- CUDA: mul
- SparseCPU: mul_sparse
- SparseCUDA: mul_sparse
+ CPU, CUDA: mul
+ SparseCPU, SparseCUDA: mul_sparse
MkldnnCPU: mkldnn_mul
- func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: mul_
- CUDA: mul_
- SparseCPU: mul_sparse_
- SparseCUDA: mul_sparse_
+ CPU, CUDA: mul_
+ SparseCPU, SparseCUDA: mul_sparse_
MkldnnCPU: mkldnn_mul_
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: mul_out
- CUDA: mul_out
+ CPU, CUDA: mul_out
SparseCPU: mul_out_sparse_cpu
SparseCUDA: mul_out_sparse_cuda
MkldnnCPU: mkldnn_mul_out
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: mul.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
+# multiply, alias for mul
+- func: multiply.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: multiply_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: multiply.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: multiply.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: multiply_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
- func: mv(Tensor self, Tensor vec) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: mv
- CUDA: mv
- SparseCPU: mv_sparse
- SparseCUDA: mv_sparse
+ CPU, CUDA: mv
+ SparseCPU, SparseCUDA: mv_sparse
- func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
- func: mvlgamma(Tensor self, int p) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: mvlgamma_(Tensor(a!) self, int p) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: narrow_copy(Tensor self, int dim, int start, int length) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
- CPU: narrow_copy_dense
- CUDA: narrow_copy_dense
- SparseCPU: narrow_copy_sparse
- SparseCUDA: narrow_copy_sparse
+ CPU, CUDA: narrow_copy_dense
+ SparseCPU, SparseCUDA: narrow_copy_sparse
- func: narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
device_guard: False
@@ -2008,10 +2531,11 @@
use_c10_dispatcher: full
variants: function, method
device_guard: False
- func: native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CPU: batch_norm_cpu
CUDA: batch_norm_cuda
MkldnnCPU: mkldnn_batch_norm
@@ -2023,40 +2547,47 @@
use_c10_dispatcher: full
dispatch:
CUDA: batch_norm_stats_cuda
- func: batch_norm_elemt(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: batch_norm_elemt_cuda
- func: batch_norm_elemt.out(Tensor input, Tensor? weight, Tensor? bias, Tensor mean, Tensor invstd, float eps, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CUDA: batch_norm_elemt_cuda_out
# for backward compatibility
- func: batch_norm_gather_stats(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, int count) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: batch_norm_gather_stats_cuda
- func: batch_norm_gather_stats_with_counts(Tensor input, Tensor mean, Tensor invstd, Tensor? running_mean, Tensor? running_var, float momentum, float eps, Tensor counts) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: batch_norm_gather_stats_with_counts_cuda
- func: native_batch_norm_backward(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CPU: batch_norm_backward_cpu
CUDA: batch_norm_backward_cuda
- func: batch_norm_backward_reduce(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, bool input_g, bool weight_g, bool bias_g) -> (Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: batch_norm_backward_reduce_cuda
- func: batch_norm_backward_elemt(Tensor grad_out, Tensor input, Tensor mean, Tensor invstd, Tensor? weight, Tensor mean_dy, Tensor mean_dy_xmu) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CUDA: batch_norm_backward_elemt_cuda
- func: batch_norm_update_stats(Tensor input, Tensor? running_mean, Tensor? running_var, float momentum) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CPU: batch_norm_update_stats_cpu
CUDA: batch_norm_update_stats_cuda
- func: is_vulkan_available() -> bool
@@ -2064,10 +2595,11 @@
- func: _nnpack_available() -> bool
use_c10_dispatcher: full
- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor
+ use_c10_dispatcher: full
variants: function
- func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
use_c10_dispatcher: full
variants: function
@@ -2082,14 +2614,16 @@
- func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
- func: ones(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: ones.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
- func: ones_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: pairwise_distance(Tensor x1, Tensor x2, float p=2, float eps=1e-06, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
- func: cdist(Tensor x1, Tensor x2, float p=2, int? compute_mode=None) -> Tensor
@@ -2098,31 +2632,47 @@
- func: _euclidean_dist(Tensor x1, Tensor x2) -> Tensor
use_c10_dispatcher: full
- func: _cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor
use_c10_dispatcher: full
+ dispatch:
+ CPU, CUDA: _cdist_forward
- func: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
use_c10_dispatcher: full
+ dispatch:
+ CPU, CUDA: _cdist_backward
- func: pdist(Tensor self, float p=2) -> Tensor
use_c10_dispatcher: full
- func: _pdist_forward(Tensor self, float p=2) -> Tensor
use_c10_dispatcher: full
+ dispatch:
+ CPU, CUDA: _pdist_forward
- func: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
use_c10_dispatcher: full
+ dispatch:
+ CPU, CUDA: _pdist_backward
- func: cosine_similarity(Tensor x1, Tensor x2, int dim=1, float eps=1e-08) -> Tensor
use_c10_dispatcher: full
variants: function
- func: permute(Tensor(a) self, int[] dims) -> Tensor(a)
use_c10_dispatcher: full
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
+- func: movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a)
+ use_c10_dispatcher: full
+ variants: function, method
+
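movedim is new here and is a NumPy-style complement to permute: instead of listing every dimension, you only name the ones that move. Roughly:

    import torch

    x = torch.zeros(2, 3, 4)
    torch.movedim(x, 0, -1).shape            # torch.Size([3, 4, 2])
    torch.movedim(x, (0, 1), (-1, -2)).shape # torch.Size([4, 3, 2])
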
# Only exposed from C++ -- in Python,
# we expose it as an attribute `T`, not a function.
#
# I'd like to name this "T" in C++ too, but
# calling a native function "T" causes undefined
@@ -2137,17 +2687,17 @@
- func: channel_shuffle(Tensor self, int groups) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: channel_shuffle
- QuantizedCPU: quantized_channel_shuffle
+ QuantizedCPU: channel_shuffle_quantized_cpu
- func: is_pinned(Tensor self) -> bool
use_c10_dispatcher: full
variants: method
-- func: pin_memory(Tensor self) -> Tensor
+- func: pin_memory(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: method
- func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
use_c10_dispatcher: full
@@ -2158,54 +2708,55 @@
variants: function
- func: rad2deg(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- supports_named_tensor: True
- func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- supports_named_tensor: True
- func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- func: deg2rad(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- supports_named_tensor: True
- func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- supports_named_tensor: True
- func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- supports_named_tensor: True
- func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: rand.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
- func: rand.generator_with_names(int[] size, *, Generator? generator, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
- func: rand(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: rand.generator(int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: rand.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
- func: rand.generator_out(int[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
- func: rand_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: randint(int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: randint.generator(int high, int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: randint.low(int low, int high, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: randint.low_generator(int low, int high, int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: randint.out(int high, int[] size, *, Tensor(a!) out) -> Tensor(a!)
@@ -2214,14 +2765,17 @@
- func: randint.low_out(int low, int high, int[] size, *, Tensor(a!) out) -> Tensor(a!)
- func: randint.low_generator_out(int low, int high, int[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
- func: randint_like(Tensor self, int high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: randint_like.low_dtype(Tensor self, int low, int high, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: randn(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: randn.generator(int[] size, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: randn.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
@@ -2232,12 +2786,14 @@
- func: randn.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
- func: randn.generator_out(int[] size, *, Generator? generator, Tensor(a!) out) -> Tensor(a!)
- func: randn_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: randperm(int n, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: randperm.generator(int n, *, Generator? generator, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: randperm.out(int n, *, Tensor(a!) out) -> Tensor(a!)
@@ -2245,12 +2801,14 @@
dispatch:
CPU: randperm_out_cpu
CUDA: randperm_out_cuda
- func: range.step(Scalar start, Scalar end, Scalar step=1, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: range_cpu_out
CUDA: range_cuda_out
@@ -2258,26 +2816,44 @@
- func: reciprocal(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: reciprocal_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: reciprocal_out
- func: neg(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: neg_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: neg_
+ SparseCPU, SparseCUDA: neg_sparse_
- func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: neg_out
- CUDA: neg_out
+ CPU, CUDA: neg_out
+ SparseCPU, SparseCUDA: neg_out_sparse
+# Alias for neg
+- func: negative(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: negative_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: negative.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+
- func: repeat(Tensor self, int[] repeats) -> Tensor
use_c10_dispatcher: full
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
- func: repeat_interleave.Tensor(Tensor repeats) -> Tensor
@@ -2293,32 +2869,32 @@
- func: repeat_interleave.self_int(Tensor self, int repeats, int? dim=None) -> Tensor
use_c10_dispatcher: full
variants: function, method
-- func: reshape(Tensor self, int[] shape) -> Tensor
+- func: reshape(Tensor(a) self, int[] shape) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
device_guard: False
- func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
use_c10_dispatcher: full
device_guard: False
- requires_tensor: True
dispatch:
MkldnnCPU: mkldnn_reshape
-- func: reshape_as(Tensor self, Tensor other) -> Tensor
+- func: reshape_as(Tensor(a) self, Tensor other) -> Tensor(a)
use_c10_dispatcher: full
variants: method
device_guard: False
- func: round(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: round_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: round_out
@@ -2330,22 +2906,21 @@
- func: relu(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: relu
- CUDA: relu
+ CPU, CUDA: relu
MkldnnCPU: mkldnn_relu
- QuantizedCPU: quantized_relu
+ QuantizedCPU: relu_quantized_cpu
- func: relu_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: relu_
- CUDA: relu_
+ CPU, CUDA: relu_
MkldnnCPU: mkldnn_relu_
- QuantizedCPU: quantized_relu_
+ QuantizedCPU: relu_quantized_cpu_
- func: prelu(Tensor self, Tensor weight) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
@@ -2371,87 +2946,141 @@
python_module: nn
dispatch:
CPU: gelu_backward_cpu
CUDA: gelu_backward_cuda
+- func: infinitely_differentiable_gelu_backward(Tensor grad, Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ python_module: nn
+ device_guard: False
+
- func: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: hardshrink
- func: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: hardshrink_backward
- func: rsqrt(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: rsqrt_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: rsqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: rsqrt_out
- CUDA: rsqrt_out
+ CPU, CUDA: rsqrt_out
- func: select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a)
variants: function, method
device_guard: False
- func: select.int(Tensor(a) self, int dim, int index) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
device_guard: False
+- func: select_backward(Tensor grad, int[] input_sizes, int dim, int index) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: selu(Tensor self) -> Tensor
use_c10_dispatcher: full
- func: selu_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
- func: celu(Tensor self, Scalar alpha=1.0) -> Tensor
use_c10_dispatcher: full
- func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
+ use_c10_dispatcher: full
+- func: silu(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+
+- func: silu_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ python_module: nn
+
+- func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: nn
+ dispatch:
+ CPU, CUDA: silu_out
+
+- func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+
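silu (the SiLU / "swish" activation, x * sigmoid(x)) enters the nn namespace in this release. A quick sanity check of the definition via the Python front end:

    import torch
    import torch.nn.functional as F

    x = torch.randn(4)
    assert torch.allclose(F.silu(x), x * torch.sigmoid(x))
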
- func: sigmoid(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: sigmoid
- CUDA: sigmoid
- QuantizedCPU: quantized_sigmoid
+ CPU, CUDA: sigmoid
+ QuantizedCPU: sigmoid_quantized_cpu
MkldnnCPU: mkldnn_sigmoid
- func: sigmoid_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: sigmoid_
- CUDA: sigmoid_
+ CPU, CUDA: sigmoid_
MkldnnCPU: mkldnn_sigmoid_
- func: sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: sigmoid_out
+- func: logit(Tensor self, float? eps=None) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU, CUDA: logit
+
+- func: logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU, CUDA: logit_
+
+- func: logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: logit_out
+
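logit is the inverse of sigmoid; the optional eps clamps the input into [eps, 1 - eps] before taking log(p / (1 - p)), which keeps inputs of exactly 0 or 1 finite. For instance:

    import torch

    p = torch.tensor([0.0, 0.25, 0.5, 1.0])
    torch.logit(p)             # tensor([  -inf, -1.0986,  0.0000,    inf])
    torch.logit(p, eps=1e-6)   # endpoints clamped, so the result stays finite
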
- func: sin(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: sin_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: sin_out
- CUDA: sin_out
+ CPU, CUDA: sin_out
- func: sinh(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: sinh_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: sinh_out
# Returns a copy of this `Variable` that is detached from its autograd graph.
# This method is OK to call if the `Variable` is a view.
#
# NOTE: Previously, if we change the tensor metadata (e.g. sizes / strides /
@@ -2460,20 +3089,19 @@
# those metadata changes to the detached tensor will not update the original tensor
# anymore, and in the `detach()` function we need to set `allow_tensor_metadata_change_`
# to false to make such changes explicitly illegal, in order to prevent users from
# changing metadata of the detached tensor and expecting the original tensor to also
# be updated.
-- func: detach(Tensor self) -> Tensor
+- func: detach(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
- manual_kernel_registration: True
variants: function, method
# Like `detach()`, but modifies this `Variable` in-place. This method may
# only be called on non-view `Variable`s. You can use `is_view()` to check
# this. If this `Variable` is a view, throws an `std::runtime_error()`.
- func: detach_(Tensor(a!) self) -> Tensor(a!)
- manual_kernel_registration: True
+ use_c10_dispatcher: full
variants: function, method
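Concretely, the Tensor(a) annotation that detach gains above records what the comment describes: the result is a view that shares storage with the original but is cut out of the autograd graph. A small illustration:

    import torch

    x = torch.ones(3, requires_grad=True)
    y = x.detach()
    y.requires_grad                # False: no gradient tracking on the result
    y.data_ptr() == x.data_ptr()   # True: same storage, no copy was made
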
- func: size.int(Tensor self, int dim) -> int
use_c10_dispatcher: full
variants: function, method
@@ -2486,20 +3114,26 @@
- func: slice.Tensor(Tensor(a) self, int dim=0, int start=0, int end=9223372036854775807, int step=1) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
device_guard: False
+- func: slice_backward(Tensor grad, int[] input_sizes, int dim, int start, int end, int step) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
use_c10_dispatcher: full
variants: function, method
- func: smm(Tensor self, Tensor mat2) -> Tensor
use_c10_dispatcher: full
variants: function, method
# softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
- func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function, method
@@ -2514,20 +3148,30 @@
use_c10_dispatcher: full
dispatch:
CPU: softmax_backward_cpu
CUDA: softmax_backward_cuda
+- func: unsafe_split.Tensor(Tensor self, int split_size, int dim=0) -> Tensor[]
+ use_c10_dispatcher: full
+ variants: function, method
+ device_guard: False
+
- func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
use_c10_dispatcher: full
variants: function, method
device_guard: False
-- func: split_with_sizes(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
+- func: unsafe_split_with_sizes(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
use_c10_dispatcher: full
variants: function, method
device_guard: False
+- func: split_with_sizes(Tensor(a) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
+ use_c10_dispatcher: full
+ variants: function, method
+ device_guard: False
+
- func: squeeze(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: function, method
device_guard: False
@@ -2539,14 +3183,16 @@
- func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
variants: function, method
device_guard: False
- func: squeeze_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!)
variants: method
@@ -2566,18 +3212,35 @@
- func: stack(Tensor[] tensors, int dim=0) -> Tensor
use_c10_dispatcher: full
- func: stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
+- func: hstack(Tensor[] tensors) -> Tensor
+ use_c10_dispatcher: full
+
+- func: hstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: vstack(Tensor[] tensors) -> Tensor
+ use_c10_dispatcher: full
+
+- func: vstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: dstack(Tensor[] tensors) -> Tensor
+ use_c10_dispatcher: full
+
+- func: dstack.out(Tensor[] tensors, *, Tensor(a!) out) -> Tensor(a!)
+
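hstack, vstack and dstack are new NumPy-style concatenation helpers; they differ only in which axis the inputs are joined along, after promoting them to at least 1-, 2- or 3-D respectively. Roughly:

    import torch

    a, b = torch.tensor([1, 2]), torch.tensor([3, 4])
    torch.hstack((a, b))         # tensor([1, 2, 3, 4])
    torch.vstack((a, b))         # tensor([[1, 2],
                                 #         [3, 4]])
    torch.dstack((a, b)).shape   # torch.Size([1, 2, 2])
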
# The signature is designed to be consistent with librosa except that it is
# missing the `pad_mode` and `center` arguments, which are taken care of at
# `torch.functional.py`. They shall be moved here once we have mapping between
# Python strings and C++ Enum in codegen.
-- func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool onesided=True) -> Tensor
+- func: stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
-- func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool onesided=True, int? length=None) -> Tensor
+- func: istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
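The stft signature picks up return_complex here (and onesided becomes optional); with return_complex=True the result is a complex tensor rather than the old real tensor with a trailing size-2 dimension. A hedged sketch of the call:

    import torch

    x = torch.randn(1, 400)
    spec = torch.stft(x, n_fft=64, hop_length=16,
                      window=torch.hann_window(64),
                      return_complex=True)
    spec.dtype   # torch.complex64; shape is (1, n_fft // 2 + 1, n_frames)
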
- func: stride.int(Tensor self, int dim) -> int
use_c10_dispatcher: full
variants: function, method
@@ -2586,76 +3249,122 @@
- func: stride.Dimname(Tensor self, Dimname dim) -> int
variants: function, method
device_guard: False
- func: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: sum
- func: sum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: sum
- func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
- func: sum.IntList_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: sum_out
- func: sum.DimnameList_out(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+- func: nansum(Tensor self, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU, CUDA: nansum
+
+- func: nansum.dim_IntList(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU, CUDA: nansum
+
+- func: nansum.IntList_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: nansum_out
+
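nansum is a new reduction that treats NaN entries as zero rather than propagating them, mirroring the plain sum entries directly above. For example:

    import torch

    t = torch.tensor([1.0, float('nan'), 2.0])
    torch.sum(t)      # tensor(nan)
    torch.nansum(t)   # tensor(3.)
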
- func: sum_to_size(Tensor self, int[] size) -> Tensor
use_c10_dispatcher: full
variants: method
device_guard: False
- func: sqrt(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: sqrt_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: sqrt_out
- func: square(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: square_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: std(Tensor self, bool unbiased=True) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: std
- func: std.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: std
- func: std_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: std_mean
- func: std_mean.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: std_mean
- func: std_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
variants: function
- func: std.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: std_out
- func: std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
variants: function, method
- func: std.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- func: prod(Tensor self, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: prod
- func: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: prod
- func: prod.int_out(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: prod_out
- func: prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
variants: function, method
- func: prod.Dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
@@ -2664,34 +3373,40 @@
use_c10_dispatcher: full
device_guard: False
variants: function, method
- func: t_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
device_guard: False
variants: method
- func: tan(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: tan_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: tan_out
- func: tanh(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: tanh
- CUDA: tanh
- QuantizedCPU: quantized_tanh
+ CPU, CUDA: tanh
+ QuantizedCPU: tanh_quantized_cpu
- func: tanh_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function, method
- func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: tanh_out
- func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
use_c10_dispatcher: full
variants: function
@@ -2700,13 +3415,14 @@
use_c10_dispatcher: full
variants: function
dispatch:
CPU: threshold
CUDA: threshold_cuda
- QuantizedCPU: quantized_threshold
+ QuantizedCPU: threshold_quantized_cpu
- func: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function
dispatch:
CPU: threshold_
CUDA: threshold__cuda
@@ -2732,21 +3448,21 @@
device_guard: False
- func: _mkldnn_transpose(Tensor self, int dim0, int dim1) -> Tensor
use_c10_dispatcher: full
device_guard: False
- requires_tensor: True
dispatch:
MkldnnCPU: mkldnn_transpose
- func: transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: _mkldnn_transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
+ use_c10_dispatcher: full
device_guard: False
- requires_tensor: True
dispatch:
MkldnnCPU: mkldnn_transpose_
- func: one_hot(Tensor self, int num_classes=-1) -> Tensor
use_c10_dispatcher: full
@@ -2773,11 +3489,11 @@
variants: function, method
dispatch:
CPU: roll_cpu
CUDA: roll_cuda
-# default int[] value [0,1] should not add space after comma, since native_parse.py uses ', ' to split args
+# default int[] value [0,1] should not add space after comma, since codegen parser uses ', ' to split args
- func: rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor
use_c10_dispatcher: full
variants: function, method
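A minimal sketch of why the comment above forbids a space after the comma in default list values, assuming a splitter that works on ', ' like the codegen parser mentioned there (hypothetical illustration only, not the actual codegen code):

    # Hypothetical illustration; the real parser lives in PyTorch's codegen.
    schema = "Tensor self, int k=1, int[] dims=[0,1]"
    print(schema.split(', '))
    # ['Tensor self', 'int k=1', 'int[] dims=[0,1]']

    broken = "Tensor self, int k=1, int[] dims=[0, 1]"
    print(broken.split(', '))
    # ['Tensor self', 'int k=1', 'int[] dims=[0', '1]']  (the default is split apart)
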
@@ -2791,53 +3507,33 @@
use_c10_dispatcher: full
- func: triplet_margin_loss(Tensor anchor, Tensor positive, Tensor negative, float margin=1.0, float p=2, float eps=1e-06, bool swap=False, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
-- func: true_divide.Tensor(Tensor self, Tensor other) -> Tensor
+- func: trunc(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- dispatch:
- CPU: true_divide
- CUDA: true_divide
- SparseCPU: true_divide_sparse
- SparseCUDA: true_divide_sparse
-- func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
- variants: method
- dispatch:
- CPU: true_divide_
- CUDA: true_divide_
- SparseCPU: true_divide_sparse_
- SparseCUDA: true_divide_sparse_
+- func: trunc_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
-- func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+- func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: true_divide_out
- CUDA: true_divide_out
- SparseCPU: true_divide_out_sparse_zerodim
- SparseCUDA: true_divide_out_sparse_zerodim
+ CPU, CUDA: trunc_out
-- func: true_divide.Scalar(Tensor self, Scalar other) -> Tensor
+# Alias for trunc
+- func: fix(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
-- func: true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
- variants: method
-
-- func: trunc(Tensor self) -> Tensor
+- func: fix_(Tensor(a!) self) -> Tensor(a!)
use_c10_dispatcher: full
variants: function, method
-- func: trunc_(Tensor(a!) self) -> Tensor(a!)
- variants: function, method
+- func: fix.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
-- func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: trunc_out
- CUDA: trunc_out
-
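A short usage sketch for the fix alias introduced above, via the Python API for the same ATen declarations (fix is documented as an alias of trunc):

    import torch

    x = torch.tensor([-1.7, 0.3, 2.9])
    print(torch.trunc(x))                            # tensor([-1., 0., 2.])
    assert torch.equal(torch.fix(x), torch.trunc(x)) # alias, same result
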
- func: type_as(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method
- func: _has_compatible_shallow_copy_type(Tensor self, Tensor from) -> bool
@@ -2890,43 +3586,54 @@
use_c10_dispatcher: full
variants: function, method
device_guard: False
- func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: vander(Tensor x, int? N=None, bool increasing=False) -> Tensor
use_c10_dispatcher: full
- func: var(Tensor self, bool unbiased=True) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: var
- func: var.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
+ dispatch:
+ CPU, CUDA: var
- func: var.out(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: var_out
- func: var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor
variants: function, method
- func: var.names_out(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
- func: var_mean(Tensor self, bool unbiased=True) -> (Tensor, Tensor)
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: var_mean
- func: var_mean.dim(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: var_mean
- func: var_mean.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> (Tensor, Tensor)
variants: function
-- func: view_as(Tensor self, Tensor other) -> Tensor
+- func: view_as(Tensor(a) self, Tensor other) -> Tensor(a)
use_c10_dispatcher: full
variants: method
device_guard: False
# we define both of these because 'where' does the broadcast and '_s_where' doesn't;
@@ -2934,17 +3641,31 @@
# _s_where derivative.
- func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function, method
+- func: where.ScalarSelf(Tensor condition, Scalar self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: where.Scalar(Tensor condition, Scalar self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
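A brief sketch of the where overloads from Python. The Tensor/Tensor form always works; whether a bare Python scalar is accepted for either branch (the ScalarSelf/ScalarOther overloads above) depends on the PyTorch version, so the sketch sticks to tensors:

    import torch

    cond = torch.tensor([True, False, True])
    x = torch.tensor([1.0, 2.0, 3.0])
    print(torch.where(cond, x, torch.zeros_like(x)))  # tensor([1., 0., 3.])
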
- func: where(Tensor condition) -> Tensor[]
use_c10_dispatcher: full
variants: function
- func: _s_where(Tensor condition, Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: _s_where
- func: norm_except_dim(Tensor v, int pow=2, int dim=0) -> Tensor
use_c10_dispatcher: full
variants: function
@@ -2972,14 +3693,16 @@
- func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_guard: False
- func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!)
- func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
- func: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
use_c10_dispatcher: full
variants: function
dispatch:
@@ -3018,77 +3741,97 @@
# complicated
- func: native_norm(Tensor self, Scalar p=2) -> Tensor
use_c10_dispatcher: full
dispatch:
- SparseCPU: norm_sparse
- SparseCUDA: norm_sparse
+ SparseCPU, SparseCUDA: norm_sparse
+- func: native_norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, ScalarType? dtype) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ SparseCPU, SparseCUDA: norm_sparse
+
# TODO: reduce signatures down to one when optional args is available
- func: _sparse_sum(Tensor self) -> Tensor
use_c10_dispatcher: full
- func: _sparse_sum.dtype(Tensor self, *, ScalarType dtype) -> Tensor
+ use_c10_dispatcher: full
- func: _sparse_sum.dim(Tensor self, int[1] dim) -> Tensor
use_c10_dispatcher: full
- func: _sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor
+ use_c10_dispatcher: full
- func: _sparse_sum_backward(Tensor grad, Tensor self, int[] dim) -> Tensor
use_c10_dispatcher: full
dispatch:
- SparseCPU: _sparse_sum_backward_cpu
- SparseCUDA: _sparse_sum_backward_cuda
+ SparseCPU: _sparse_sum_backward_cpu
+ SparseCUDA: _sparse_sum_backward_cuda
- func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function
- func: _sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function
- func: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
use_c10_dispatcher: full
dispatch:
SparseCPU: softmax_sparse_cpu
+ SparseCUDA: softmax_sparse_cuda
- func: _sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+ use_c10_dispatcher: full
dispatch:
SparseCPU: softmax_backward_sparse_cpu
+ SparseCUDA: softmax_backward_sparse_cuda
- func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ use_c10_dispatcher: full
variants: function
- func: _sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
variants: function
- func: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
use_c10_dispatcher: full
dispatch:
SparseCPU: log_softmax_sparse_cpu
+ SparseCUDA: log_softmax_sparse_cuda
- func: _sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+ use_c10_dispatcher: full
dispatch:
SparseCPU: log_softmax_backward_sparse_cpu
+ SparseCUDA: log_softmax_backward_sparse_cuda
- func: norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: norm.Scalar(Tensor self, Scalar p=2) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
- func: norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: norm.dtype_out(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: norm_out
- func: norm.out(Tensor self, Scalar? p, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: norm_out
- func: norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
variants: function, method
- func: norm.names_ScalarOpt_dim(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False) -> Tensor
@@ -3122,85 +3865,95 @@
- func: nuclear_norm.dim_out(Tensor self, int[2] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
variants: function
- func: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: clone
- CUDA: clone
- SparseCPU: clone_sparse
- SparseCUDA: clone_sparse
+ CPU, CUDA: clone
+ SparseCPU, SparseCUDA: clone_sparse
MkldnnCPU: mkldnn_clone
- QuantizedCPU: quantized_clone
- QuantizedCUDA: quantized_clone
+ QuantizedCPU, QuantizedCUDA: quantized_clone
- func: resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!)
- manual_kernel_registration: True
- variants: function, method
-
-- func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: pow_out
- CUDA: pow_out
- SparseCPU: pow_out_sparse_scalar
- SparseCUDA: pow_out_sparse_scalar
-
-- func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
use_c10_dispatcher: full
variants: function, method
- dispatch:
- CPU: pow
- CUDA: pow
- SparseCPU: pow_sparse_scalar
- SparseCUDA: pow_sparse_scalar
- func: zero_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: zero_
- CUDA: zero_
- SparseCPU: zero_sparse_
- SparseCUDA: zero_sparse_
+ CPU, CUDA: zero_
+ SparseCPU, SparseCUDA: zero_sparse_
MkldnnCPU: mkldnn_zero_
- func: sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: sub_out
- CUDA: sub_out
- SparseCPU: sub_out_sparse
- SparseCUDA: sub_out_sparse
+ CPU, CUDA: sub_out
+ SparseCPU, SparseCUDA: sub_out_sparse
- func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- CPU: sub
- CUDA: sub
- SparseCPU: sub_sparse
- SparseCUDA: sub_sparse
+ CPU, CUDA: sub
+ SparseCPU, SparseCUDA: sub_sparse
- func: sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: sub_
- CUDA: sub_
- SparseCPU: sub_sparse_
- SparseCUDA: sub_sparse_
+ CPU, CUDA: sub_
+ SparseCPU, SparseCUDA: sub_sparse_
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
+# subtract, alias for sub
+- func: subtract.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+
+- func: subtract.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: subtract_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+# For C++ only, until we have conversion from C++ numbers to Tensor
+- func: subtract.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: subtract_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
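A usage sketch for the subtract alias, on the Python side; behavior is assumed identical to sub, including the alpha scaling:

    import torch

    a = torch.tensor([3.0, 5.0])
    b = torch.tensor([1.0, 2.0])
    print(torch.sub(a, b, alpha=2))   # tensor([1., 1.])
    assert torch.equal(torch.subtract(a, b, alpha=2), torch.sub(a, b, alpha=2))
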
- func: rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: rsub
+- func: heaviside.out(Tensor self, Tensor values, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: heaviside_out
+
+- func: heaviside(Tensor self, Tensor values) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: heaviside_(Tensor(a!) self, Tensor values) -> Tensor(a!)
+ variants: method
+
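A short sketch of the new heaviside entries from Python. The values tensor supplies the result where the input is exactly zero; the dtypes of input and values are assumed to match:

    import torch

    x = torch.tensor([-1.5, 0.0, 2.0])
    values = torch.tensor([0.5, 0.5, 0.5])
    print(torch.heaviside(x, values))   # tensor([0.0000, 0.5000, 1.0000])
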
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
variants: function
@@ -3222,16 +3975,16 @@
dispatch:
CPU: addmm_cpu
CUDA: addmm_cuda
SparseCPU: addmm_sparse_dense_cpu
SparseCUDA: addmm_sparse_dense_cuda
- Vulkan: vulkan_addmm
- func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: legacy::cpu::_th_addmm_
+ CPU: addmm_cpu_
CUDA: addmm__cuda
# Warning! For whatever reason, the inplace sparse addmm is NON
# broadcasting
SparseCPU: s_addmm_sparse_dense_cpu_
SparseCUDA: s_addmm_sparse_dense_cuda_
@@ -3253,17 +4006,11 @@
# **must not** have specific type dispatches because otherwise codegen will
# consider them as abstract methods (see Note [Abstract ATen methods]), dispatch
# using **Tensor** type, and thus lose autograd tracking on the actual method
# they dispatch to, e.g., `sparse_coo_tensor_with_dims_and_tensors`.
#
-# The actual ctors `sparse_coo_tensor_with_dims` and `sparse_coo_tensor_with_dims_and_tensors`,
-# on the other hand, need to create `SparseTensorImpl` and know nothing about
-# how `VariableType`s work. So they need to be dispatched using Tensor types.
-# We thus put `requires_tensor=True` to ensure that `VariableType` will unwrap
-# the given variables and call with the Tensor type.
#
-#
# Sparse Methods API Design
# ~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Goals: 1. Flexible API for users to write custom sparse ops
# 2. ctor and member accessor with autograd support
@@ -3351,194 +4098,169 @@
# shared. In other words, their outputs are non-differentiable views of the
# sparse tensor.
# FIXME: would be nicer if TensorOptions was optional based; not adding default arguments for options given
# the default would never make sense.
-- func: sparse_coo_tensor.size(int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor
+- func: sparse_coo_tensor.size(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+ use_c10_dispatcher: full
- func: sparse_coo_tensor.indices(Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: sparse_coo_tensor.indices_size(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
- func: _sparse_coo_tensor_unsafe(Tensor indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
-- func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor
+- func: _validate_sparse_coo_tensor_args(Tensor indices, Tensor values, int[] size) -> ()
+ use_c10_dispatcher: full
+
+- func: _sparse_coo_tensor_with_dims(int sparse_dim, int dense_dim, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+ use_c10_dispatcher: full
dispatch:
- SparseCPU: new_with_dims_sparse
- SparseCUDA: new_with_dims_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: new_with_dims_sparse
-- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, int[] size, Tensor indices, Tensor values, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False) -> Tensor
+- func: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, int[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+ use_c10_dispatcher: full
dispatch:
- SparseCPU: new_with_dims_and_tensor_sparse
- SparseCUDA: new_with_dims_and_tensor_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: new_with_dims_and_tensor_sparse
- func: sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: sparse_resize_
- SparseCUDA: sparse_resize_
- requires_tensor: True
+ SparseCPU, SparseCUDA: sparse_resize_
- func: sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: sparse_resize_and_clear_
- SparseCUDA: sparse_resize_and_clear_
- requires_tensor: True
+ SparseCPU, SparseCUDA: sparse_resize_and_clear_
- func: sparse_mask(Tensor self, Tensor mask) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
SparseCPU: sparse_mask_cpu
SparseCUDA: sparse_mask_cuda
- requires_tensor: True
- func: to_dense(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: sparse_to_dense
- SparseCUDA: sparse_to_dense
+ SparseCPU, SparseCUDA: sparse_to_dense
MkldnnCPU: mkldnn_to_dense
- requires_tensor: True
- func: to_dense_backward(Tensor grad, Tensor input) -> Tensor
use_c10_dispatcher: full
- func: sparse_dim(Tensor self) -> int
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: sparse_dim_sparse
- SparseCUDA: sparse_dim_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: sparse_dim_sparse
device_guard: False
# legacy method
- func: _dimI(Tensor self) -> int
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: sparse_dim_sparse
- SparseCUDA: sparse_dim_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: sparse_dim_sparse
device_guard: False
- func: dense_dim(Tensor self) -> int
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: dense_dim_sparse
- SparseCUDA: dense_dim_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: dense_dim_sparse
device_guard: False
# legacy method
- func: _dimV(Tensor self) -> int
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: dense_dim_sparse
- SparseCUDA: dense_dim_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: dense_dim_sparse
device_guard: False
- func: _nnz(Tensor self) -> int
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: _nnz_sparse
- SparseCUDA: _nnz_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: _nnz_sparse
device_guard: False
- func: coalesce(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
SparseCPU: coalesce_sparse_cpu
SparseCUDA: coalesce_sparse_cuda
- requires_tensor: True
- func: is_coalesced(Tensor self) -> bool
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: is_coalesced_sparse
- SparseCUDA: is_coalesced_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: is_coalesced_sparse
device_guard: False
- func: _indices(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: _indices_sparse
- SparseCUDA: _indices_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: _indices_sparse
device_guard: False
- func: _values(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: _values_sparse
- SparseCUDA: _values_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: _values_sparse
device_guard: False
# This method doesn't do any check but only directly sets the flag. So it can be
# a bit unsafe. Similar to _indices and _values, this is useful for implementing
# custom sparse operations in Python/C++ extension.
- func: _coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: _coalesced_sparse_
- SparseCUDA: _coalesced_sparse_
- requires_tensor: True
+ SparseCPU, SparseCUDA: _coalesced_sparse_
device_guard: False
- func: indices(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: indices_sparse
- SparseCUDA: indices_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: indices_sparse
device_guard: False
- func: values(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: method
dispatch:
- SparseCPU: values_sparse
- SparseCUDA: values_sparse
- requires_tensor: True
+ SparseCPU, SparseCUDA: values_sparse
device_guard: False
- func: hspmm.out(Tensor mat1, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
SparseCPU: hspmm_out_sparse_cpu
SparseCUDA: hspmm_out_sparse_cuda
- requires_tensor: True
- func: hspmm(Tensor mat1, Tensor mat2) -> Tensor
use_c10_dispatcher: full
dispatch:
SparseCPU: hspmm_sparse_cpu
SparseCUDA: hspmm_sparse_cuda
- requires_tensor: True
- func: copy_sparse_to_sparse_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: function
dispatch:
- SparseCPU: copy_sparse_
- SparseCUDA: copy_sparse_
- requires_tensor: True
+ SparseCPU, SparseCUDA: copy_sparse_
- func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]
use_c10_dispatcher: full
variants: function, method
@@ -3547,19 +4269,17 @@
- func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
- CPU: dense_to_sparse
- CUDA: dense_to_sparse
+ CPU, CUDA: dense_to_sparse
- func: to_sparse(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
- CPU: dense_to_sparse
- CUDA: dense_to_sparse
+ CPU, CUDA: dense_to_sparse
- func: to_mkldnn(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method
dispatch:
@@ -3570,80 +4290,86 @@
variants: function
python_module: nn
dispatch:
MkldnnCPU: mkldnn_reorder_conv2d_weight
+- func: mkldnn_reorder_conv3d_weight(Tensor self, int[3] padding=0, int[3] stride=1, int[3] dilation=1, int groups=1) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ python_module: nn
+ dispatch:
+ MkldnnCPU: mkldnn_reorder_conv3d_weight
+
- func: to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor
use_c10_dispatcher: full
- func: quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor
+ use_c10_dispatcher: full
variants: function
dispatch:
- CPU: quantize_per_tensor
- CUDA: quantize_per_tensor
+ CPU, CUDA: quantize_per_tensor
- func: quantize_per_tensor.tensors(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype) -> Tensor[]
+ use_c10_dispatcher: full
variants: function
dispatch:
CPU: quantize_per_tensor_list_cpu
- func: quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor
+ use_c10_dispatcher: full
variants: function
dispatch:
CPU: quantize_per_channel_cpu
- func: dequantize.self(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: dequantize_quant
- QuantizedCUDA: dequantize_quant
+ QuantizedCPU, QuantizedCUDA: dequantize_quant
- func: dequantize.tensors(Tensor[] tensors) -> Tensor[]
use_c10_dispatcher: full
variants: function
dispatch:
- QuantizedCPU: dequantize_tensors_quant
+ QuantizedCPU: dequantize_tensors_quantized_cpu
- func: q_scale(Tensor self) -> float
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: q_scale_quant
- QuantizedCUDA: q_scale_quant
+ QuantizedCPU, QuantizedCUDA: q_scale_quant
- func: q_zero_point(Tensor self) -> int
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: q_zero_point_quant
- QuantizedCUDA: q_zero_point_quant
+ QuantizedCPU, QuantizedCUDA: q_zero_point_quant
- func: q_per_channel_scales(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: q_per_channel_scales_quant
+ QuantizedCPU, QuantizedCUDA: q_per_channel_scales
- func: q_per_channel_zero_points(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: q_per_channel_zero_points_quant
+ QuantizedCPU, QuantizedCUDA: q_per_channel_zero_points
- func: q_per_channel_axis(Tensor self) -> int
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: q_per_channel_axis_quant
+ QuantizedCPU, QuantizedCUDA: q_per_channel_axis
- func: int_repr(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
dispatch:
- QuantizedCPU: int_repr_quant_cpu
- QuantizedCUDA: int_repr_quant_cuda
+ QuantizedCPU: int_repr_quantized_cpu
+ QuantizedCUDA: int_repr_quantized_cuda
- func: _make_per_tensor_quantized_tensor(Tensor self, float scale, int zero_point) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: make_per_tensor_quantized_tensor_cpu
@@ -3656,49 +4382,84 @@
- func: qscheme(Tensor self) -> QScheme
use_c10_dispatcher: full
variants: method
dispatch:
- QuantizedCPU: qscheme_quant
- QuantizedCUDA: qscheme_quant
+ QuantizedCPU, QuantizedCUDA: qscheme_quant
- func: fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> Tensor
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: fake_quantize_per_tensor_affine
- func: fake_quantize_per_tensor_affine_backward(Tensor grad, Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> Tensor
use_c10_dispatcher: full
variants: function
+- func: _fake_quantize_learnable_per_tensor_affine(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ CPU, CUDA: _fake_quantize_learnable_per_tensor_affine
+
+- func: _fake_quantize_learnable_per_tensor_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
+ variants: function
+
- func: fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor
use_c10_dispatcher: full
variants: function
+ dispatch:
+ CPU, CUDA: fake_quantize_per_channel_affine
- func: fake_quantize_per_channel_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor
use_c10_dispatcher: full
variants: function
+- func: _fake_quantize_learnable_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ dispatch:
+ CPU, CUDA: _fake_quantize_learnable_per_channel_affine
+
+- func: _fake_quantize_learnable_per_channel_affine_backward(Tensor grad, Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
+ variants: function
+
- func: _choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int)
use_c10_dispatcher: full
variants: function
+- func: _saturate_weight_to_fp16(Tensor weight) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+- func: choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (float, float)
+ use_c10_dispatcher: full
+ variants: function
+
# to(Device) must not exist because all constructors of Device also works for
# TensorOptions. Otherwise, an ambiguity error is thrown.
# See NOTE [ TensorOptions Constructors ].
-- func: to.dtype_layout(Tensor self, *, ScalarType dtype, Layout layout, Device device, bool pin_memory=False, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
+- func: to.dtype_layout(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: to.device(Tensor self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: to.dtype(Tensor self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
variants: method
device_guard: False
- func: to.other(Tensor self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor
+ use_c10_dispatcher: full
variants: method
device_guard: False
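A sketch of how the to.* overloads above surface in Python. Which schema overload each call resolves to is an implementation detail of argument parsing; the calls themselves are standard Tensor.to usage:

    import torch

    x = torch.zeros(2, 3)
    y = torch.ones(2, 3, dtype=torch.float64)
    a = x.to(torch.float64)        # dtype only
    b = x.to('cpu', torch.int32)   # device together with a dtype
    c = x.to(y)                    # take dtype and device from another tensor
    print(a.dtype, b.dtype, c.dtype)
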
- func: meshgrid(Tensor[] tensors) -> Tensor[]
use_c10_dispatcher: full
@@ -3714,24 +4475,30 @@
- func: item(Tensor self) -> Scalar
use_c10_dispatcher: full
variants: method
- func: result_type.Tensor(Tensor tensor, Tensor other) -> ScalarType
+ use_c10_dispatcher: full
variants: function
- func: result_type.Scalar(Tensor tensor, Scalar other) -> ScalarType
+ use_c10_dispatcher: full
variants: function
- func: result_type.Scalar_Tensor(Scalar scalar, Tensor tensor) -> ScalarType
+ use_c10_dispatcher: full
variants: function
- func: result_type.Scalar_Scalar(Scalar scalar1, Scalar scalar2) -> ScalarType
+ use_c10_dispatcher: full
- func: can_cast(ScalarType from, ScalarType to) -> bool
+ use_c10_dispatcher: full
variants: function
- func: promote_types(ScalarType type1, ScalarType type2) -> ScalarType
+ use_c10_dispatcher: full
variants: function
# NB: Does NOT check precondition that numel == 1
- func: _local_scalar_dense(Tensor self) -> Scalar
use_c10_dispatcher: full
@@ -3740,29 +4507,34 @@
CUDA: _local_scalar_dense_cuda
variants: function
# Fused RNN kernels
- func: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: _thnn_fused_lstm_cell_cuda
- func: _thnn_fused_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor cx, Tensor cy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: _thnn_fused_lstm_cell_backward_cuda
- func: _thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
- func: _thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
dispatch:
CUDA: _thnn_fused_gru_cell_cuda
- func: _thnn_fused_gru_cell_backward(Tensor grad_hy, Tensor workspace, bool has_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
CUDA: _thnn_fused_gru_cell_backward_cuda
- func: _thnn_differentiable_gru_cell_backward(Tensor grad_hy, Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias, Tensor? hidden_bias) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+ use_c10_dispatcher: full
# RNN cells and layers
- func: lstm.input(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor)
use_c10_dispatcher: full
@@ -3786,23 +4558,29 @@
- func: rnn_relu.data(Tensor data, Tensor batch_sizes, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional) -> (Tensor, Tensor)
use_c10_dispatcher: full
- func: lstm_cell(Tensor input, Tensor[] hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> (Tensor, Tensor)
+ use_c10_dispatcher: full
- func: gru_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor
+ use_c10_dispatcher: full
- func: rnn_tanh_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor
+ use_c10_dispatcher: full
- func: rnn_relu_cell(Tensor input, Tensor hx, Tensor w_ih, Tensor w_hh, Tensor? b_ih=None, Tensor? b_hh=None) -> Tensor
+ use_c10_dispatcher: full
# Quantized RNN layer registration has been moved to C10 dispatch in `RNN.cpp`
# Quantized RNN layers
# - func: quantized_lstm(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first, *, ScalarType? dtype=None, bool use_dynamic=False) -> (Tensor, Tensor, Tensor)
+# use_c10_dispatcher: full
# - func: quantized_lstm.data(Tensor data, Tensor batch_sizes, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, *, ScalarType? dtype=None, bool use_dynamic=False) -> (Tensor, Tensor, Tensor)
+# use_c10_dispatcher: full
# Quantized GRU layers
# - func: quantized_gru.input(Tensor input, Tensor hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor)
# use_c10_dispatcher: full
@@ -3837,70 +4615,70 @@
- func: set_.source_Storage(Tensor(a!) self, Storage source) -> Tensor(a!)
variants: method
device_guard: False
dispatch:
- CPU: set_
- CUDA: set_
+ CPU, CUDA: set_
- func: set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
variants: method
device_guard: False
dispatch:
CPU: set_storage_cpu_
CUDA: set_storage_cuda_
- QuantizedCPU: set_storage_quantized_
- QuantizedCUDA: set_storage_quantized_
+ QuantizedCPU, QuantizedCUDA: set_storage_quantized_
- func: set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
device_guard: False
dispatch:
- CPU: set_tensor_
- CUDA: set_tensor_
+ CPU, CUDA: set_tensor_
- func: set_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: set_cpu_
CUDA: set_cuda_
- func: set_quantizer_(Tensor(a!) self, ConstQuantizerPtr quantizer) -> Tensor(a!)
variants: method
dispatch:
- QuantizedCPU: set_quantizer_
- QuantizedCUDA: set_quantizer_
+ QuantizedCPU, QuantizedCUDA: set_quantizer_
- func: is_set_to(Tensor self, Tensor tensor) -> bool
use_c10_dispatcher: full
variants: method
device_guard: False
dispatch:
- CPU: is_set_to
- CUDA: is_set_to
+ CPU, CUDA: is_set_to
- func: masked_fill_.Scalar(Tensor(a!) self, Tensor mask, Scalar value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: masked_fill__cpu
CUDA: masked_fill__cuda
- func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: masked_fill__cpu
CUDA: masked_fill__cuda
- func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: masked_scatter__cpu
CUDA: masked_scatter__cuda
@@ -3911,23 +4689,22 @@
- func: view(Tensor(a) self, int[] size) -> Tensor(a)
use_c10_dispatcher: full
variants: method
device_guard: False
dispatch:
- CPU: view
- CUDA: view
+ CPU, CUDA, QuantizedCPU, QuantizedCUDA: view
MkldnnCPU: mkldnn_view
- QuantizedCPU: view
- QuantizedCUDA: view
- func: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: legacy::cpu::_th_put_
CUDA: legacy::cuda::_th_put_
- func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: index_add_cpu_
CUDA: index_add_cuda_
@@ -3937,24 +4714,25 @@
- func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor
variants: function, method
- func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: legacy::cpu::_th_index_fill_
CUDA: legacy::cuda::_th_index_fill_
- func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: index_fill_
- CUDA: index_fill_
+ CPU, CUDA: index_fill_
- func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
use_c10_dispatcher: full
variants: function, method
@@ -3969,24 +4747,24 @@
- func: index_fill.Dimname_Tensor(Tensor self, Dimname dim, Tensor index, Tensor value) -> Tensor
variants: function, method
- func: scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: scatter_
- CUDA: scatter_
+ CPU, CUDA: scatter_
- func: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: scatter_fill_
- CUDA: scatter_fill_
+ CPU, CUDA: scatter_fill_
- func: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
use_c10_dispatcher: full
variants: function, method
@@ -3994,83 +4772,67 @@
variants: function, method
- func: scatter.dimname_value(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor
variants: function, method
+- func: scatter_.reduce(Tensor(a!) self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+ dispatch:
+ CPU, CUDA: scatter_reduce_
+
+- func: scatter_.value_reduce(Tensor(a!) self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+ dispatch:
+ CPU, CUDA: scatter_scalar_reduce_
+
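A usage sketch for the new scatter_ reduce overloads. String reduce modes such as 'add' were introduced around this libtorch release; the exact set of accepted values depends on the version, so treat this as an assumption:

    import torch

    out = torch.ones(3)
    index = torch.tensor([0, 1, 2, 0])
    src = torch.tensor([1.0, 2.0, 3.0, 4.0])
    out.scatter_(0, index, src, reduce='add')
    print(out)   # tensor([6., 3., 4.])
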
- func: scatter_add_(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: scatter_add_
- CUDA: scatter_add_
+ CPU, CUDA: scatter_add_
- func: scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor
variants: function, method
-- func: lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
- variants: method
-
-- func: lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
- variants: method
-
-- func: gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
- variants: method
-
-- func: gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
- variants: method
-
-- func: le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
- variants: method
-
-- func: le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
- variants: method
-
-- func: ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
- variants: method
-
-- func: ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
- variants: method
-
- func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
-- func: ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
- variants: method
-
-- func: ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
- variants: method
-
- func: bitwise_and.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
- CPU: bitwise_and_out
- CUDA: bitwise_and_out
+ CPU, CUDA: bitwise_and_out
- func: bitwise_and.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
- CPU: bitwise_and_out
- CUDA: bitwise_and_out
+ CPU, CUDA: bitwise_and_out
- func: bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __and__.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
@@ -4078,39 +4840,41 @@
- func: __and__.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: __iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: bitwise_or.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
- CPU: bitwise_or_out
- CUDA: bitwise_or_out
+ CPU, CUDA: bitwise_or_out
- func: bitwise_or.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
- CPU: bitwise_or_out
- CUDA: bitwise_or_out
+ CPU, CUDA: bitwise_or_out
- func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __or__.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
@@ -4118,39 +4882,41 @@
- func: __or__.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: __ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __ior__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: bitwise_xor.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
- CPU: bitwise_xor_out
- CUDA: bitwise_xor_out
+ CPU, CUDA: bitwise_xor_out
- func: bitwise_xor.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
- CPU: bitwise_xor_out
- CUDA: bitwise_xor_out
+ CPU, CUDA: bitwise_xor_out
- func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __xor__.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
@@ -4158,152 +4924,166 @@
- func: __xor__.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: __ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __ixor__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: __lshift__.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: __lshift__
- CUDA: __lshift__
+ CPU, CUDA: __lshift__
- func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: __lshift__
- CUDA: __lshift__
+ CPU, CUDA: __lshift__
- func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: __ilshift__
- CUDA: __ilshift__
+ CPU, CUDA: __ilshift__
- func: __ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: __ilshift__
- CUDA: __ilshift__
+ CPU, CUDA: __ilshift__
- func: __rshift__.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: __rshift__
- CUDA: __rshift__
+ CPU, CUDA: __rshift__
- func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: __rshift__
- CUDA: __rshift__
+ CPU, CUDA: __rshift__
- func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: __irshift__
- CUDA: __irshift__
+ CPU, CUDA: __irshift__
- func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: __irshift__
- CUDA: __irshift__
+ CPU, CUDA: __irshift__
- func: lgamma_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: _lgamma__cpu
CUDA: _lgamma__cuda
- func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
+ dispatch:
+ CPU, CUDA: atan2_
- func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: tril_cpu_
CUDA: tril_cuda_
- func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: triu_cpu_
CUDA: triu_cuda_
- func: digamma_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
+ dispatch:
+ CPU, CUDA: digamma_
- func: polygamma_(Tensor(a!) self, int n) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: renorm_(Tensor(a!) self, Scalar p, int dim, Scalar maxnorm) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: legacy::cpu::_th_renorm_
CUDA: legacy::cuda::_th_renorm_
- func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: pow_
- CUDA: pow_
+ CPU, CUDA: pow_
- func: pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: pow_
- CUDA: pow_
+ CPU, CUDA: pow_
- func: lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: lerp_cpu_scalar_
CUDA: lerp_cuda_scalar_
- func: lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: lerp_cpu_tensor_
CUDA: lerp_cuda_tensor_
- func: fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: fmod_
CUDA: fmod_cuda_
- func: fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: fmod_
CUDA: fmod_cuda_
- func: remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: remainder_
- CUDA: remainder_
+ CPU, CUDA: remainder_
- func: remainder_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: remainder_
- CUDA: remainder_
+ CPU, CUDA: remainder_
- func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
- CPU: legacy::cpu::_th_addbmm_
+ CPU: addbmm_cpu_
CUDA: addbmm__cuda
- func: addbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: addbmm_cpu_out
@@ -4315,35 +5095,52 @@
dispatch:
CPU: addbmm_cpu
CUDA: addbmm_cuda
- func: addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: random_
- func: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: random_
- func: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: random_
- func: uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: uniform_
- func: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: cauchy_
- func: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: log_normal_
- func: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: exponential_
- func: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: geometric_
# wrappers for TH functions
- func: diag.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
@@ -4352,15 +5149,24 @@
- func: diag(Tensor self, int diagonal=0) -> Tensor
use_c10_dispatcher: full
variants: method, function
+- func: diag_backward(Tensor grad, int[] input_sizes, int diagonal) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: cross_out
- func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ dispatch:
+ CPU, CUDA: cross
- func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: triu_cpu_out
CUDA: triu_cuda_out
@@ -4377,15 +5183,17 @@
- func: tril(Tensor self, int diagonal=0) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CPU: tril_indices_cpu
CUDA: tril_indices_cuda
- func: triu_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ use_c10_dispatcher: full
dispatch:
CPU: triu_indices_cpu
CUDA: triu_indices_cuda
- func: trace(Tensor self) -> Tensor
@@ -4393,178 +5201,304 @@
variants: method, function
dispatch:
CPU: legacy::cpu::_th_trace
CUDA: trace_cuda
+- func: trace_backward(Tensor grad, int[] sizes) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: ne.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: ne_out
- CUDA: ne_out
+ CPU, CUDA: ne_out
QuantizedCPU: ne_out_quantized_cpu
- func: ne.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: ne
- CUDA: ne
+ CPU, CUDA: ne
QuantizedCPU: ne_quantized_cpu
- func: ne.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: ne_out
- CUDA: ne_out
+ CPU, CUDA: ne_out
QuantizedCPU: ne_out_quantized_cpu
- func: ne.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: ne
- CUDA: ne
+ CPU, CUDA: ne
QuantizedCPU: ne_quantized_cpu
+- func: ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+# not_equal, alias for torch.ne
+- func: not_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: not_equal.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: not_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: not_equal.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: not_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: not_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
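A sketch of the comparison aliases from Python: not_equal behaves like ne, and the same pattern applies to the greater_equal, less_equal and greater aliases added further below:

    import torch

    a = torch.tensor([1, 2, 3])
    b = torch.tensor([1, 0, 3])
    print(torch.ne(a, b))   # tensor([False,  True, False])
    assert torch.equal(torch.not_equal(a, b), torch.ne(a, b))
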
- func: eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: eq_out
- CUDA: eq_out
+ CPU, CUDA: eq_out
QuantizedCPU: eq_out_quantized_cpu
- func: eq.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: eq
- CUDA: eq
+ CPU, CUDA: eq
QuantizedCPU: eq_quantized_cpu
- func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: eq_out
- CUDA: eq_out
+ CPU, CUDA: eq_out
QuantizedCPU: eq_out_quantized_cpu
- func: eq.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: eq
- CUDA: eq
+ CPU, CUDA: eq
QuantizedCPU: eq_quantized_cpu
- func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: ge_out
- CUDA: ge_out
+ CPU, CUDA: ge_out
QuantizedCPU: ge_out_quantized_cpu
- func: ge.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: ge
- CUDA: ge
+ CPU, CUDA: ge
QuantizedCPU: ge_quantized_cpu
- func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: ge_out
- CUDA: ge_out
+ CPU, CUDA: ge_out
QuantizedCPU: ge_out_quantized_cpu
- func: ge.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: ge
- CUDA: ge
+ CPU, CUDA: ge
QuantizedCPU: ge_quantized_cpu
+- func: ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+# greater_equal, alias for torch.ge
+- func: greater_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: greater_equal.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: greater_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: greater_equal.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: greater_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: greater_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
- func: le.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: le_out
- CUDA: le_out
+ CPU, CUDA: le_out
QuantizedCPU: le_out_quantized_cpu
- func: le.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: le
- CUDA: le
+ CPU, CUDA: le
QuantizedCPU: le_quantized_cpu
- func: le.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: le_out
- CUDA: le_out
+ CPU, CUDA: le_out
QuantizedCPU: le_out_quantized_cpu
- func: le.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: le
- CUDA: le
+ CPU, CUDA: le
QuantizedCPU: le_quantized_cpu
+- func: le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+# less_equal, alias for torch.le
+- func: less_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: less_equal.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: less_equal.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: less_equal.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: less_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: less_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
- func: gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: gt_out
- CUDA: gt_out
+ CPU, CUDA: gt_out
QuantizedCPU: gt_out_quantized_cpu
- func: gt.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: gt
- CUDA: gt
+ CPU, CUDA: gt
QuantizedCPU: gt_quantized_cpu
- func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: gt_out
- CUDA: gt_out
+ CPU, CUDA: gt_out
QuantizedCPU: gt_out_quantized_cpu
- func: gt.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: gt
- CUDA: gt
+ CPU, CUDA: gt
QuantizedCPU: gt_quantized_cpu
+- func: gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+# greater, alias for torch.gt
+- func: greater.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: greater.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: greater.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: greater.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: greater_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: greater_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
- func: lt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: lt_out
- CUDA: lt_out
+ CPU, CUDA: lt_out
QuantizedCPU: lt_out_quantized_cpu
- func: lt.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: lt
- CUDA: lt
+ CPU, CUDA: lt
QuantizedCPU: lt_quantized_cpu
- func: lt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: lt_out
- CUDA: lt_out
+ CPU, CUDA: lt_out
QuantizedCPU: lt_out_quantized_cpu
- func: lt.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: lt
- CUDA: lt
+ CPU, CUDA: lt
QuantizedCPU: lt_quantized_cpu
+- func: lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+# less, alias for torch.lt
+- func: less.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: less.Scalar(Tensor self, Scalar other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: less.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: less.Tensor(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: less_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
+- func: less_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: method
+
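Editor's note (illustration only, not part of the upstream file): the same aliasing pattern is added above for the remaining comparison operators -- greater_equal for ge, less_equal for le, greater for gt, and less for lt. A minimal sketch of the expected equivalences in Python:

    import torch

    x = torch.tensor([1.0, 2.0, 3.0])
    y = torch.tensor([2.0, 2.0, 2.0])
    assert torch.equal(torch.greater(x, y),       torch.gt(x, y))
    assert torch.equal(torch.greater_equal(x, y), torch.ge(x, y))
    assert torch.equal(torch.less(x, y),          torch.lt(x, y))
    assert torch.equal(torch.less_equal(x, y),    torch.le(x, y))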
- func: take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: legacy::cpu::_th_take_out
CUDA: legacy::cuda::_th_take_out
@@ -4573,29 +5507,39 @@
variants: method, function
dispatch:
CPU: legacy::cpu::_th_take
CUDA: legacy::cuda::_th_take
+- func: take_backward(Tensor grad, Tensor input, Tensor index) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: index_select.out(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: index_select_out_cpu_
- CUDA: legacy::cuda::_th_index_select_out
+ CUDA: index_select_out_cuda
- func: index_select(Tensor self, int dim, Tensor index) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
CPU: index_select_cpu_
- CUDA: legacy::cuda::_th_index_select
+ CUDA: index_select_cuda
SparseCPU: index_select_sparse
SparseCUDA: index_select_sparse
- func: index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
- func: index_select.dimname(Tensor self, Dimname dim, Tensor index) -> Tensor
variants: method, function
+- func: index_select_backward(Tensor grad, int[] self_sizes, int dim, Tensor index) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: masked_select.out(Tensor self, Tensor mask, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: masked_select_out_cpu
CUDA: masked_select_out_cuda
@@ -4604,21 +5548,26 @@
variants: method, function
dispatch:
CPU: masked_select_cpu
CUDA: masked_select_cuda
+- func: masked_select_backward(Tensor grad, Tensor input, Tensor mask) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: nonzero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: legacy::cpu::_th_nonzero_out
- CUDA: legacy::cuda::_th_nonzero_out
+ CUDA: nonzero_out_cuda
- func: nonzero(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
CPU: legacy::cpu::_th_nonzero
- CUDA: legacy::cuda::_th_nonzero
+ CUDA: nonzero_cuda
- func: nonzero_numpy(Tensor self) -> Tensor[]
use_c10_dispatcher: full
variants: method, function
@@ -4629,31 +5578,40 @@
- func: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: gather
- CUDA: gather
+ CPU, CUDA: gather
+- func: gather_backward(Tensor grad, Tensor self, int dim, Tensor index, bool sparse_grad) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+ device_guard: False
+
- func: gather.dimname_out(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!)
- func: gather.dimname(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False) -> Tensor
variants: method, function
- func: _gather_sparse_backward(Tensor self, int dim, Tensor index, Tensor grad) -> Tensor
use_c10_dispatcher: full
- func: addcmul.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: addcmul_out
- func: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: addcdiv.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: addcdiv_out
- func: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
use_c10_dispatcher: full
variants: method, function
@@ -4837,18 +5795,16 @@
CUDA: _lu_solve_helper_cuda
# TODO: remove dispatch section when porting TH CUDA to ATen
- func: multinomial.out(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: multinomial_out
- CUDA: multinomial_out
+ CPU, CUDA: multinomial_out
- func: multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
variants: method, function
dispatch:
- CPU: multinomial
- CUDA: multinomial
+ CPU, CUDA: multinomial
- func: _multinomial_alias_setup(Tensor probs) -> (Tensor, Tensor)
use_c10_dispatcher: full
variants: function
dispatch:
@@ -4868,64 +5824,94 @@
- func: lgamma(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: lgamma
- CUDA: lgamma
+ CPU, CUDA: lgamma
- func: digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: digamma_out
- func: digamma(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ dispatch:
+ CPU, CUDA: digamma
- func: polygamma.out(int n, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: polygamma_out
- func: polygamma(int n, Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: erfinv(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: erfinv
- CUDA: erfinv
+ CPU, CUDA: erfinv
- func: erfinv_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
dispatch:
CPU: _erfinv__cpu
CUDA: _erfinv__cuda
- func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: _erfinv_out_cpu
CUDA: _erfinv_out_cuda
+- func: i0(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: i0_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: i0_out
+
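Editor's note (illustration only, not part of the upstream file): the new i0 entries above register the zeroth-order modified Bessel function of the first kind, in the same spirit as NumPy's np.i0. A quick sanity check, assuming that interpretation:

    import torch

    x = torch.tensor([0.0, 1.0])
    print(torch.i0(x))   # i0(0) == 1.0; i0(1) is roughly 1.2661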
- func: sign(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
- func: sign_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
variants: method
- func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: sign_out
- CUDA: sign_out
+ CPU, CUDA: sign_out
+- func: signbit(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU: signbit_out
+ CUDA: signbit_out
+
- func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
use_c10_dispatcher: full
variants: method, function
- func: atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: atan2_out
- func: atan2(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ dispatch:
+ CPU, CUDA: atan2
- func: lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: lerp_cpu_scalar_out
CUDA: lerp_cuda_scalar_out
@@ -4983,81 +5969,149 @@
variants: method, function
dispatch:
CPU: fmod
CUDA: fmod_cuda
+- func: hypot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: hypot_out
+
+- func: hypot(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+ dispatch:
+ CPU, CUDA: hypot
+
+- func: hypot_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ variants: method
+
+- func: nextafter.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: nextafter_out
+
+- func: nextafter(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+ dispatch:
+ CPU, CUDA: nextafter
+
+- func: nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!)
+ variants: method
+
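Editor's note (illustration only, not part of the upstream file): hypot and nextafter above are new elementwise binary ops; they appear to mirror the C math functions of the same names (hypot(x, y) = sqrt(x*x + y*y), nextafter(x, y) = the next representable float after x in the direction of y). A short sketch under that assumption:

    import torch

    a = torch.tensor([3.0])
    b = torch.tensor([4.0])
    print(torch.hypot(a, b))       # tensor([5.])  -- sqrt(3^2 + 4^2)
    print(torch.nextafter(a, b))   # smallest representable float greater than 3.0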
- func: remainder.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: remainder_out
- CUDA: remainder_out
+ CPU, CUDA: remainder_out
- func: remainder.Scalar(Tensor self, Scalar other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: remainder
- CUDA: remainder
+ CPU, CUDA: remainder
- func: remainder.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: remainder_out
- CUDA: remainder_out
+ CPU, CUDA: remainder_out
- func: remainder.Tensor(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: remainder
- CUDA: remainder
+ CPU, CUDA: remainder
-- func: min.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+- func: min(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+ dispatch:
+ CPU, CUDA: min
+ QuantizedCPU: min_quantized_cpu
-- func: min.other(Tensor self, Tensor other) -> Tensor
+- func: max(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ dispatch:
+ CPU, CUDA: max
+ QuantizedCPU: max_quantized_cpu
-- func: min(Tensor self) -> Tensor
+- func: maximum(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: min
- CUDA: min
- QuantizedCPU: min_quant
+ CPU, CUDA: maximum
-- func: max.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+- func: maximum.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: maximum_out
+# binary max, alias of maximum
+# NOTE: max is not an alias for maximum, since there is also unary max
- func: max.other(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
-- func: max(Tensor self) -> Tensor
+- func: max.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: minimum(Tensor self, Tensor other) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: max
- CUDA: max
- QuantizedCPU: max_quant
+ CPU, CUDA: minimum
+- func: minimum.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: minimum_out
+
+# binary min, alias for minimum
+# NOTE: min is not an alias for minimum, since there is also unary min
+- func: min.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: min.other(Tensor self, Tensor other) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
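Editor's note (illustration only, not part of the upstream file): the comments above explain why maximum/minimum were introduced as separate names -- max and min keep both a unary overload (largest/smallest element of one tensor) and a binary overload, so they cannot simply be aliases. In Python terms:

    import torch

    x = torch.tensor([1.0, 5.0, 3.0])
    y = torch.tensor([4.0, 2.0, 3.0])
    torch.max(x)           # unary overload: single largest element, tensor(5.)
    torch.maximum(x, y)    # elementwise binary max, tensor([4., 5., 3.])
    torch.max(x, y)        # binary max.other overload, same result as maximum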
- func: median(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
CPU: median_cpu
CUDA: median_cuda
+- func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
+- func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor
+ use_c10_dispatcher: full
+ variants: method, function
+
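Editor's note (illustration only, not part of the upstream file): quantile and nanquantile above are new reductions; per their names, nanquantile is the variant that ignores NaN values. A small sketch, assuming the default linear interpolation between data points:

    import torch

    t = torch.tensor([1.0, 2.0, 3.0, 4.0])
    torch.quantile(t, 0.5)                    # tensor(2.5000), interpolated median
    t_nan = torch.tensor([1.0, float('nan'), 3.0])
    torch.nanquantile(t_nan, 0.5)             # tensor(2.), NaN entries ignored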
- func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
dispatch:
- CPU: legacy::cpu::_th_sort_out
+ CPU: sort_out_cpu
CUDA: legacy::cuda::_th_sort_out
- func: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: legacy::cpu::_th_sort
+ CPU: sort_cpu
CUDA: legacy::cuda::_th_sort
- QuantizedCPU: sort_quant
+ QuantizedCPU: sort_quantized_cpu
- func: sort.dimname_values(Tensor self, Dimname dim, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
- func: sort.dimname(Tensor self, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices)
variants: method, function
@@ -5067,35 +6121,34 @@
variants: method, function
- func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
variants: method, function
-- func: topk.values(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) ->(Tensor(a!) values, Tensor(b!) indices)
+- func: topk.values(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
dispatch:
CPU: topk_out_cpu
CUDA: legacy::cuda::_th_topk_out
- func: topk(Tensor self, int k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: topk
- CUDA: topk
- QuantizedCPU: quantized_topk_cpu
+ CPU, CUDA: topk
+ QuantizedCPU: topk_quantized_cpu
- func: all(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
+ dispatch:
+ CPU, CUDA: all
- func: any(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: any
- CUDA: any
- SparseCPU: any_sparse
- SparseCUDA: any_sparse
+ CPU, CUDA: any
+ SparseCPU, SparseCUDA: any_sparse
- func: renorm.out(Tensor self, Scalar p, int dim, Scalar maxnorm, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: legacy::cpu::_th_renorm_out
CUDA: legacy::cuda::_th_renorm_out
@@ -5110,93 +6163,98 @@
- func: unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a)
use_c10_dispatcher: full
variants: method
device_guard: False
dispatch:
- CPU: unfold
- CUDA: unfold
- QuantizedCPU: unfold
- QuantizedCUDA: unfold
+ CPU, CUDA: unfold
+ QuantizedCPU, QuantizedCUDA: unfold
- func: unfold_backward(Tensor grad_in, int[] input_sizes, int dim, int size, int step) -> Tensor
+ use_c10_dispatcher: full
variants: function
dispatch:
- CPU: unfold_backward
- CUDA: unfold_backward
+ CPU, CUDA: unfold_backward
- func: equal(Tensor self, Tensor other) -> bool
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: legacy::cpu::_th_equal
- CUDA: legacy::cuda::_th_equal
- QuantizedCPU: quantized_equal_cpu
+ CPU: cpu_equal
+ CUDA: cuda_equal
+ QuantizedCPU: equal_quantized_cpu
- func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: pow_out
- CUDA: pow_out
+ CPU, CUDA: pow_out
- func: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor
use_c10_dispatcher: full
variants: method, function
dispatch:
- CPU: pow
- CUDA: pow
+ CPU, CUDA: pow
- func: pow.Scalar_out(Scalar self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
- CPU: pow_out
- CUDA: pow_out
+ CPU, CUDA: pow_out
- func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
use_c10_dispatcher: full
dispatch:
- CPU: pow
- CUDA: pow
+ CPU, CUDA: pow
+- func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: pow_out
+ SparseCPU, SparseCUDA: pow_out_sparse_scalar
+
+- func: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+ dispatch:
+ CPU, CUDA: pow
+ SparseCPU, SparseCUDA: pow_sparse_scalar
+
- func: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)
variants: method
+ dispatch:
+ CPU, CUDA: normal_
- func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: normal_out
- func: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
+ dispatch:
+ CPU, CUDA: normal
- func: normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: normal_out
- func: normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor
+ dispatch:
+ CPU, CUDA: normal
- func: normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: normal_out
- func: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor
+ dispatch:
+ CPU, CUDA: normal
- func: normal.float_float(float mean, float std, int[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
- func: normal.float_float_out(float mean, float std, int[] size, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
- func: alias(Tensor(a) self) -> Tensor(a)
use_c10_dispatcher: full
variants: method, function
-- func: _addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+- func: _index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
use_c10_dispatcher: full
dispatch:
- CPU: legacy::cpu::_th_addr
- CUDA: addr_cuda
-
-- func: _addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
- dispatch:
- CPU: legacy::cpu::_th_addr_
- CUDA: addr__cuda
-
-- func: _addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
- dispatch:
- CPU: legacy::cpu::_th_addr_out
- CUDA: addr_out_cuda
-
-- func: _index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
- dispatch:
CPU: legacy::cpu::_th_index_copy_
CUDA: legacy::cuda::_th_index_copy_
- func: _cumsum(Tensor self, int dim) -> Tensor
use_c10_dispatcher: full
@@ -5218,33 +6276,301 @@
- func: _cumprod.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: _cumprod_out_cpu
CUDA: _cumprod_out_cuda
+- func: _var(Tensor self, bool unbiased=True) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ CPU: legacy::cpu::_th_var
+
+- func: _std(Tensor self, bool unbiased=True) -> Tensor
+ use_c10_dispatcher: full
+ dispatch:
+ CPU: legacy::cpu::_th_std
+
- func: _amp_non_finite_check_and_unscale_(Tensor(a!) self, Tensor(b!) found_inf, Tensor inv_scale) -> ()
+ use_c10_dispatcher: full
variants: function
dispatch:
CUDA: _amp_non_finite_check_and_unscale_cuda_
- func: _amp_update_scale(Tensor(a!) growth_tracker, Tensor current_scale, Tensor found_inf, float scale_growth_factor, float scale_backoff_factor, int growth_interval) -> Tensor
+ use_c10_dispatcher: full
variants: function
dispatch:
CUDA: _amp_update_scale_cuda
- func: _cat(Tensor[] tensors, int dim=0) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: _cat_cpu
CUDA: cat_cuda
- QuantizedCPU: quantized_cat
+ QuantizedCPU: cat_quantized_cpu
- func: _cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: _cat_out_cpu
CUDA: cat_out_cuda
- QuantizedCPU: quantized_cat_out
+ QuantizedCPU: cat_out_quantized_cpu
+- func: _foreach_add.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_scalar_kernel_slow
+ CUDA: foreach_tensor_add_scalar_kernel_cuda
+
+- func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_scalar_kernel_slow_
+ CUDA: foreach_tensor_add_scalar_kernel_cuda_
+
+- func: _foreach_sub.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sub_scalar_kernel_slow
+ CUDA: foreach_tensor_sub_scalar_kernel_cuda
+
+- func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sub_scalar_kernel_slow_
+ CUDA: foreach_tensor_sub_scalar_kernel_cuda_
+
+- func: _foreach_mul.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_mul_scalar_kernel_slow
+ CUDA: foreach_tensor_mul_scalar_kernel_cuda
+
+- func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_mul_scalar_kernel_slow_
+ CUDA: foreach_tensor_mul_scalar_kernel_cuda_
+
+- func: _foreach_div.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_scalar_kernel_slow
+ CUDA: foreach_tensor_div_scalar_kernel_cuda
+
+- func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_scalar_kernel_slow_
+ CUDA: foreach_tensor_div_scalar_kernel_cuda_
+
+- func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_list_kernel_slow
+ CUDA: foreach_tensor_add_list_kernel_cuda
+
+- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_list_kernel_slow_
+ CUDA: foreach_tensor_add_list_kernel_cuda_
+
+- func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sub_list_kernel_slow
+ CUDA: foreach_tensor_sub_list_kernel_cuda
+
+- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sub_list_kernel_slow_
+ CUDA: foreach_tensor_sub_list_kernel_cuda_
+
+- func: _foreach_mul.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_mul_list_kernel_slow
+ CUDA: foreach_tensor_mul_list_kernel_cuda
+
+- func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_mul_list_kernel_slow_
+ CUDA: foreach_tensor_mul_list_kernel_cuda_
+
+- func: _foreach_div.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_list_kernel_slow
+ CUDA: foreach_tensor_div_list_kernel_cuda
+
+- func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_list_kernel_slow_
+ CUDA: foreach_tensor_div_list_kernel_cuda_
+
+- func: _foreach_add_scalar_list(Tensor[] tensors, float[] scalars) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_scalarlist_kernel_slow
+ CUDA: foreach_tensor_add_scalarlist_kernel_cuda
+
+- func: _foreach_add_scalar_list_(Tensor(a!)[] self, float[] scalars) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_scalarlist_kernel_slow_
+ CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
+
+- func: _foreach_sub_scalar_list(Tensor[] tensors, float[] scalars) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sub_scalarlist_kernel_slow
+ CUDA: foreach_tensor_sub_scalarlist_kernel_cuda
+
+- func: _foreach_sub_scalar_list_(Tensor(a!)[] self, float[] scalars) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sub_scalarlist_kernel_slow_
+ CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
+
+- func: _foreach_div_scalar_list(Tensor[] tensors, float[] scalars) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_scalarlist_kernel_slow
+ CUDA: foreach_tensor_div_scalarlist_kernel_cuda
+
+- func: _foreach_div_scalar_list_(Tensor(a!)[] self, float[] scalars) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_scalarlist_kernel_slow_
+ CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
+
+- func: _foreach_mul_scalar_list(Tensor[] tensors, float[] scalars) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_mul_scalarlist_kernel_slow
+ CUDA: foreach_tensor_mul_scalarlist_kernel_cuda
+
+- func: _foreach_mul_scalar_list_(Tensor(a!)[] self, float[] scalars) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_mul_scalarlist_kernel_slow_
+ CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
+
+- func: _foreach_exp(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_exp_slow
+ CUDA: foreach_tensor_exp_cuda
+
+- func: _foreach_exp_(Tensor(a!)[] self) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_exp_slow_
+ CUDA: foreach_tensor_exp_cuda_
+
+- func: _foreach_sqrt(Tensor[] tensors) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sqrt_slow
+ CUDA: foreach_tensor_sqrt_cuda
+
+- func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_sqrt_slow_
+ CUDA: foreach_tensor_sqrt_cuda_
+
+- func: _foreach_addcdiv_(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_addcdiv_slow_
+ CUDA: foreach_tensor_addcdiv_cuda_
+
+- func: _foreach_addcmul_(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_addcmul_slow_
+ CUDA: foreach_tensor_addcmul_cuda_
+
+- func: _foreach_addcdiv(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_addcdiv_slow
+ CUDA: foreach_tensor_addcdiv_cuda
+
+- func: _foreach_addcmul(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
+ use_c10_dispatcher: full
+ device_guard: False
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_addcmul_slow
+ CUDA: foreach_tensor_addcmul_cuda
+
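Editor's note (illustration only, not part of the upstream file): the _foreach_* entries above register fused list operations -- each takes a list of tensors and applies the op to every element in a single call, with separate out-of-place and in-place (trailing underscore) variants. They are private, underscore-prefixed functions, so the exact Python surface is an assumption here; a rough sketch of the calling convention:

    import torch

    params = [torch.ones(2), torch.ones(3)]
    # out-of-place: returns a new list of tensors
    bumped = torch._foreach_add(params, 1.0)
    # in-place: mutates every tensor in the list, returns nothing
    torch._foreach_add_(params, 1.0)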
- func: _mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor, Tensor)
use_c10_dispatcher: full
dispatch:
CPU: legacy::cpu::_th_mode
CUDA: legacy::cuda::_th_mode
@@ -5290,27 +6616,29 @@
## NN wrappers
- func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
+ dispatch:
+ CPU, CUDA: mse_loss_out
- func: mse_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: mse_loss
- func: mse_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: mse_loss_backward_out
- CUDA: mse_loss_backward_out
+ CPU, CUDA: mse_loss_backward_out
- func: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: mse_loss_backward
- CUDA: mse_loss_backward
+ CPU, CUDA: mse_loss_backward
- func: l1_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
@@ -5318,12 +6646,11 @@
python_module: nn
- func: l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: l1_loss_backward_out
- CUDA: l1_loss_backward_out
+ CPU, CUDA: l1_loss_backward_out
- func: l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
use_c10_dispatcher: full
python_module: nn
@@ -5332,10 +6659,11 @@
dispatch:
CPU: multi_margin_loss_cpu_out
CUDA: legacy::cuda::_thnn_multi_margin_loss_forward_out
- func: multi_margin_loss(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: multi_margin_loss_cpu
CUDA: legacy::cuda::_thnn_multi_margin_loss_forward
@@ -5344,10 +6672,11 @@
dispatch:
CPU: multi_margin_loss_cpu_backward_out
CUDA: legacy::cuda::_thnn_multi_margin_loss_backward_out
- func: multi_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, Scalar p, Scalar margin, Tensor? weight=None, int reduction=Mean) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: multi_margin_loss_cpu_backward
CUDA: legacy::cuda::_thnn_multi_margin_loss_backward
@@ -5386,19 +6715,21 @@
- func: nll_loss.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: nll_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
- func: nll_loss_forward.output(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, *, Tensor(a!) output, Tensor(b!) total_weight) -> (Tensor(a!), Tensor(b!))
python_module: nn
dispatch:
CPU: nll_loss_forward_out_cpu
CUDA: legacy::cuda::_thnn_nll_loss_forward_out
- func: nll_loss_forward(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index) -> (Tensor output, Tensor total_weight)
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: nll_loss_forward_cpu
CUDA: legacy::cuda::_thnn_nll_loss_forward
@@ -5407,28 +6738,31 @@
dispatch:
CPU: nll_loss_backward_out_cpu
CUDA: legacy::cuda::_thnn_nll_loss_backward_out
- func: nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: nll_loss_backward_cpu
CUDA: legacy::cuda::_thnn_nll_loss_backward
- func: nll_loss2d.out(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: nll_loss2d(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
- func: nll_loss2d_forward.output(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, *, Tensor(a!) output, Tensor(b!) total_weight) -> (Tensor(a!), Tensor(b!))
python_module: nn
dispatch:
CPU: nll_loss2d_forward_out_cpu
CUDA: legacy::cuda::_thnn_nll_loss2d_forward_out
- func: nll_loss2d_forward(Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index) -> (Tensor output, Tensor total_weight)
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: nll_loss2d_forward_cpu
CUDA: legacy::cuda::_thnn_nll_loss2d_forward
@@ -5437,32 +6771,35 @@
dispatch:
CPU: nll_loss2d_backward_out_cpu
CUDA: legacy::cuda::_thnn_nll_loss2d_backward_out
- func: nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, int ignore_index, Tensor total_weight) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: nll_loss2d_backward_cpu
CUDA: legacy::cuda::_thnn_nll_loss2d_backward
-- func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
+- func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: smooth_l1_loss_out
CUDA: smooth_l1_loss_out
-- func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
+- func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: smooth_l1_loss
-- func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
+- func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: smooth_l1_loss_backward_out
CUDA: smooth_l1_loss_backward_out
-- func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
+- func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
use_c10_dispatcher: full
python_module: nn
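Editor's note (illustration only, not part of the upstream file): the diff above adds a beta parameter to smooth_l1_loss and its backward. beta is conventionally the threshold at which the loss switches from a quadratic to a linear regime; a scalar sketch of the per-element formula under that reading:

    def smooth_l1(x, beta=1.0):
        # quadratic near zero, linear in the tails
        d = abs(x)
        return 0.5 * d * d / beta if d < beta else d - 0.5 * beta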
- func: soft_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
@@ -5478,26 +6815,32 @@
use_c10_dispatcher: full
python_module: nn
- func: elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
+ dispatch:
+ CPU, CUDA: elu_out
- func: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: elu
- func: elu_backward.grad_input(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: elu_backward_out
- CUDA: elu_backward_out
+ CPU, CUDA: elu_backward_out
- func: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, Tensor output) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: elu_backward
- func: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!)
+ use_c10_dispatcher: full
python_module: nn
- func: glu.out(Tensor self, int dim=-1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
@@ -5524,104 +6867,111 @@
CPU: glu_backward
CUDA: legacy::cuda::_thnn_glu_backward
- func: hardsigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
+ dispatch:
+ CPU, CUDA: hardsigmoid_out
- func: hardsigmoid(Tensor self) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: hardsigmoid
- CUDA: hardsigmoid
- QuantizedCPU: quantized_hardsigmoid
+ CPU, CUDA: hardsigmoid
+ QuantizedCPU: hardsigmoid_quantized_cpu
- func: hardsigmoid_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: hardsigmoid_
- func: hardsigmoid_backward(Tensor grad_output, Tensor self) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: hardsigmoid_backward
- CUDA: hardsigmoid_backward
+ CPU, CUDA: hardsigmoid_backward
- func: hardtanh.out(Tensor self, Scalar min_val=-1, Scalar max_val=1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: hardtanh_out
- CUDA: hardtanh_out
- QuantizedCPU: quantized_hardtanh_out
+ CPU, CUDA: hardtanh_out
+ QuantizedCPU: hardtanh_out_quantized_cpu
- func: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: hardtanh
- CUDA: hardtanh
- QuantizedCPU: quantized_hardtanh
+ CPU, CUDA: hardtanh
+ QuantizedCPU: hardtanh_quantized_cpu
- func: hardtanh_backward.grad_input(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: hardtanh_backward_out
- CUDA: hardtanh_backward_out
+ CPU, CUDA: hardtanh_backward_out
- func: hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: hardtanh_backward
- func: hardtanh_(Tensor(a!) self, Scalar min_val=-1, Scalar max_val=1) -> Tensor(a!)
+ use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: hardtanh_
- CUDA: hardtanh_
- QuantizedCPU: quantized_hardtanh_
- Vulkan: vulkan_hardtanh_
+ CPU, CUDA: hardtanh_
+ QuantizedCPU: hardtanh_quantized_cpu_
- func: hardswish.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
+ dispatch:
+ CPU, CUDA: hardswish_out
- func: hardswish(Tensor self) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: hardswish
- func: hardswish_(Tensor(a!) self) -> Tensor(a!)
+ use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: hardswish_
- func: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: hardswish_backward
- CUDA: hardswish_backward
+ CPU, CUDA: hardswish_backward
- func: leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: leaky_relu_out
- CUDA: leaky_relu_out
- QuantizedCPU: quantized_leaky_relu_out
+ CPU, CUDA: leaky_relu_out
+ QuantizedCPU: leaky_relu_out_quantized_cpu
- func: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: leaky_relu
- CUDA: leaky_relu
- QuantizedCPU: quantized_leaky_relu
+ CPU, CUDA: leaky_relu
+ QuantizedCPU: heaky_relu_quantized_cpu
- func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: leaky_relu_backward
- func: leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)
+ use_c10_dispatcher: full
python_module: nn
dispatch:
- CPU: leaky_relu_
- CUDA: leaky_relu_
- QuantizedCPU: quantized_leaky_relu_
+ CPU, CUDA: leaky_relu_
+ QuantizedCPU: leaky_relu_quantized_cpu_
- func: log_sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: log_sigmoid(Tensor self) -> Tensor
@@ -5676,65 +7026,73 @@
CPU: rrelu_with_noise_cpu_
CUDA: legacy::cuda::_thnn_rrelu_with_noise_forward_
- func: softplus.out(Tensor self, Scalar beta=1, Scalar threshold=20, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
+ dispatch:
+ CPU, CUDA: softplus_out
- func: softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: softplus
- func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: softplus_backward_out
- CUDA: softplus_backward_out
+ CPU, CUDA: softplus_backward_out
- func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: softplus_backward
- func: softshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
+ dispatch:
+ CPU, CUDA: softshrink_out
- func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: softshrink
- func: softshrink_backward.grad_input(Tensor grad_output, Tensor self, Scalar lambd, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: softshrink_backward_out
- CUDA: softshrink_backward_out
+ CPU, CUDA: softshrink_backward_out
- func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: softshrink_backward
- func: adaptive_avg_pool2d.out(Tensor self, int[2] output_size, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: adaptive_avg_pool2d_out_cpu
- CUDA: adaptive_avg_pool2d_out_cuda
+ CPU, CUDA: adaptive_avg_pool2d_out_cpu
MkldnnCPU: mkldnn_adaptive_avg_pool2d_out
- func: adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
use_c10_dispatcher: full
python_module: nn
- func: mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
use_c10_dispatcher: full
dispatch:
MkldnnCPU: mkldnn_adaptive_avg_pool2d
- requires_tensor: True
- func: _adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
use_c10_dispatcher: full
dispatch:
CPU: adaptive_avg_pool2d_cpu
CUDA: adaptive_avg_pool2d_cuda
- QuantizedCPU: quantized_adaptive_avg_pool2d
+ QuantizedCPU: adaptive_avg_pool2d_quantized_cpu
- func: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
@@ -5744,17 +7102,19 @@
- func: adaptive_avg_pool3d.out(Tensor self, int[3] output_size, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: adaptive_avg_pool3d_out_cpu
CUDA: adaptive_avg_pool3d_out_cuda
+ QuantizedCPU: adaptive_avg_pool3d_out_quantized_cpu
- func: adaptive_avg_pool3d(Tensor self, int[3] output_size) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: adaptive_avg_pool3d_cpu
CUDA: adaptive_avg_pool3d_cuda
+ QuantizedCPU: adaptive_avg_pool3d_quantized_cpu
- func: adaptive_avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: adaptive_avg_pool3d_backward_out_cpu
@@ -5835,11 +7195,11 @@
python_module: nn
dispatch:
CPU: avg_pool2d_cpu
CUDA: avg_pool2d_cuda
MkldnnCPU: mkldnn_avg_pool2d
- QuantizedCPU: quantized_avg_pool2d
+ QuantizedCPU: avg_pool2d_quantized_cpu
- func: avg_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: avg_pool2d_backward_out_cpu
@@ -5855,18 +7215,20 @@
- func: avg_pool3d.out(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: avg_pool3d_out_cpu
CUDA: avg_pool3d_out_cuda
+ MkldnnCPU: mkldnn_avg_pool3d_out
- func: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: avg_pool3d_cpu
CUDA: avg_pool3d_cuda
- QuantizedCPU: quantized_avg_pool3d
+ MkldnnCPU: mkldnn_avg_pool3d
+ QuantizedCPU: avg_pool3d_quantized_cpu
- func: avg_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: avg_pool3d_backward_out_cpu
@@ -6172,10 +7534,112 @@
python_module: nn
dispatch:
CPU: replication_pad3d_backward_cpu
CUDA: replication_pad3d_backward_cuda
+- func: upsample_linear1d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_linear1d_cpu
+ CUDA: upsample_linear1d_cuda
+
+- func: upsample_linear1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_linear1d_backward_cpu
+ CUDA: upsample_linear1d_backward_cuda
+
+- func: upsample_bilinear2d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_bilinear2d_cpu
+ CUDA: upsample_bilinear2d_cuda
+ QuantizedCPU: upsample_bilinear2d_quantized_cpu
+
+- func: upsample_bilinear2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_bilinear2d_backward_cpu
+ CUDA: upsample_bilinear2d_backward_cuda
+
+- func: upsample_trilinear3d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_trilinear3d_cpu
+ CUDA: upsample_trilinear3d_cuda
+
+- func: upsample_trilinear3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_trilinear3d_backward_cpu
+ CUDA: upsample_trilinear3d_backward_cuda
+
+- func: upsample_bicubic2d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_bicubic2d_cpu
+ CUDA: upsample_bicubic2d_cuda
+
+- func: upsample_bicubic2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_bicubic2d_backward_cpu
+ CUDA: upsample_bicubic2d_backward_cuda
+
+- func: upsample_nearest1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_nearest1d_cpu
+ CUDA: upsample_nearest1d_cuda
+
+- func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_nearest1d_backward_cpu
+ CUDA: upsample_nearest1d_backward_cuda
+
+- func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_nearest2d_cpu
+ CUDA: upsample_nearest2d_cuda
+ QuantizedCPU: upsample_nearest2d_quantized_cpu
+
+- func: upsample_nearest2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_nearest2d_backward_cpu
+ CUDA: upsample_nearest2d_backward_cuda
+
+- func: upsample_nearest3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_nearest3d_cpu
+ CUDA: upsample_nearest3d_cuda
+ QuantizedCPU: upsample_nearest3d_quantized_cpu
+
+- func: upsample_nearest3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: upsample_nearest3d_backward_cpu
+ CUDA: upsample_nearest3d_backward_cuda
+
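Editor's note (illustration only, not part of the upstream file): the new .vec upsample overloads above take an optional output_size plus an optional per-dimension scale_factors list. In Python they are normally reached through torch.nn.functional.interpolate rather than called directly; a minimal usage sketch under that assumption:

    import torch
    import torch.nn.functional as F

    img = torch.rand(1, 3, 8, 8)
    # either an explicit output size or a per-dimension scale factor
    F.interpolate(img, size=(16, 16), mode='bilinear', align_corners=False)
    F.interpolate(img, scale_factor=2.0, mode='nearest')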
+# NOTE: all of the non-"vec" upsample overloads are only kept for backward compatibility.
- func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: upsample_linear1d_out_cpu
CUDA: upsample_linear1d_out_cuda
@@ -6210,11 +7674,11 @@
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_bilinear2d_cpu
CUDA: upsample_bilinear2d_cuda
- QuantizedCPU: quantized_upsample_bilinear2d_cpu
+ QuantizedCPU: upsample_bilinear2d_quantized_cpu
- func: upsample_bilinear2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: upsample_bilinear2d_backward_out_cpu
@@ -6315,12 +7779,11 @@
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_nearest2d_cpu
CUDA: upsample_nearest2d_cuda
- QuantizedCPU: quantized_upsample_nearest2d_cpu
- Vulkan: upsample_nearest2d_vulkan
+ QuantizedCPU: upsample_nearest2d_quantized_cpu
- func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: upsample_nearest2d_backward_out_cpu
@@ -6343,11 +7806,11 @@
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: upsample_nearest3d_cpu
CUDA: upsample_nearest3d_cuda
- QuantizedCPU: quantized_upsample_nearest3d_cpu
+ QuantizedCPU: upsample_nearest3d_quantized_cpu
- func: upsample_nearest3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU: upsample_nearest3d_backward_out_cpu
@@ -6361,26 +7824,39 @@
CUDA: upsample_nearest3d_backward_cuda
- func: sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: sigmoid_backward_out
- CUDA: sigmoid_backward_out
+ CPU, CUDA: sigmoid_backward_out
- func: sigmoid_backward(Tensor grad_output, Tensor output) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: sigmoid_backward
+- func: logit_backward.grad_input(Tensor grad_output, Tensor self, float? eps=None, *, Tensor(a!) grad_input) -> Tensor(a!)
+ python_module: nn
+ dispatch:
+ CPU, CUDA: logit_backward_out
+
+- func: logit_backward(Tensor grad_output, Tensor self, float? eps=None) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU, CUDA: logit_backward
+
- func: tanh_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
- CPU: tanh_backward_out
- CUDA: tanh_backward_out
+ CPU, CUDA: tanh_backward_out
- func: tanh_backward(Tensor grad_output, Tensor output) -> Tensor
use_c10_dispatcher: full
python_module: nn
+ dispatch:
+ CPU, CUDA: tanh_backward
# What's a thnn_conv_ versus a slow_conv_?
#
# Historically, we have inefficient implementations of convolutions
# coming from the THNN/THCUNN library. These convolutions typically
@@ -6403,10 +7879,11 @@
dispatch:
CPU: slow_conv_transpose2d_out_cpu
CUDA: slow_conv_transpose2d_out_cuda
- func: slow_conv_transpose2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int[2] dilation=1) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: slow_conv_transpose2d_cpu
CUDA: slow_conv_transpose2d_cuda
@@ -6428,10 +7905,11 @@
dispatch:
CPU: slow_conv_transpose3d_out_cpu
CUDA: slow_conv_transpose3d_out_cuda
- func: slow_conv_transpose3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: slow_conv_transpose3d_cpu
CUDA: slow_conv_transpose3d_cuda
@@ -6450,19 +7928,21 @@
- func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
- func: thnn_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
python_module: nn
dispatch:
CPU: slow_conv2d_forward_out_cpu
CUDA: legacy::cuda::_thnn_conv2d_forward_out
- func: thnn_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: slow_conv2d_forward_cpu
CUDA: legacy::cuda::_thnn_conv2d_forward
@@ -6481,18 +7961,20 @@
- func: thnn_conv_depthwise2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: thnn_conv_depthwise2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
- func: thnn_conv_depthwise2d_forward.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CUDA: legacy::cuda::_thnn_conv_depthwise2d_forward_out
- func: thnn_conv_depthwise2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, int[2] dilation) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CUDA: legacy::cuda::_thnn_conv_depthwise2d_forward
- func: thnn_conv_depthwise2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight) -> (Tensor(a!), Tensor(b!))
@@ -6508,18 +7990,20 @@
- func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
- func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
- func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!))
python_module: nn
dispatch:
CPU: slow_conv3d_forward_out_cpu
- func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input)
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: slow_conv3d_forward_cpu
- func: slow_conv3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!)? grad_input, Tensor(b!)? grad_weight, Tensor(c!)? grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
@@ -6532,10 +8016,11 @@
python_module: nn
dispatch:
CPU: slow_conv3d_backward_cpu
- func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: slow_conv_dilated2d_cpu
CUDA: slow_conv_dilated2d_cuda
@@ -6545,10 +8030,11 @@
dispatch:
CPU: slow_conv_dilated2d_backward_cpu
CUDA: slow_conv_dilated2d_backward_cuda
- func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor
+ use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: slow_conv_dilated3d_cpu
CUDA: slow_conv_dilated3d_cuda
@@ -6619,9 +8105,172 @@
- func: isinf(Tensor self) -> Tensor
use_c10_dispatcher: full
variants: function, method
device_guard: False
-# Note: this function is only for testing.
+- func: isposinf(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: isposinf_out
+
+- func: isneginf(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CPU, CUDA: isneginf_out
+
+# NOTE [_add_batch_dim and _remove_batch_dim]
+# _add_batch_dim and _remove_batch_dim are meant to be used in the implementation
+# of the vmap frontend API (see torch/_vmap_internals.py). They are not
+# user-facing, hence the leading underscore. Please don't use them anywhere
+# else. (A usage sketch of the vmap frontend follows the two entries below.)
+- func: _add_batch_dim(Tensor self, int batch_dim, int level) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
+# See NOTE [_add_batch_dim and _remove_batch_dim]
+- func: _remove_batch_dim(Tensor self, int level, int batch_size, int out_dim) -> Tensor
+ use_c10_dispatcher: full
+ variants: function
+
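A hedged, illustrative sketch of the vmap frontend these internal ops support. It assumes the prototype vmap function in torch/_vmap_internals.py referenced by the note above (the import path is the only public-ish entry point in this LibTorch 1.7-era release); the underscored ops are never called from user code:

    import torch
    # The note above points at torch/_vmap_internals.py; its vmap function is the
    # prototype frontend that inserts/removes batch dims via the two ops declared here.
    from torch._vmap_internals import vmap

    def double(x):
        return x * 2  # an ordinary per-example function

    xs = torch.randn(8, 3)   # the leading dim is treated as the batch dim
    ys = vmap(double)(xs)    # vectorized over dim 0 without an explicit Python loop
    print(ys.shape)          # torch.Size([8, 3])

Ops without a batching rule in this prototype may warn and fall back to a slower per-example loop, so more complex functions still work but may not be faster than a plain loop.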
+## Functions related to the fast Fourier transform and the torch.fft namespace
+# Note [FFT namespace binding]
+# Functions in the fft python module should have their names start with
+# "fft_" and be bound to the desired Python name in
+# torch/fft/__init__.py, and the desired C++ name in torch/csrc/api/include/torch/fft.h.
+# The "fft_" names should be hidden from the user and not documented.
+#
+# See fft_fft as an example (a usage sketch follows its entry below).
+
+# torch.fft.fft
+# NOTE: NOT an alias for torch.fft, which has different semantics
+- func: fft_fft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
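A minimal usage sketch of the binding convention above, assuming the LibTorch 1.7-era behaviour where the torch.fft namespace must be imported explicitly because it shadows the older torch.fft function (declared further below with different semantics):

    import torch
    import torch.fft  # needed in this release: the module shadows the legacy torch.fft function

    x = torch.randn(8)

    # fft_fft in the schema above is exposed as torch.fft.fft; n, dim and norm mirror the YAML defaults
    y = torch.fft.fft(x, n=None, dim=-1, norm=None)
    print(y.dtype)  # a complex dtype, e.g. torch.complex64 for float32 input

    # The legacy fft(Tensor self, int signal_ndim, bool normalized) entry below is NOT
    # an alias of this function and interprets its arguments differently.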
+- func: fft_ifft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_rfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_irfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_hfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_ihfft(Tensor self, int? n=None, int dim=-1, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_fftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_ifftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_rfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft_irfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ python_module: fft
+ use_c10_dispatcher: full
+ variants: function
+
+- func: fft(Tensor self, int signal_ndim, bool normalized=False) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+## Functions for linear algebra and the torch.linalg namespace
+# Note [linalg namespace binding]
+# Functions in the linalg python module should have their names start with
+# "linalg_" and be bound to the desired Python name in
+# torch/linalg/__init__.py, and the desired C++ name in torch/csrc/api/include/torch/linalg.h.
+# The "linalg_" names should be hidden from the user and not documented.
+#
+# See linalg_det as an example (a usage sketch follows the det entry below).
+
+# torch.linalg.det, alias for torch.det
+- func: linalg_det(Tensor self) -> Tensor
+ python_module: linalg
+ use_c10_dispatcher: full
+ variants: function
+
+- func: det(Tensor self) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
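A minimal sketch of the alias pair above (linalg_det bound as torch.linalg.det, alongside the existing torch.det), assuming a LibTorch 1.7-era build where the torch.linalg namespace is available:

    import torch

    A = torch.tensor([[2.0, 1.0],
                      [0.0, 3.0]])

    # torch.linalg.det is the NumPy-style spelling; torch.det is the pre-existing function
    d_new = torch.linalg.det(A)
    d_old = torch.det(A)
    assert torch.allclose(d_new, d_old)  # both evaluate to 6.0 for this matrix

The same "linalg_" binding pattern applies to linalg_norm and its ord_str overloads declared below.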
+# torch.outer, alias for torch.ger
+- func: outer(Tensor self, Tensor vec2) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: outer.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: ger(Tensor self, Tensor vec2) -> Tensor
+ use_c10_dispatcher: full
+ variants: function, method
+
+- func: ger.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
+
+- func: linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+ python_module: linalg
+ variants: function
+
+- func: linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+ python_module: linalg
+ variants: function
+
+- func: linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ variants: function
+
+- func: linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ variants: function
+
+## Functions that are only for testing
# They are undocumented and should not be used outside of tests.
- func: _test_serialization_subcmul(Tensor self, Tensor other, Scalar alpha=1) -> Tensor
use_c10_dispatcher: full
+
+# Note: this function is only for testing.
+- func: _test_optional_intlist(Tensor values, int[]? addends) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: _test_optional_intlist
+
+# Note: this function is only for testing.
+- func: _test_optional_filled_intlist(Tensor values, int[2]? addends) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: _test_optional_intlist
+
+# Note: this function is only for testing.
+- func: _test_optional_floatlist(Tensor values, float[]? addends) -> Tensor
+ use_c10_dispatcher: full
+ python_module: nn
+ dispatch:
+ CPU: _test_optional_floatlist