codegen/native_functions.yaml in torch-rb-0.14.1 vs codegen/native_functions.yaml in torch-rb-0.15.0
- old (torch-rb 0.14.1)
+ new (torch-rb 0.15.0)
@@ -183,11 +183,11 @@
- func: sym_constrain_range(Scalar size, *, int? min=None, int? max=None) -> ()
dispatch:
CompositeExplicitAutograd: sym_constrain_range
-- func: sym_constrain_range_for_size(Scalar size, *, int? min, int? max) -> ()
+- func: sym_constrain_range_for_size(Scalar size, *, int? min=None, int? max=None) -> ()
dispatch:
CompositeExplicitAutograd: sym_constrain_range_for_size
- func: _functional_sym_constrain_range(Scalar size, int? min, int? max, Tensor dep_token) -> Tensor
dispatch:
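The only change here is that `min` and `max` gain `None` defaults, matching `sym_constrain_range` above. A minimal sketch of what that permits, assuming the op is reachable through `torch.ops.aten` as usual (an eager call on a concrete int just validates the value; the op's real job is constraining unbacked SymInts, e.g. sizes produced by `.item()`, during tracing):

```python
import torch

# Assumed usage sketch: with the new defaults, min/max may be omitted.
torch.ops.aten.sym_constrain_range_for_size(4)               # previously required min/max
torch.ops.aten.sym_constrain_range_for_size(4, min=0, max=16)
```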
@@ -429,10 +429,11 @@
- func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sgn_out
+ MPS: sgn_out_mps
SparseCPU, SparseCUDA: sgn_sparse_out
SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
tags: pointwise
- func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
@@ -679,19 +680,33 @@
- func: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: all.out
variants: function, method
+- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+ device_check: NoCheck # TensorIterator
+ structured_delegate: all.dims_out
+ variants: function, method
+ cpp_no_default_args: ['dim']
+ dispatch:
+ CompositeExplicitAutograd: all_dims_default
+
- func: all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
- precomputed:
- - dim -> int dim
dispatch:
CPU, CUDA: all_out
MPS: all_out_mps
+- func: all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ structured: True
+ dispatch:
+ CPU, CUDA: all_dims_out
+ CompositeExplicitAutograd: all_dims_out_default
+ cpp_no_default_args: ['dim']
+
- func: all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
- func: all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -707,19 +722,34 @@
device_check: NoCheck # TensorIterator
structured_delegate: any.out
variants: function, method
tags: core
+- func: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+ device_check: NoCheck # TensorIterator
+ structured_delegate: any.dims_out
+ variants: function, method
+ cpp_no_default_args: ['dim']
+ tags: core
+ dispatch:
+ CompositeExplicitAutograd: any_dims_default
+
- func: any.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
- precomputed:
- - dim -> int dim
dispatch:
CPU, CUDA: any_out
MPS: any_out_mps
+- func: any.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ structured: True
+ dispatch:
+ CPU, CUDA: any_dims_out
+ CompositeExplicitAutograd: any_dims_out_default
+ cpp_no_default_args: ['dim']
+
- func: any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
- func: any.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
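The new `all.dims`/`any.dims` overloads accept an optional list of dimensions (defaulting to `None`) instead of the single `int dim`, reducing over several axes in one call. A minimal sketch of the Python-level behavior, assuming a PyTorch build that ships these overloads:

```python
import torch

x = torch.tensor([[True, True],
                  [True, False]])

torch.all(x, dim=1)       # tensor([ True, False]), single dim as before
torch.all(x, dim=(0, 1))  # tensor(False), new: reduce over both dims at once
torch.any(x, dim=(0, 1))  # tensor(True)
```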
@@ -1324,10 +1354,11 @@
- func: cat(Tensor[] tensors, int dim=0) -> Tensor
structured_delegate: cat.out
dispatch:
SparseCPU, SparseCUDA: cat_sparse
QuantizedCPU: cat_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: cat_nested
tags: core
- func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
structured: True
precomputed:
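`cat` now dispatches to `cat_nested` for nested tensors. A hedged sketch (nested-tensor support for `cat` along dim 0 is assumed from the new dispatch entry):

```python
import torch

a = torch.nested.nested_tensor([torch.randn(2, 3), torch.randn(4, 3)])
b = torch.nested.nested_tensor([torch.randn(1, 3)])

# New NestedTensorCPU/NestedTensorCUDA dispatch: concatenating along dim 0
# appends the constituent tensors of each nested tensor.
c = torch.cat([a, b], dim=0)
print(len(c.unbind()))  # 3
```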
@@ -1611,88 +1642,97 @@
- func: contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)
variants: method
manual_cpp_binding: True
-- func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups) -> Tensor
+- func: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
dispatch:
CompositeExplicitAutograd: convolution
autogen: convolution.out
tags: core
-- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
dispatch:
CompositeExplicitAutograd, CUDA: convolution_backward
autogen: convolution_backward.out
tags: core
-- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+- func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
dispatch:
CompositeExplicitAutograd: convolution_overrideable
autogen: convolution_overrideable.out
-- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+- func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
dispatch:
CompositeExplicitAutograd: convolution_backward_overrideable
autogen: convolution_backward_overrideable.out
-- func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+- func: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
dispatch:
CompositeExplicitAutograd: _convolution
autogen: _convolution.out
-- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
+- func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, int[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor
-- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor
+- func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, str padding, SymInt[] dilation, SymInt groups) -> Tensor
+ dispatch:
+ CompositeImplicitAutograd: _convolution_mode_symint
-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, SymInt[] padding, int[] dilation, bool transposed, SymInt[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor
+- func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] dilation=1, SymInt groups=1) -> Tensor
dispatch:
CompositeImplicitAutograd: conv1d_symint
-- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
+- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor
dispatch:
CompositeImplicitAutograd: conv2d_symint
-- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
+- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
dispatch:
CompositeImplicitAutograd: conv3d_symint
-- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, str padding="valid", int[1] dilation=1, int groups=1) -> Tensor
+- func: conv1d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, str padding="valid", SymInt[1] dilation=1, SymInt groups=1) -> Tensor
cpp_no_default_args: ['bias', 'stride', 'padding']
+ dispatch:
+ CompositeImplicitAutograd: conv1d_padding_symint
-- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, str padding="valid", int[2] dilation=1, int groups=1) -> Tensor
+- func: conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding="valid", SymInt[2] dilation=1, SymInt groups=1) -> Tensor
cpp_no_default_args: ['bias', 'stride', 'padding']
+ dispatch:
+ CompositeImplicitAutograd: conv2d_padding_symint
-- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, str padding="valid", int[3] dilation=1, int groups=1) -> Tensor
+- func: conv3d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, str padding="valid", SymInt[3] dilation=1, SymInt groups=1) -> Tensor
cpp_no_default_args: ['bias', 'stride', 'padding']
+ dispatch:
+ CompositeImplicitAutograd: conv3d_padding_symint
- func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
dispatch:
CompositeExplicitAutograd: conv_tbc
autogen: conv_tbc.out
- func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
# NB: we inherit the goofy argument order from PyTorch torch.nn.functional
-- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, int groups=1, int[1] dilation=1) -> Tensor
+- func: conv_transpose1d(Tensor input, Tensor weight, Tensor? bias=None, SymInt[1] stride=1, SymInt[1] padding=0, SymInt[1] output_padding=0, SymInt groups=1, SymInt[1] dilation=1) -> Tensor
dispatch:
CompositeImplicitAutograd: conv_transpose1d_symint
-- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int groups=1, int[2] dilation=1) -> Tensor
+- func: conv_transpose2d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt groups=1, SymInt[2] dilation=1) -> Tensor
dispatch:
CompositeImplicitAutograd: conv_transpose2d_symint
-- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int groups=1, int[3] dilation=1) -> Tensor
+- func: conv_transpose3d.input(Tensor input, Tensor weight, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt groups=1, SymInt[3] dilation=1) -> Tensor
dispatch:
CompositeImplicitAutograd: conv_transpose3d_symint
- func: copy(Tensor self, Tensor src, bool non_blocking=False) -> Tensor
variants: function
dispatch:
CompositeExplicitAutogradNonFunctional: copy
+ tags: core
- func: copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)
variants: method
device_check: NoCheck
device_guard: False
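Across this hunk the convolution schemas move `stride`, `dilation`, and `groups` from `int`/`int[]` to `SymInt`/`SymInt[]`, so these arguments can stay symbolic under dynamic-shape tracing instead of forcing a specialization per concrete value. An illustrative sketch of the dynamic-shape path these schemas serve (standard `torch.compile` usage; nothing version-specific is assumed beyond dynamic-shape support):

```python
import torch

conv = torch.nn.Conv2d(3, 8, kernel_size=3, stride=2)
compiled = torch.compile(conv, dynamic=True)

# With SymInt-typed schemas, the traced graph can treat sizes symbolically,
# so differing input shapes reuse the same compiled artifact.
for n in (1, 2, 5):
    print(compiled(torch.randn(n, 3, 32, 32)).shape)
```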
@@ -1718,10 +1758,12 @@
- func: cos(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: cos.out
+ dispatch:
+ NestedTensorCPU, NestedTensorCUDA: cos_nested
tags: [core, pointwise]
- func: cos_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
@@ -1800,36 +1842,36 @@
- func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
dispatch:
CUDA: cudnn_batch_norm_backward
autogen: cudnn_batch_norm_backward.out
-- func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+- func: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
dispatch:
CUDA: cudnn_convolution
autogen: cudnn_convolution.out
-- func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+- func: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
dispatch:
CUDA: cudnn_convolution_transpose
autogen: cudnn_convolution_transpose.out
-- func: _mps_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
MPS: _mps_convolution_transpose
autogen: _mps_convolution_transpose.out
-- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool[2] output_mask) -> (Tensor, Tensor)
+- func: mps_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[2] output_mask) -> (Tensor, Tensor)
dispatch:
MPS: mps_convolution_transpose_backward
autogen: mps_convolution_transpose_backward.out
-- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
CUDA: cudnn_convolution_relu
autogen: cudnn_convolution_relu.out
-- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
CUDA: cudnn_convolution_add_relu
autogen: cudnn_convolution_add_relu.out
# NB: input is special cased in a way I don't quite understand
@@ -1965,10 +2007,11 @@
- func: _ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
dispatch:
CPU: ctc_loss_cpu
CUDA: ctc_loss_gpu
+ Meta: ctc_loss_meta
autogen: _ctc_loss.out
tags: dynamic_output_shape # the shape of second output is data dependent
- func: _ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
dispatch:
@@ -1997,10 +2040,11 @@
- func: diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)
variants: function, method
dispatch:
CompositeExplicitAutograd: diagonal
+ tags: core
- func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
python_module: linalg
variants: function
@@ -2077,11 +2121,11 @@
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: div.out_mode
dispatch:
SparseCPU, SparseCUDA: div_sparse
- tags: pointwise
+ tags: [core, pointwise]
- func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
structured_delegate: div.out_mode
@@ -2118,11 +2162,11 @@
- func: div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: div
- tags: pointwise
+ tags: [core, pointwise]
- func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!)
variants: method
dispatch:
CompositeExplicitAutograd: div_
@@ -2368,11 +2412,11 @@
- func: resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!)
use_const_ref_for_mutable_tensors: True
variants: method
device_check: NoCheck
device_guard: False
- tags: inplace_view
+ tags: [core, inplace_view]
dispatch:
Meta: resize__symint
CPU: resize_
CUDA: resize_cuda_
MPS: resize_mps_
@@ -2515,11 +2559,11 @@
structured_delegate: expm1.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: expm1_sparse
SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
- tags: pointwise
+ tags: [core, pointwise]
- func: expm1_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: expm1.out
variants: function, method
@@ -2682,14 +2726,19 @@
SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim
- func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
+ dispatch:
+ CompositeExplicitAutograd: floor_divide
- func: floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: floor_divide_
+ autogen: floor_divide.Scalar_out
- func: frac(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: frac.out
variants: function, method
@@ -2977,11 +3026,11 @@
# Used by inductor to signal indexing without bounds checks
# Note that we don't support boolean indexing, to avoid dynamic output shapes
- func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
variants: function
dispatch:
- CPU, CUDA: _unsafe_index
+ CompositeExplicitAutograd: _unsafe_index
- func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
structured: True
variants: function
precomputed:
@@ -3251,18 +3300,22 @@
- func: _cslt_compress(Tensor input) -> Tensor
dispatch:
CUDA: _cslt_compress
-- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, bool transpose_result=False) -> Tensor
+- func: _cslt_sparse_mm(Tensor compressed_A, Tensor dense_B, Tensor? bias=None, Tensor? alpha=None, ScalarType? out_dtype=None, bool transpose_result=False) -> Tensor
dispatch:
CUDA: _cslt_sparse_mm
- func: _sparse_semi_structured_linear(Tensor input, Tensor weight, Tensor meta, *, Tensor? bias=None, str? activation=None) -> Tensor
dispatch:
CUDA: _sparse_semi_structured_linear
+- func: _mixed_dtypes_linear(Tensor input, Tensor weight, Tensor scale, *, Tensor? bias=None, str? activation=None) -> Tensor
+ dispatch:
+ CUDA: _mixed_dtypes_linear
+
- func: fbgemm_linear_int8_weight_fp32_activation(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
- func: fbgemm_linear_int8_weight(Tensor input, Tensor weight, Tensor packed, Tensor col_offsets, Scalar weight_scale, Scalar weight_zero_point, Tensor bias) -> Tensor
- func: fbgemm_linear_quantize_weight(Tensor input) -> (Tensor, Tensor, float, int)
@@ -3289,16 +3342,46 @@
- func: linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CompositeExplicitAutograd: linspace
+- func: linspace.Tensor_Tensor(Tensor start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: linspace
+
+- func: linspace.Tensor_Scalar(Tensor start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: linspace
+
+- func: linspace.Scalar_Tensor(Scalar start, Tensor end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: linspace
+
- func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, Meta: linspace_out
CUDA: linspace_cuda_out
MPS: linspace_out_mps
+- func: linspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: linspace_out
+
+- func: linspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, *, Tensor(a!) out) -> Tensor(a!)
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: linspace_out
+
- func: log(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log.out
variants: function, method
tags: [core, pointwise]
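The new `linspace` overloads (`Tensor_Tensor`, `Tensor_Scalar`, `Scalar_Tensor`, plus their `.out` variants) allow 0-dim tensor endpoints; the `logspace` hunk further down mirrors the same set. A minimal sketch, assuming the overloads are exposed through the usual Python binding:

```python
import torch

start = torch.tensor(0.0)
end = torch.tensor(1.0)

print(torch.linspace(start, end, 5))   # Tensor_Tensor: both endpoints 0-dim tensors
print(torch.linspace(start, 1.0, 5))   # Tensor_Scalar
print(torch.linspace(0.0, end, 5))     # Scalar_Tensor
print(torch.logspace(start, end, 5))   # logspace gains the same overloads
```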
@@ -3320,11 +3403,11 @@
- func: log10(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log10.out
variants: function, method
- tags: pointwise
+ tags: [core, pointwise]
- func: log10_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: log10.out
variants: function, method
@@ -3344,11 +3427,11 @@
structured_delegate: log1p.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: log1p_sparse
SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
- tags: pointwise
+ tags: [core, pointwise]
- func: log1p_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: log1p.out
variants: function, method
@@ -3370,11 +3453,11 @@
- func: log2(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log2.out
variants: function, method
- tags: pointwise
+ tags: [core, pointwise]
- func: log2_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: log2.out
variants: function, method
@@ -3475,15 +3558,45 @@
- func: logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CompositeExplicitAutograd: logspace
+- func: logspace.Tensor_Tensor(Tensor start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: logspace
+
+- func: logspace.Tensor_Scalar(Tensor start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: logspace
+
+- func: logspace.Scalar_Tensor(Scalar start, Tensor end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: logspace
+
- func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, Meta: logspace_out
CUDA: logspace_cuda_out
+- func: logspace.Tensor_Tensor_out(Tensor start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Tensor_Scalar_out(Tensor start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: logspace_out
+
+- func: logspace.Scalar_Tensor_out(Scalar start, Tensor end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+ category_override: factory
+ dispatch:
+ CompositeExplicitAutograd: logspace_out
+
# log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
- func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
variants: function, method
- func: log_softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
@@ -3845,21 +3958,21 @@
MPS: amin_out_mps
# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
# native_functions.yaml
# https://github.com/pytorch/pytorch/issues/77394
-- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
MPS: _mps_convolution
autogen: _mps_convolution.out
-- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+- func: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
dispatch:
MPS: mps_convolution_backward
autogen: mps_convolution_backward.out
-- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups) -> Tensor
+- func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
CompositeExplicitAutograd: mkldnn_convolution
autogen: mkldnn_convolution.out
- func: mkldnn_rnn_layer(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train) -> (Tensor, Tensor, Tensor, Tensor)
@@ -3881,30 +3994,30 @@
- func: miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)
dispatch:
CUDA: miopen_batch_norm_backward
autogen: miopen_batch_norm_backward.out
-- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
dispatch:
CUDA: miopen_convolution
autogen: miopen_convolution.out
-- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
dispatch:
CUDA: miopen_convolution_transpose
autogen: miopen_convolution_transpose.out
-- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
+- func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
dispatch:
CUDA: miopen_depthwise_convolution
autogen: miopen_depthwise_convolution.out
-- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: miopen_convolution_relu(Tensor self, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
CUDA: miopen_convolution_relu
-- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
+- func: miopen_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, SymInt groups) -> Tensor
dispatch:
CUDA: miopen_convolution_add_relu
- func: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
dispatch:
@@ -3941,10 +4054,18 @@
- func: _int_mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CUDA: _int_mm_out_cuda
+- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
+ dispatch:
+ CUDA: _convert_weight_to_int4pack_cuda
+
+- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
+ dispatch:
+ CUDA: _weight_int4pack_mm_cuda
+
- func: _sparse_mm(Tensor sparse, Tensor dense) -> Tensor
python_module: sparse
- func: _sparse_mm.reduce(Tensor sparse, Tensor dense, str reduce) -> Tensor
python_module: sparse
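The two new ops back weight-only int4 quantized matmul: `_convert_weight_to_int4pack` packs an int32 weight matrix into the tiled layout that `_weight_int4pack_mm` consumes. The sketch below is inferred from the schemas alone; the dtypes, shapes, and the `qScaleAndZeros` layout are assumptions, and the CUDA-only dispatch means it needs a CUDA build:

```python
import torch

# Assumption-laden sketch based on the schemas above (CUDA-only dispatch).
a = torch.randn(8, 4096, dtype=torch.bfloat16, device="cuda")
w_int = torch.randint(0, 16, (4096, 4096), dtype=torch.int32, device="cuda")

packed = torch.ops.aten._convert_weight_to_int4pack(w_int, 8)  # innerKTiles=8

group_size = 128
# Assumed layout: per-group (scale, zero) pairs of shape (k/group, n, 2).
q_scales_zeros = torch.ones(4096 // group_size, 4096, 2,
                            dtype=torch.bfloat16, device="cuda")
out = torch.ops.aten._weight_int4pack_mm(a, packed, group_size, q_scales_zeros)
```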
@@ -4085,10 +4206,11 @@
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
CompositeImplicitAutograd: narrow_symint
+ NestedTensorCPU, NestedTensorCUDA: narrow_nested_symint
- func: narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a)
variants: function, method
device_check: NoCheck
device_guard: False
@@ -4197,11 +4319,11 @@
- func: is_vulkan_available() -> bool
- func: _nnpack_available() -> bool
-- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, int[2] stride=1) -> Tensor
+- func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
variants: function
dispatch:
CompositeExplicitAutograd: _nnpack_spatial_convolution
autogen: _nnpack_spatial_convolution.out
@@ -4312,35 +4434,37 @@
variants: function, method
- func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
dispatch:
CPU: pixel_shuffle_cpu
+ MPS: pixel_shuffle_mps
CompositeExplicitAutogradNonFunctional: math_pixel_shuffle
autogen: pixel_shuffle.out
tags: core
- func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
dispatch:
CPU: pixel_unshuffle_cpu
+ MPS: pixel_unshuffle_mps
CompositeExplicitAutogradNonFunctional: math_pixel_unshuffle
autogen: pixel_unshuffle.out
-- func: channel_shuffle(Tensor self, int groups) -> Tensor
+- func: channel_shuffle(Tensor self, SymInt groups) -> Tensor
dispatch:
CPU, CUDA: channel_shuffle
QuantizedCPU: channel_shuffle_quantized_cpu
autogen: channel_shuffle.out
-- func: native_channel_shuffle(Tensor self, int groups) -> Tensor
+- func: native_channel_shuffle(Tensor self, SymInt groups) -> Tensor
dispatch:
CPU: channel_shuffle_cpu
CompositeImplicitAutograd: math_channel_shuffle
- func: is_pinned(Tensor self, Device? device=None) -> bool
variants: method
dispatch:
- CUDA: is_pinned_cuda
+ NestedTensorCUDA, CUDA: is_pinned_cuda
MPS: is_pinned_mps
CompositeExplicitAutograd: is_pinned_default
# TODO: add a copy kwarg that guarantees that the tensor is put into fresh
# pinned memory
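Earlier in this hunk, `pixel_shuffle`/`pixel_unshuffle` gain native MPS kernels (previously MPS fell through to the composite implementation). The op semantics are unchanged; a quick refresher that picks MPS when available:

```python
import torch
import torch.nn.functional as F

device = "mps" if torch.backends.mps.is_available() else "cpu"
x = torch.randn(1, 9, 4, 4, device=device)

y = F.pixel_shuffle(x, upscale_factor=3)     # (1, 9, 4, 4) -> (1, 1, 12, 12)
z = F.pixel_unshuffle(y, downscale_factor=3)
assert z.shape == x.shape
```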
@@ -4350,10 +4474,11 @@
# Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
- func: _pin_memory(Tensor self, Device? device=None) -> Tensor
dispatch:
CUDA: _pin_memory_cuda
MPS: _pin_memory_mps
+ NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
autogen: _pin_memory.out
- func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
variants: function, method
@@ -4658,23 +4783,25 @@
CompositeExplicitAutograd: repeat
MPS: repeat_mps
autogen: repeat.out
tags: core
-- func: repeat_interleave.Tensor(Tensor repeats, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.Tensor(Tensor repeats, *, SymInt? output_size=None) -> Tensor
variants: function
dispatch:
CPU: repeat_interleave_cpu
CUDA: repeat_interleave_cuda
MPS: repeat_interleave_mps
tags: dynamic_output_shape
autogen: repeat_interleave.Tensor_out
-- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
variants: function, method
+ dispatch:
+ CompositeImplicitAutograd: repeat_interleave_symint
-- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, int? output_size=None) -> Tensor
+- func: repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor
variants: function, method
dispatch:
CompositeImplicitAutograd: repeat_interleave_symint
- func: reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a)
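`repeat_interleave`'s `output_size` becomes `SymInt?`, so it can remain symbolic under tracing; passing it explicitly also spares the kernel from reading `repeats` back to compute the result length (a device sync on GPU). Usage is unchanged:

```python
import torch

x = torch.tensor([10, 20])
repeats = torch.tensor([1, 3])

# output_size must equal repeats.sum(); supplying it avoids a sync on GPU.
y = torch.repeat_interleave(x, repeats, dim=0, output_size=4)
print(y)  # tensor([10, 20, 20, 20])
```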
@@ -4971,39 +5098,44 @@
- func: silu(Tensor self) -> Tensor
structured_delegate: silu.out
python_module: nn
dispatch:
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu
+ tags: pointwise
- func: silu_(Tensor(a!) self) -> Tensor(a!)
structured_delegate: silu.out
python_module: nn
dispatch:
NestedTensorCPU, NestedTensorCUDA: NestedTensor_silu_
+ tags: pointwise
- func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: silu_out
MPS: silu_out_mps
+ tags: pointwise
- func: silu_backward.grad_input(Tensor grad_output, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: silu_backward_out
MPS: silu_backward_out_mps
+ tags: pointwise
- func: silu_backward(Tensor grad_output, Tensor self) -> Tensor
structured_delegate: silu_backward.grad_input
python_module: nn
dispatch:
CompositeImplicitAutograd: math_silu_backward
NestedTensorCPU, NestedTensorCUDA: silu_backward_nested
+ tags: pointwise
- func: mish(Tensor self) -> Tensor
structured_delegate: mish.out
python_module: nn
@@ -5015,15 +5147,17 @@
structured: True
structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: mish_out
+ MPS: mish_out_mps
- func: mish_backward(Tensor grad_output, Tensor self) -> Tensor
python_module: nn
dispatch:
CPU, CUDA: mish_backward
+ MPS: mish_backward_mps
CompositeImplicitAutograd: math_mish_backward
- func: sigmoid(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: sigmoid.out
@@ -5074,10 +5208,11 @@
structured_delegate: sin.out
variants: function, method
dispatch:
SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
SparseCPU, SparseCUDA: sin_sparse
+ NestedTensorCPU, NestedTensorCUDA: sin_nested
tags: [core, pointwise]
- func: sin_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sin.out
@@ -5969,11 +6104,11 @@
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
SparseCPU, SparseCUDA: trunc_sparse
SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
- tags: pointwise
+ tags: [core, pointwise]
- func: trunc_(Tensor(a!) self) -> Tensor(a!)
structured_delegate: trunc.out
device_check: NoCheck # TensorIterator
variants: function, method
@@ -6194,17 +6329,19 @@
- func: _weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)
variants: function
dispatch:
CPU: weight_norm_cpu
CUDA: weight_norm_cuda
+ MPS: weight_norm_mps
autogen: _weight_norm_interface.out
- func: _weight_norm_interface_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
variants: function
dispatch:
CPU: weight_norm_backward_cpu
CUDA: weight_norm_backward_cuda
+ MPS: weight_norm_backward_mps
autogen: _weight_norm_interface_backward.out
- func: _weight_norm_differentiable_backward(Tensor grad_w, Tensor saved_v, Tensor saved_g, Tensor saved_norms, int dim) -> (Tensor, Tensor)
variants: function
@@ -6217,10 +6354,11 @@
- func: _efficientzerotensor(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CPU: _efficientzerotensor
CUDA: _efficientzerotensor_cuda
+ MPS: _efficientzerotensor_mps
Meta: _efficientzerotensor_meta
autogen: _efficientzerotensor.out
- func: zeros(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
@@ -6673,16 +6811,16 @@
- func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
structured_delegate: _addmm_activation.out
variants: function, method
-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
variants: function
dispatch:
CUDA: _scaled_mm_cuda
-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
variants: function
dispatch:
CUDA: _scaled_mm_out_cuda
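`_scaled_mm` (the float8 matmul primitive) gains a `use_fast_accum` flag. A sketch following the schema above; float8 support, the column-major requirement on `mat2`, and an H100-class GPU are all assumptions:

```python
import torch

# Assumption-laden sketch per the schema above (CUDA float8, e.g. H100).
a = torch.randn(16, 32, device="cuda").to(torch.float8_e4m3fn)
b = torch.randn(32, 16, device="cuda").to(torch.float8_e4m3fn)
b = b.t().contiguous().t()  # assumed column-major requirement on mat2

out, amax = torch._scaled_mm(a, b, out_dtype=torch.bfloat16,
                             use_fast_accum=True)  # new flag in this schema
```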
# NOTE [ Sparse: autograd and API ]
@@ -7053,11 +7191,11 @@
# By adding the AutogradNestedTensor this makes this function CompositeImplicit-like for nested tensors
- func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
variants: function, method
dispatch:
CompositeExplicitAutograd: unbind
- CompositeImplicitAutogradNestedTensor: NestedTensor_unbind
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_unbind
- func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[]
variants: function, method
- func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
@@ -7141,18 +7279,18 @@
variants: method
dispatch:
CPU: dense_to_mkldnn
autogen: to_mkldnn.out
-- func: mkldnn_reorder_conv2d_weight(Tensor self, int[2] padding=0, int[2] stride=1, int[2] dilation=1, int groups=1, int[]? input_size=None) -> Tensor
+- func: mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None) -> Tensor
variants: function
python_module: nn
dispatch:
MkldnnCPU: mkldnn_reorder_conv2d_weight
autogen: mkldnn_reorder_conv2d_weight.out
-- func: mkldnn_reorder_conv3d_weight(Tensor self, int[3] padding=0, int[3] stride=1, int[3] dilation=1, int groups=1) -> Tensor
+- func: mkldnn_reorder_conv3d_weight(Tensor self, SymInt[3] padding=0, SymInt[3] stride=1, SymInt[3] dilation=1, SymInt groups=1) -> Tensor
variants: function
python_module: nn
dispatch:
MkldnnCPU: mkldnn_reorder_conv3d_weight
autogen: mkldnn_reorder_conv3d_weight.out
@@ -7654,10 +7792,14 @@
- func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: masked_scatter
+- func: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: masked_scatter_backward_symint
+
- func: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
dispatch:
CUDA: masked_softmax_cuda
CPU: masked_softmax_cpu
autogen: _masked_softmax.out
@@ -7936,10 +8078,12 @@
tags: [core, pointwise]
- func: bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: bitwise_and_
tags: pointwise
- func: bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
@@ -7980,10 +8124,12 @@
tags: pointwise
- func: bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
+ dispatch:
+ CompositeExplicitAutograd: bitwise_or
tags: [core, pointwise]
- func: bitwise_or.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function
@@ -7999,10 +8145,12 @@
tags: [core, pointwise]
- func: bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: bitwise_or_
tags: pointwise
- func: bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
@@ -8043,10 +8191,12 @@
tags: pointwise
- func: bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
+ dispatch:
+ CompositeExplicitAutograd: bitwise_xor
tags: [core, pointwise]
- func: bitwise_xor.Scalar_Tensor(Scalar self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function
@@ -8062,10 +8212,12 @@
tags: [core, pointwise]
- func: bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
+ dispatch:
+ CompositeExplicitAutograd: bitwise_xor_
tags: pointwise
- func: bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
@@ -8502,10 +8654,11 @@
structured_delegate: eq.Scalar_out
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
QuantizedCPU: eq_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: eq_scalar_nested
tags: [core, pointwise]
- func: eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
@@ -8538,10 +8691,11 @@
structured_delegate: ge.Scalar_out
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
QuantizedCPU: ge_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: ge_scalar_nested
tags: [core, pointwise]
- func: ge.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
@@ -8664,10 +8818,11 @@
structured_delegate: gt.Scalar_out
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
QuantizedCPU: gt_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: gt_scalar_nested
tags: [core, pointwise]
- func: gt.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
@@ -9104,10 +9259,11 @@
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: lgamma_out
+ MPS: lgamma_out_mps
tags: pointwise
- func: lgamma_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: lgamma.out
@@ -9124,10 +9280,11 @@
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: digamma_out
+ MPS: digamma_out_mps
tags: pointwise
- func: digamma(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: digamma.out
@@ -9138,10 +9295,11 @@
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: polygamma_out
+ MPS: polygamma_out_mps
tags: pointwise
- func: polygamma(int n, Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: polygamma.out
@@ -9261,11 +9419,11 @@
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: atan2_out
MPS: atan2_out_mps
- tags: pointwise
+ tags: [core, pointwise]
- func: atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: atan2.out
variants: method
@@ -9273,11 +9431,11 @@
- func: atan2(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: atan2.out
variants: method, function
- tags: pointwise
+ tags: [core, pointwise]
# arctan2, alias of atan2
- func: arctan2(Tensor self, Tensor other) -> Tensor
variants: method, function
@@ -9462,11 +9620,11 @@
- func: nextafter.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
- CPU, CUDA: nextafter_out
+ CPU, CUDA, MPS: nextafter_out
tags: pointwise
- func: nextafter(Tensor self, Tensor other) -> Tensor
structured_delegate: nextafter.out
variants: method, function
@@ -9809,11 +9967,11 @@
tags: pointwise
- func: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: pow.Scalar_out
- tags: pointwise
+ tags: [core, pointwise]
- func: pow.Tensor_Scalar_out(Tensor self, Scalar exponent, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
@@ -10018,10 +10176,25 @@
dispatch:
CPU: foreach_tensor_add_scalarlist_kernel_slow_
CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
autogen: _foreach_add.ScalarList_out
+- func: _foreach_add.Tensor(Tensor[] self, Tensor other, *, Scalar alpha=1) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_tensor_kernel_slow
+ CUDA: foreach_tensor_add_tensor_kernel_cuda
+
+- func: _foreach_add_.Tensor(Tensor(a!)[] self, Tensor other, *, Scalar alpha=1) -> ()
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_add_tensor_kernel_slow_
+ CUDA: foreach_tensor_add_tensor_kernel_cuda_
+ autogen: _foreach_add.Tensor_out
+
- func: _foreach_sub.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_sub_scalar_kernel_slow
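New `Tensor` overloads for `_foreach_add`/`_foreach_add_` (and, in a later hunk, `_foreach_div`): `other` is a single tensor applied to every list element, which is what lets optimizers keep quantities like the learning rate as a 0-dim tensor. A hedged sketch (a 0-dim `other` is assumed to be the supported case):

```python
import torch

params = [torch.zeros(3), torch.zeros(2)]
step = torch.tensor(0.1)  # 0-dim tensor, e.g. a tensor learning rate

# New overload: one Tensor `other` added to every element of the list.
torch._foreach_add_(params, step, alpha=2.0)
print(params[0])  # tensor([0.2000, 0.2000, 0.2000])
```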
@@ -10168,10 +10341,25 @@
dispatch:
CPU: foreach_tensor_div_scalarlist_kernel_slow_
CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
autogen: _foreach_div.ScalarList_out
+- func: _foreach_div.Tensor(Tensor[] self, Tensor other) -> Tensor[]
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_tensor_kernel_slow
+ CUDA: foreach_tensor_div_tensor_kernel_cuda
+
+- func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> ()
+ device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
+ variants: function
+ dispatch:
+ CPU: foreach_tensor_div_tensor_kernel_slow_
+ CUDA: foreach_tensor_div_tensor_kernel_cuda_
+ autogen: _foreach_div.Tensor_out
+
- func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices
variants: function
dispatch:
CPU: foreach_tensor_clamp_max_scalar_kernel_slow
@@ -10988,41 +11176,48 @@
- func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
dispatch:
CPU: bucketize_cpu
CUDA: bucketize_cuda
+ MPS: bucketize_mps
- func: bucketize.Tensor_out(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: bucketize_out_cpu
CUDA: bucketize_out_cuda
+ MPS: bucketize_out_mps
- func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor
dispatch:
CPU: bucketize_cpu
CUDA: bucketize_cuda
+ MPS: bucketize_mps
autogen: bucketize.Scalar_out
- func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
dispatch:
CPU: searchsorted_cpu
CUDA: searchsorted_cuda
+ MPS: searchsorted_mps
- func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: searchsorted_out_cpu
CUDA: searchsorted_out_cuda
+ MPS: searchsorted_out_mps
- func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor
dispatch:
CPU: searchsorted_cpu
CUDA: searchsorted_cuda
+ MPS: searchsorted_mps
- func: searchsorted.Scalar_out(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: searchsorted_out_cpu
CUDA: searchsorted_out_cuda
+ MPS: searchsorted_out_mps
- func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor
structured_delegate: _convert_indices_from_coo_to_csr.out
- func: _convert_indices_from_coo_to_csr.out(Tensor self, int size, *, bool out_int32=False, Tensor(a!) out) -> Tensor(a!)
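`bucketize` and `searchsorted` gain MPS kernels; semantics are unchanged. A quick refresher on the two (note the reversed argument order: `bucketize(values, boundaries)` vs `searchsorted(sorted_sequence, values)`):

```python
import torch

boundaries = torch.tensor([1.0, 3.0, 5.0])
values = torch.tensor([0.5, 3.0, 6.0])

print(torch.bucketize(values, boundaries))              # tensor([0, 1, 3])
print(torch.bucketize(values, boundaries, right=True))  # tensor([0, 2, 3])
print(torch.searchsorted(boundaries, values))           # tensor([0, 1, 3])
```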
@@ -11566,10 +11761,11 @@
structured_inherits: TensorIteratorBase
device_check: NoCheck # TensorIterator
python_module: nn
dispatch:
CPU, CUDA: softshrink_out
+ MPS: softshrink_out_mps
- func: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
structured_delegate: softshrink.out
device_check: NoCheck # TensorIterator
python_module: nn
@@ -11578,10 +11774,11 @@
structured: True
structured_inherits: TensorIteratorBase
python_module: nn
dispatch:
CPU, CUDA: softshrink_backward_out
+ MPS: softshrink_backward_out_mps
- func: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
structured_delegate: softshrink_backward.grad_input
python_module: nn
@@ -12480,105 +12677,105 @@
# one that is written in the native style: modern C++. Algorithmically,
# these are the same thing, but we give them different prefixes to
# make the operational distinction clear.
tags: pointwise
-- func: slow_conv_transpose2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
+- func: slow_conv_transpose2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
structured: True
dispatch:
CPU: slow_conv_transpose2d_structured_cpu
CUDA: slow_conv_transpose2d_structured_cuda
-- func: slow_conv_transpose2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, int[2] dilation=1) -> Tensor
+- func: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
python_module: nn
structured_delegate: slow_conv_transpose2d.out
-- func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
+- func: slow_conv_transpose3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: slow_conv_transpose3d_out_cpu
CUDA: slow_conv_transpose3d_out_cuda
-- func: slow_conv_transpose3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, int[3] dilation=1) -> Tensor
+- func: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv_transpose3d_cpu
CUDA: slow_conv_transpose3d_cuda
-- func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
+- func: thnn_conv2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
-- func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor
+- func: thnn_conv2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0) -> Tensor
python_module: nn
-- func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!)
+- func: _slow_conv2d_forward.output(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) output) -> Tensor(a!)
python_module: nn
dispatch:
CPU: slow_conv2d_forward_out_cpu
CUDA: slow_conv2d_forward_out_cuda
-- func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor
+- func: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv2d_forward_cpu
CUDA: slow_conv2d_forward_cuda
-- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
+- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!))
python_module: nn
dispatch:
CPU: slow_conv2d_backward_out_cpu
CUDA: slow_conv2d_backward_out_cuda
-- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
python_module: nn
dispatch:
CPU: slow_conv2d_backward_cpu
CUDA: slow_conv2d_backward_cuda
autogen: _slow_conv2d_backward.output_mask_out
-- func: _conv_depthwise2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, SymInt[2] padding, int[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
+- func: _conv_depthwise2d.out(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation, *, Tensor(a!) out) -> Tensor(a!)
use_const_ref_for_mutable_tensors: True
python_module: nn
dispatch:
CUDA: conv_depthwise2d_cuda_out
-- func: _conv_depthwise2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, SymInt[2] padding, int[2] dilation) -> Tensor
+- func: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
python_module: nn
dispatch:
CUDA: conv_depthwise2d_cuda
-- func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding, int[3] dilation) -> Tensor
+- func: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
python_module: nn
dispatch:
CUDA: conv_depthwise3d_cuda
autogen: conv_depthwise3d.out
-- func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
+- func: slow_conv3d.out(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
-- func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0) -> Tensor
+- func: slow_conv3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0) -> Tensor
python_module: nn
-- func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
+- func: slow_conv3d_forward.output(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, *, Tensor(a!) output) -> Tensor(a!)
python_module: nn
dispatch:
CPU: slow_conv3d_forward_out_cpu
-- func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, SymInt[3] padding) -> Tensor
+- func: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv3d_forward_cpu
-- func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, SymInt[2] padding=0, int[2] dilation=1) -> Tensor
+- func: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv_dilated2d_cpu
CUDA: slow_conv_dilated2d_cuda
autogen: slow_conv_dilated2d.out
-- func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, SymInt[3] padding=0, int[3] dilation=1) -> Tensor
+- func: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
python_module: nn
dispatch:
CPU: slow_conv_dilated3d_cpu
CUDA: slow_conv_dilated3d_cuda
autogen: slow_conv_dilated3d.out
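
Context for the hunk above: migrating kernel_size/stride/padding/output_padding/dilation from int[k] to SymInt[k] lets these slow-path convolution schemas stay symbolic under the tracing stack instead of specializing on concrete sizes. A minimal sketch of what that enables, assuming ConvTranspose3d lowers to slow_conv_transpose3d on a plain CPU build:

    # Hedged sketch: dynamic=True asks the tracer to keep sizes as SymInts,
    # which is what the SymInt[3] schema arguments above make representable.
    import torch

    net = torch.nn.ConvTranspose3d(4, 8, kernel_size=3, stride=2)
    compiled = torch.compile(net, dynamic=True)

    # Different batch sizes can reuse one symbolically-shaped graph.
    for b in (2, 5):
        out = compiled(torch.randn(b, 4, 8, 8, 8))
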
@@ -14267,23 +14464,24 @@
- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
variants: function
tags: nondeterministic_seeded
-- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor ouput, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
dispatch:
CPU: _scaled_dot_product_flash_attention_cpu
CUDA: _scaled_dot_product_flash_attention_cuda
NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
tags: nondeterministic_seeded
-- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
device_check: NoCheck
variants: function
dispatch:
CPU: _scaled_dot_product_flash_attention_backward_cpu
CUDA: _scaled_dot_product_flash_attention_backward_cuda
+ NestedTensorCUDA: _scaled_dot_product_flash_attention_backward_nested
- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
dispatch:
CUDA: _scaled_dot_product_efficient_attention_cuda
NestedTensorCUDA: _scaled_dot_product_efficient_attention_nestedtensor_cuda
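
The flash-attention schema changes above (max_q/max_k widened to SymInt, the corrected output name, and the new NestedTensorCUDA backward kernel) all sit behind the public scaled_dot_product_attention entry point rather than being called directly. A hedged sketch of forcing the flash backend, assuming a CUDA device and a dtype it supports:

    # Hedged sketch: restrict SDPA to the flash kernel described by the
    # schemas above; fp16 on CUDA is one of the supported configurations.
    import torch
    import torch.nn.functional as F

    q, k, v = (torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.float16)
               for _ in range(3))
    with torch.backends.cuda.sdp_kernel(enable_flash=True,
                                        enable_math=False,
                                        enable_mem_efficient=False):
        out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
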
@@ -14293,30 +14491,30 @@
device_check: NoCheck
dispatch:
CUDA: _scaled_dot_product_efficient_attention_backward_cuda
tags: nondeterministic_seeded
-- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
variants: function
dispatch:
CUDA: _flash_attention_forward
tags: nondeterministic_seeded
-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
device_check: NoCheck
variants: function
dispatch:
CUDA: _flash_attention_backward
# Returns output, logsumexp if compute_logsumexp
-- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset)
+- func: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
variants: function
dispatch:
CUDA: _efficient_attention_forward
tags: nondeterministic_seeded
-- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int max_seqlen_k, int max_seqlen_q, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
+- func: _efficient_attention_backward(Tensor grad_out_, Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor out, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, SymInt max_seqlen_q, SymInt max_seqlen_k, Tensor logsumexp, float dropout_p, Tensor philox_seed, Tensor philox_offset, int custom_mask_type, bool bias_requires_grad, *, float? scale=None, int? num_splits_key=None) -> (Tensor, Tensor, Tensor, Tensor)
device_check: NoCheck
variants: function
dispatch:
CUDA: _efficient_attention_backward
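
In this hunk, _flash_attention_forward now takes optional cum_seq tensors (the dense path can pass None), _efficient_attention_forward additionally returns the SymInt max sequence lengths, and _efficient_attention_backward's max_seqlen_q/max_seqlen_k order is corrected to match the forward. These are likewise reached through the public API; a hedged sketch exercising the memory-efficient path end to end, including the backward:

    # Hedged sketch: route SDPA through the memory-efficient kernel and
    # backprop, which dispatches to _efficient_attention_backward on CUDA.
    import torch
    import torch.nn.functional as F

    q, k, v = (torch.randn(2, 8, 128, 64, device="cuda", requires_grad=True)
               for _ in range(3))
    with torch.backends.cuda.sdp_kernel(enable_flash=False,
                                        enable_math=False,
                                        enable_mem_efficient=True):
        F.scaled_dot_product_attention(q, k, v).sum().backward()
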
@@ -14420,16 +14618,20 @@
structured_delegate: special_chebyshev_polynomial_t.out
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
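
Each Scalar overload of the orthogonal-polynomial family in the hunks above and below (x_scalar, n_scalar, x_scalar_out, repeated for the chebyshev, hermite, laguerre, and legendre variants) gains an explicit CompositeExplicitAutograd kernel; the diff is the same pattern throughout, so one hedged usage sketch covers them all. Values here are illustrative:

    # Hedged sketch: the n_scalar overload evaluates T_2(x) = 2*x**2 - 1
    # elementwise; the x_scalar overload broadcasts a scalar x over tensor n.
    import torch

    x = torch.linspace(-1.0, 1.0, 5)
    t2 = torch.special.chebyshev_polynomial_t(x, 2.0)          # n_scalar
    assert torch.allclose(t2, 2 * x**2 - 1)

    n = torch.tensor([0.0, 1.0, 2.0])
    vals = torch.special.chebyshev_polynomial_t(0.5, n)        # x_scalar
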
@@ -14442,10 +14644,12 @@
structured: True
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_t_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14463,16 +14667,20 @@
structured_delegate: special_chebyshev_polynomial_u.out
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14485,10 +14693,12 @@
structured: True
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_u_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14506,16 +14716,20 @@
structured_delegate: special_chebyshev_polynomial_v.out
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14528,10 +14742,12 @@
structured: True
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_v_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14549,16 +14765,20 @@
structured_delegate: special_chebyshev_polynomial_w.out
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14571,10 +14791,12 @@
structured: True
variants: function
tags: pointwise
- func: special_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_chebyshev_polynomial_w_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14592,16 +14814,20 @@
structured_delegate: special_hermite_polynomial_h.out
variants: function
tags: pointwise
- func: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_h
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_h
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14614,10 +14840,12 @@
structured: True
variants: function
tags: pointwise
- func: special_hermite_polynomial_h.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_h_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14635,16 +14863,20 @@
structured_delegate: special_hermite_polynomial_he.out
variants: function
tags: pointwise
- func: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_he
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_he
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14657,10 +14889,12 @@
structured: True
variants: function
tags: pointwise
- func: special_hermite_polynomial_he.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_hermite_polynomial_he_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14678,16 +14912,20 @@
structured_delegate: special_laguerre_polynomial_l.out
variants: function
tags: pointwise
- func: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_laguerre_polynomial_l
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_laguerre_polynomial_l
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14700,10 +14938,12 @@
structured: True
variants: function
tags: pointwise
- func: special_laguerre_polynomial_l.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_laguerre_polynomial_l_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14721,16 +14961,20 @@
structured_delegate: special_legendre_polynomial_p.out
variants: function
tags: pointwise
- func: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_legendre_polynomial_p
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_legendre_polynomial_p
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14743,10 +14987,12 @@
structured: True
variants: function
tags: pointwise
- func: special_legendre_polynomial_p.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_legendre_polynomial_p_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14854,16 +15100,20 @@
structured_delegate: special_shifted_chebyshev_polynomial_t.out
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14876,10 +15126,12 @@
structured: True
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_t.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_t_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14897,16 +15149,20 @@
structured_delegate: special_shifted_chebyshev_polynomial_u.out
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14919,10 +15175,12 @@
structured: True
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_u.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_u_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14940,16 +15198,20 @@
structured_delegate: special_shifted_chebyshev_polynomial_v.out
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14962,10 +15224,12 @@
structured: True
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_v_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -14983,16 +15247,20 @@
structured_delegate: special_shifted_chebyshev_polynomial_w.out
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w
device_check: NoCheck
python_module: special
variants: function
tags: pointwise
@@ -15005,9 +15273,11 @@
structured: True
variants: function
tags: pointwise
- func: special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)
+ dispatch:
+ CompositeExplicitAutograd: special_shifted_chebyshev_polynomial_w_out
device_check: NoCheck
python_module: special
variants: function
tags: pointwise