codegen/native_functions.yaml in torch-rb-0.17.1 vs codegen/native_functions.yaml in torch-rb-0.18.0
- removed in 0.18.0 (old)
+ added in 0.18.0 (new)
@@ -336,30 +336,30 @@
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: abs
SparseCPU, SparseCUDA: abs_sparse
- SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr
NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs
tags: [core, pointwise]
- func: abs_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: abs_
SparseCPU, SparseCUDA: abs_sparse_
- SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_
NestedTensorCPU, NestedTensorCUDA: NestedTensor_abs_
- func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: abs_out
MPS: abs_out_mps
SparseCPU, SparseCUDA: abs_sparse_out
- SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: abs_sparse_csr_out
tags: pointwise
# Note [Adding an alias]
# To add an alias do the following:
#
@@ -398,18 +398,18 @@
- func: angle(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CPU, CUDA: angle
- SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr
tags: pointwise
- func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: angle_out
- SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: angle_sparse_csr_out
tags: pointwise
- func: view_as_real(Tensor(a) self) -> Tensor(a)
variants: function
dispatch:
@@ -423,31 +423,31 @@
- func: sgn(Tensor self) -> Tensor
variants: function, method
structured_delegate: sgn.out
dispatch:
SparseCPU, SparseCUDA: sgn_sparse
- SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn
tags: pointwise
- func: sgn_(Tensor(a!) self) -> Tensor(a!)
variants: method
structured_delegate: sgn.out
dispatch:
SparseCPU, SparseCUDA: sgn_sparse_
- SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_
NestedTensorCPU, NestedTensorCUDA: NestedTensor_sgn_
tags: pointwise
- func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sgn_out
MPS: sgn_out_mps
SparseCPU, SparseCUDA: sgn_sparse_out
- SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sgn_sparse_csr_out
tags: pointwise
- func: chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
variants: method
@@ -470,11 +470,11 @@
- func: _conj_physical(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: _conj_physical
- SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr
autogen: _conj_physical.out
- func: conj_physical(Tensor self) -> Tensor
variants: function, method
tags: pointwise
@@ -482,18 +482,18 @@
- func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU, CUDA: conj_physical_out
MPS: conj_physical_out_mps
SparseCPU, SparseCUDA: conj_physical_out_sparse
- SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_out
tags: pointwise
- func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
dispatch:
CompositeExplicitAutograd: conj_physical_
- SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: conj_physical_sparse_csr_
tags: pointwise
- func: resolve_conj(Tensor(a) self) -> Tensor(a)
variants: function, method
@@ -535,13 +535,15 @@
- func: arccos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
- func: avg_pool1d(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, bool ceil_mode=False, bool count_include_pad=True) -> Tensor
tags: core
+ autogen: avg_pool1d.out
- func: adaptive_avg_pool1d(Tensor self, int[1] output_size) -> Tensor
tags: core
+ autogen: adaptive_avg_pool1d.out
# Return: (Tensor output, Tensor indices)
- func: adaptive_max_pool1d(Tensor self, int[1] output_size) -> (Tensor, Tensor)
- func: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
@@ -692,11 +694,14 @@
- func: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: all.out
variants: function, method
+ dispatch:
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_all
+
- func: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: all.dims_out
variants: function, method
cpp_no_default_args: ['dim']
@@ -861,29 +866,29 @@
- func: asinh(Tensor self) -> Tensor
variants: function, method
structured_delegate: asinh.out
dispatch:
SparseCPU, SparseCUDA: asinh_sparse
- SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr
tags: [core, pointwise]
- func: asinh_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
structured_delegate: asinh.out
dispatch:
SparseCPU, SparseCUDA: asinh_sparse_
- SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_
tags: pointwise
- func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: asinh_out
MPS: asinh_out_mps
SparseCPU, SparseCUDA: asinh_sparse_out
- SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asinh_sparse_csr_out
tags: pointwise
# arcsinh, alias for asinh
- func: arcsinh(Tensor self) -> Tensor
variants: function, method
@@ -896,29 +901,29 @@
- func: atanh(Tensor self) -> Tensor
structured_delegate: atanh.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: atanh_sparse
- SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr
tags: [core, pointwise]
- func: atanh_(Tensor(a!) self) -> Tensor(a!)
structured_delegate: atanh.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: atanh_sparse_
- SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_
tags: pointwise
- func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: atanh_out
MPS: atanh_out_mps
SparseCPU, SparseCUDA: atanh_sparse_out
- SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atanh_sparse_csr_out
tags: pointwise
# arctanh, alias for atanh
- func: arctanh(Tensor self) -> Tensor
variants: function, method
@@ -952,31 +957,31 @@
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: asin.out
dispatch:
SparseCPU, SparseCUDA: asin_sparse
- SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr
tags: [core, pointwise]
- func: asin_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
structured_delegate: asin.out
dispatch:
SparseCPU, SparseCUDA: asin_sparse_
- SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_
tags: pointwise
- func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: asin_out
MPS: asin_out_mps
SparseCPU, SparseCUDA: asin_sparse_out
- SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: asin_sparse_csr_out
tags: pointwise
# arcsin, alias of asin
- func: arcsin(Tensor self) -> Tensor
variants: function, method
@@ -990,31 +995,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: atan.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: atan_sparse
- SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr
tags: [core, pointwise]
- func: atan_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: atan.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: atan_sparse_
- SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_
tags: pointwise
- func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: atan_out
MPS: atan_out_mps
SparseCPU, SparseCUDA: atan_sparse_out
- SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: atan_sparse_csr_out
tags: pointwise
# arctan, alias of atan
- func: arctan(Tensor self) -> Tensor
variants: function, method
@@ -1421,31 +1426,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: ceil.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: ceil_sparse
- SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr
tags: [core, pointwise]
- func: ceil_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: ceil.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: ceil_sparse_
- SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_
tags: pointwise
- func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: ceil_out
MPS: ceil_out_mps
SparseCPU, SparseCUDA: ceil_sparse_out
- SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: ceil_sparse_csr_out
tags: pointwise
# alias for torch.linalg.multi_dot
- func: chain_matmul(Tensor[] matrices) -> Tensor
variants: function
@@ -1760,11 +1765,11 @@
device_guard: False
dispatch:
MkldnnCPU: copy_mkldnn_
SparseCPU, SparseCUDA: copy_sparse_wrapper_
CompositeExplicitAutograd: copy_
- SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_
NestedTensorCPU, NestedTensorCUDA: copy_nested_
autogen: copy.out
- func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
dispatch:
@@ -2336,11 +2341,11 @@
autogen: _embedding_bag.out
tags: core
- func: _embedding_bag_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
dispatch:
- CompositeImplicitAutograd: _embedding_bag_backward_symint
+ CPU, CUDA: _embedding_bag_backward_symint
- func: _embedding_bag_sparse_backward(Tensor grad, Tensor indices, Tensor offsets, Tensor offset2bag, Tensor bag_size, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
dispatch:
CompositeImplicitAutograd: _embedding_bag_sparse_backward_symint
@@ -2368,12 +2373,14 @@
CPU: empty_cpu
CUDA: empty_cuda
MPS: empty_mps
Meta: empty_meta_symint
MkldnnCPU: empty_mkldnn
- SparseCPU, SparseCUDA, SparseMeta: empty_sparse
- SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_sparse_compressed
+ SparseCPU, SparseCUDA: empty_sparse
+ SparseMeta: empty_sparse_symint
+ SparseCsrCPU, SparseCsrCUDA: empty_sparse_compressed
+ SparseCsrMeta: empty_sparse_compressed_symint
QuantizedCPU, QuantizedCUDA, QuantizedMeta: empty_unknown_quantized
tags: core
- func: empty_permuted(SymInt[] size, int[] physical_layout, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
@@ -2444,11 +2451,11 @@
Meta: resize__symint
CPU: resize_
CUDA: resize_cuda_
MPS: resize_mps_
QuantizedCPU: quantized_resize_cpu_
- SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_sparse_csr_
autogen: resize, resize.out
# This is a utility function to enable users to resize out tensor while registering kernels for out variants.
# Eventually, we can consider exposing `resize_output` as a public API to ship it with python op registration
# to make it easy to register out variants for ops.
@@ -2495,31 +2502,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: erf.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: erf_sparse
- SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr
tags: [core, pointwise]
- func: erf_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: erf.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: erf_sparse_
- SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_
tags: pointwise
- func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: erf_out
MPS: erf_out_mps
SparseCPU, SparseCUDA: erf_sparse_out
- SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erf_sparse_csr_out
tags: pointwise
- func: erfc(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: erfc.out
@@ -2583,31 +2590,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: expm1.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: expm1_sparse
- SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr
tags: [core, pointwise]
- func: expm1_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: expm1.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: expm1_sparse_
- SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_
tags: pointwise
- func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: expm1_out
MPS: expm1_out_mps
SparseCPU, SparseCUDA: expm1_sparse_out
- SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: expm1_sparse_csr_out
tags: pointwise
- func: expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
device_check: NoCheck
@@ -2681,11 +2688,11 @@
dispatch:
CPU, CUDA: fill_
MPS: fill_scalar_mps
QuantizedCPU, QuantizedCUDA: fill_quantized_
Meta: fill_meta_
- SparseCsrCPU, SparseCsrCUDA: fill_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: fill_sparse_csr_
NestedTensorCPU, NestedTensorCUDA: fill_nested_
autogen: fill.Scalar_out
- func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -2702,31 +2709,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: floor.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: floor_sparse
- SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr
tags: [core, pointwise]
- func: floor_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: floor.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: floor_sparse_
- SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_
tags: pointwise
- func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: floor_out
MPS: floor_out_mps
SparseCPU, SparseCUDA: floor_sparse_out
- SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: floor_sparse_csr_out
tags: pointwise
- func: floor_divide(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
@@ -2767,31 +2774,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: frac.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: frac_sparse
- SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr
tags: pointwise
- func: frac_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: frac.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: frac_sparse_
- SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_
tags: pointwise
- func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: frac_out
MPS: frac_out_mps
SparseCPU, SparseCUDA: frac_sparse_out
- SparseCsrCPU, SparseCsrCUDA: frac_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: frac_sparse_csr_out
tags: pointwise
- func: full.names(int[] size, Scalar fill_value, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
device_check: NoCheck
device_guard: False
@@ -3059,10 +3066,22 @@
- func: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
variants: function
dispatch:
CompositeExplicitAutograd: _unsafe_index
+# Used by inductor to generate masked loads
+# Note that we don't support boolean indexing, to avoid dynamic output shapes
+- func: _unsafe_masked_index(Tensor self, Tensor mask, Tensor?[] indices, Scalar fill) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _unsafe_masked_index
+
+- func: _unsafe_masked_index_put_accumulate(Tensor self, Tensor mask, Tensor?[] indices, Tensor values) -> Tensor
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _unsafe_masked_index_put_accumulate
+
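The two `_unsafe_masked_index*` entries added above are primarily for Inductor; the comment notes they express masked loads without boolean indexing so output shapes stay static. A minimal Python sketch of the assumed semantics (illustrative helper names, not the actual ATen decomposition; assumes every dimension gets an index tensor and that out-of-range indices are simply clamped):

    import torch

    def unsafe_masked_index_sketch(x, mask, indices, fill):
        # Clamp indices so the gather itself can never read out of bounds, then
        # substitute `fill` wherever the mask is False.
        clamped = tuple(i.clamp(0, x.size(d) - 1) for d, i in enumerate(indices))
        gathered = x[clamped]
        return torch.where(mask, gathered, torch.full_like(gathered, fill))

    def unsafe_masked_index_put_accumulate_sketch(x, mask, indices, values):
        # Zero out masked contributions, then accumulate into x at the clamped indices.
        clamped = tuple(i.clamp(0, x.size(d) - 1) for d, i in enumerate(indices))
        return x.index_put(clamped, values.masked_fill(~mask, 0), accumulate=True)
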
- func: index_copy.out(Tensor self, int dim, Tensor index, Tensor source, *, Tensor(a!) out) -> Tensor(a!)
structured: True
variants: function
precomputed:
- dim -> int dim
@@ -3159,11 +3178,11 @@
device_check: NoCheck
device_guard: False
dispatch:
CPU, CUDA, MPS: isnan
SparseCPU, SparseCUDA: isnan_sparse
- SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isnan_sparse_csr
autogen: isnan.out
tags: [core, pointwise]
- func: is_distributed(Tensor self) -> bool
variants: function, method
@@ -3379,10 +3398,14 @@
- func: fbgemm_linear_quantize_weight(Tensor input) -> (Tensor, Tensor, float, int)
- func: fbgemm_pack_gemm_matrix_fp16(Tensor input) -> Tensor
+- func: _wrapped_linear_prepack(Tensor weight, Tensor weight_scale, Tensor weight_zero_point, Tensor bias) -> Tensor
+
+- func: _wrapped_quantized_linear_prepacked(Tensor input, Tensor input_scale, Tensor input_zero_point, Tensor packed_weight, Tensor output_scale, Tensor output_zero_point, int out_channel) -> Tensor
+
- func: fbgemm_linear_fp16_weight_fp32_activation(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
- func: fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor
- func: fbgemm_pack_quantized_matrix(Tensor input) -> Tensor
@@ -3485,31 +3508,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: log1p.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: log1p_sparse
- SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr
tags: [core, pointwise]
- func: log1p_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: log1p.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: log1p_sparse_
- SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_
tags: pointwise
- func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: log1p_out
MPS: log1p_out_mps
SparseCPU, SparseCUDA: log1p_sparse_out
- SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: log1p_sparse_csr_out
tags: pointwise
- func: log2(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: log2.out
@@ -3897,15 +3920,14 @@
dispatch:
CompositeExplicitAutograd: mean
tags: core
# For normal naming convention this should be `mean.out`. However since we already have `mean.out` we have to rename this.
-# FIXME: fix CI jobs and re-enable this
-#- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
-# device_check: NoCheck # TensorIterator
-# dispatch:
-# CompositeExplicitAutograd: mean_dtype_out
+- func: mean.dtype_out(Tensor self, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ dispatch:
+ CompositeExplicitAutograd: mean_dtype_out
- func: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
structured_delegate: mean.out
device_check: NoCheck # TensorIterator
variants: function, method
@@ -4093,21 +4115,21 @@
- func: mm(Tensor self, Tensor mat2) -> Tensor
structured_delegate: mm.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: _sparse_mm
- SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm
tags: core
- func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
CPU: mm_out_cpu
CUDA: mm_out_cuda
MPS: mm_out_mps
SparseCPU, SparseCUDA: _sparse_mm_out
- SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: _sparse_csr_mm_out
- func: _int_mm(Tensor self, Tensor mat2) -> Tensor
dispatch:
CPU: _int_mm_cpu
CUDA: _int_mm_cuda
@@ -4119,10 +4141,11 @@
- func: _convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor
dispatch:
CPU: _convert_weight_to_int4pack_cpu
CUDA: _convert_weight_to_int4pack_cuda
+ MPS: _convert_weight_to_int4pack_mps
- func: _weight_int4pack_mm(Tensor self, Tensor mat2, int qGroupSize, Tensor qScaleAndZeros) -> Tensor
dispatch:
CPU: _weight_int4pack_mm_cpu
MPS: _weight_int4pack_mm_mps
@@ -4163,11 +4186,11 @@
device_check: NoCheck # TensorIterator
structured_delegate: mul.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: mul_sparse
- SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr
MkldnnCPU: mkldnn_mul
ZeroTensor: mul_zerotensor
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Tensor
tags: [core, pointwise]
@@ -4175,11 +4198,11 @@
device_check: NoCheck # TensorIterator
structured_delegate: mul.out
variants: method
dispatch:
SparseCPU, SparseCUDA: mul_sparse_
- SparseCsrCPU, SparseCsrCUDA: mul_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_
MkldnnCPU: mkldnn_mul_
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Tensor
tags: pointwise
- func: mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
@@ -4189,30 +4212,30 @@
dispatch:
CPU, CUDA: mul_out
MPS: mul_out_mps
SparseCPU: mul_out_sparse_cpu
SparseCUDA: mul_out_sparse_cuda
- SparseCsrCPU, SparseCsrCUDA: mul_out_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr
MkldnnCPU: mkldnn_mul_out
tags: pointwise
# For C++ only, until we have conversion from C++ numbers to Tensor
- func: mul.Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CompositeExplicitAutograd: mul
- SparseCsrCPU, SparseCsrCUDA: mul_scalar_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_scalar_sparse_csr
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul_Scalar
tags: [core, pointwise]
- func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
CompositeExplicitAutograd: mul_
- SparseCsrCPU, SparseCsrCUDA: mul__scalar_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul__scalar_sparse_csr
NestedTensorCPU, NestedTensorCUDA: NestedTensor_mul__Scalar
autogen: mul.Scalar_out
tags: pointwise
# multiply, alias for mul
@@ -4528,25 +4551,28 @@
CompositeImplicitAutograd: math_channel_shuffle
- func: is_pinned(Tensor self, Device? device=None) -> bool
variants: method
dispatch:
- NestedTensorCUDA, CUDA: is_pinned_cuda
- MPS: is_pinned_mps
- CompositeExplicitAutograd: is_pinned_default
+ # the NestedTensor keys are necessary because NestedTensor has been removed
+ # from the CompositeExplicitAutograd keyset see Note [NestedTensor Not Included in Backend Keys]
+ CompositeExplicitAutograd, NestedTensorCPU: is_pinned
+ SparseCsrCPU: is_pinned_sparse_compressed
+ SparseCPU: is_pinned_sparse_coo
# TODO: add a copy kwarg that guarantees that the tensor is put into fresh
# pinned memory
- func: pin_memory(Tensor(a) self, Device? device=None) -> Tensor(a)
variants: method
# Unlike pin_memory, this is guaranteed to give a new non-aliasing tensor
- func: _pin_memory(Tensor self, Device? device=None) -> Tensor
dispatch:
- CUDA: _pin_memory_cuda
- MPS: _pin_memory_mps
- NestedTensorCUDA, NestedTensorCPU: _pin_memory_nested
+ CompositeExplicitAutograd: _pin_memory
+ NestedTensorCPU: _pin_memory_nested
+ SparseCPU: _pin_memory_sparse_coo
+ SparseCsrCPU: _pin_memory_sparse_compressed
autogen: _pin_memory.out
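With the sparse dispatch entries above, pinning extends to sparse COO and compressed tensors. A hedged illustration (assumes an accelerator is present, since pinning requires one, and that the public `pin_memory`/`is_pinned` Tensor methods route to these new kernels):

    import torch

    x = torch.randn(4, 4).to_sparse_csr()
    print(x.is_pinned())                  # False: answered by is_pinned_sparse_compressed
    if torch.cuda.is_available():
        pinned = x.pin_memory()           # expected to pin the member tensors in host memory
        print(pinned.is_pinned())         # True if pinning of the members succeeded
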
- func: pinverse(Tensor self, float rcond=1e-15) -> Tensor
variants: function, method
@@ -4556,46 +4582,46 @@
- func: rad2deg(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: rad2deg
SparseCPU, SparseCUDA: rad2deg_sparse
- SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr
- func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
dispatch:
CompositeExplicitAutograd: rad2deg_
SparseCPU, SparseCUDA: rad2deg_sparse_
- SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_
- func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CompositeExplicitAutograd: rad2deg_out
SparseCPU, SparseCUDA: rad2deg_sparse_out
- SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: rad2deg_sparse_csr_out
- func: deg2rad(Tensor self) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: deg2rad
SparseCPU, SparseCUDA: deg2rad_sparse
- SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr
tags: pointwise
- func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
variants: function, method
dispatch:
CompositeExplicitAutograd: deg2rad_
SparseCPU, SparseCUDA: deg2rad_sparse_
- SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_
tags: pointwise
- func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CompositeExplicitAutograd: deg2rad_out
SparseCPU, SparseCUDA: deg2rad_sparse_out
- SparseCsrCPU, SparseCsrCUDA: deg2rad_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: deg2rad_sparse_csr_out
tags: pointwise
- func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
dispatch:
CompositeExplicitAutograd: scalar_tensor
@@ -4809,21 +4835,21 @@
device_check: NoCheck # TensorIterator
structured_delegate: neg.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: neg_sparse
- SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr
NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg
tags: [core, pointwise]
- func: neg_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: neg.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: neg_sparse_
- SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_
NestedTensorCPU, NestedTensorCUDA: NestedTensor_neg_
tags: pointwise
- func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -4831,11 +4857,11 @@
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: neg_out
MPS: neg_out_mps
SparseCPU, SparseCUDA: neg_out_sparse
- SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: neg_sparse_csr_out
tags: pointwise
# Alias for neg
- func: negative(Tensor self) -> Tensor
variants: function, method
@@ -4915,20 +4941,20 @@
device_check: NoCheck # TensorIterator
structured_delegate: round.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: round_sparse
- SparseCsrCPU, SparseCsrCUDA: round_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr
tags: [core, pointwise]
- func: round_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: round.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: round_sparse_
- SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_
tags: pointwise
- func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
@@ -4936,11 +4962,11 @@
dispatch:
CPU: round_out
CUDA: round_out
MPS: round_out_mps
SparseCPU, SparseCUDA: round_sparse_out
- SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: round_sparse_csr_out
tags: pointwise
- func: round.decimals(Tensor self, *, int decimals) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: round.decimals_out
@@ -4979,11 +5005,11 @@
MkldnnCPU: mkldnn_relu
QuantizedCPU: relu_quantized_cpu
QuantizedCUDA: relu_quantized_cuda
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu
SparseCPU, SparseCUDA: relu_sparse
- SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr
tags: [core, pointwise]
- func: relu_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: function, method
@@ -4993,11 +5019,11 @@
MkldnnCPU: mkldnn_relu_
QuantizedCPU: relu_quantized_cpu_
QuantizedCUDA: relu_quantized_cuda_
NestedTensorCPU, NestedTensorCUDA: NestedTensor_relu_
SparseCPU, SparseCUDA: relu_sparse_
- SparseCsrCPU, SparseCsrCUDA: relu_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: relu_sparse_csr_
autogen: relu.out
tags: pointwise
- func: relu6(Tensor self) -> Tensor
python_module: nn
@@ -5126,11 +5152,11 @@
variants: function, method
device_check: NoCheck
device_guard: False
dispatch:
CompositeExplicitAutograd: select_symint
- SparseCsrCPU, SparseCsrCUDA: select_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_sparse_csr
NestedTensorCPU, NestedTensorCUDA: select_nested
tags: core
- func: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor
variants: function
@@ -5275,32 +5301,32 @@
- func: sin(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: sin.out
variants: function, method
dispatch:
- SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr
SparseCPU, SparseCUDA: sin_sparse
NestedTensorCPU, NestedTensorCUDA: sin_nested
tags: [core, pointwise]
- func: sin_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sin.out
variants: function, method
dispatch:
- SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_
SparseCPU, SparseCUDA: sin_sparse_
tags: pointwise
- func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sin_out
MPS: sin_out_mps
- SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sin_sparse_csr_out
SparseCPU, SparseCUDA: sin_sparse_out
tags: pointwise
- func: sinc(Tensor self) -> Tensor
structured_delegate: sinc.out
@@ -5323,31 +5349,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: sinh.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: sinh_sparse
- SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr
tags: [core, pointwise]
- func: sinh_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sinh.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: sinh_sparse_
- SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_
tags: pointwise
- func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sinh_out
MPS: sinh_out_mps
SparseCPU, SparseCUDA: sinh_sparse_out
- SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sinh_sparse_csr_out
# Returns a copy of this `Variable` that is detached from its autograd graph.
# This method is OK to call if the `Variable` is a view.
#
# NOTE: Previously, if we change the tensor metadata (e.g. sizes / strides /
@@ -5730,11 +5756,11 @@
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
NestedTensorCPU: NestedTensor_sum_dim_CPU
SparseCPU, SparseCUDA: sum_sparse_coo
- SparseCsrCPU, SparseCsrCUDA: sum_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed
tags: core
- func: sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
@@ -5776,31 +5802,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: sqrt.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: sqrt_sparse
- SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr
tags: [core, pointwise]
- func: sqrt_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sqrt.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: sqrt_sparse_
- SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_
tags: pointwise
- func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sqrt_out
MPS: sqrt_out_mps
SparseCPU, SparseCUDA: sqrt_sparse_out
- SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sqrt_sparse_csr_out
tags: pointwise
- func: square(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
@@ -5934,53 +5960,53 @@
device_check: NoCheck # TensorIterator
structured_delegate: tan.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: tan_sparse
- SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr
tags: [core, pointwise]
- func: tan_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: tan.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: tan_sparse_
- SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_
tags: pointwise
- func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: tan_out
MPS: tan_out_mps
SparseCPU, SparseCUDA: tan_sparse_out
- SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tan_sparse_csr_out
tags: pointwise
- func: tanh(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
structured_delegate: tanh.out
variants: function, method
dispatch:
QuantizedCPU: tanh_quantized_cpu
MkldnnCPU: mkldnn_tanh
SparseCPU, SparseCUDA: tanh_sparse
- SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr
NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh
tags: [core, pointwise]
- func: tanh_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: tanh.out
variants: function, method
dispatch:
MkldnnCPU: mkldnn_tanh_
SparseCPU, SparseCUDA: tanh_sparse_
- SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_
NestedTensorCPU, NestedTensorCUDA: NestedTensor_tanh_
tags: pointwise
- func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
@@ -5988,11 +6014,11 @@
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: tanh_out
MPS: tanh_out_mps
SparseCPU, SparseCUDA: tanh_sparse_out
- SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: tanh_sparse_csr_out
tags: pointwise
- func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor
variants: function
@@ -6025,19 +6051,19 @@
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: threshold_backward_out
MPS: threshold_backward_out_mps
SparseCPU, SparseCUDA: threshold_backward_sparse_out
- SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed_out
- func: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
variants: function
structured_delegate: threshold_backward.grad_input
dispatch:
MkldnnCPU: mkldnn_relu_backward
SparseCPU, SparseCUDA: threshold_backward_sparse
- SparseCsrCPU, SparseCsrCUDA: threshold_backward_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: threshold_backward_sparse_compressed
NestedTensorCPU, NestedTensorCUDA: threshold_backwards_nested
tags: pointwise
- func: tile(Tensor self, SymInt[] dims) -> Tensor
variants: function, method
@@ -6183,16 +6209,16 @@
tags: view_copy
dispatch:
CompositeExplicitAutogradNonFunctional: _nested_view_from_buffer_copy
autogen: _nested_view_from_buffer_copy.out
-- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+- func: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)
variants: function
device_check: NoCheck
dispatch: {}
-- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor
+- func: _nested_view_from_jagged_copy(Tensor self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor
variants: function
device_check: NoCheck
tags: view_copy
dispatch:
CompositeExplicitAutogradNonFunctional: _nested_view_from_jagged_copy
@@ -6225,10 +6251,20 @@
- func: _nested_get_ragged_idx(Tensor self) -> int
variants: function
device_check: NoCheck
dispatch: {}
+- func: _nested_get_min_seqlen(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
+- func: _nested_get_max_seqlen(Tensor self) -> Tensor
+ variants: function
+ device_check: NoCheck
+ dispatch: {}
+
- func: _nested_get_jagged_dummy(Tensor any) -> Tensor
category_override: dummy
dispatch: {}
- func: _nested_compute_contiguous_strides_offsets(Tensor nested_size) -> (Tensor, Tensor)
@@ -6249,31 +6285,31 @@
structured_delegate: trunc.out
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
SparseCPU, SparseCUDA: trunc_sparse
- SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr
tags: [core, pointwise]
- func: trunc_(Tensor(a!) self) -> Tensor(a!)
structured_delegate: trunc.out
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
SparseCPU, SparseCUDA: trunc_sparse_
- SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_
tags: pointwise
- func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: trunc_out
MPS: trunc_out_mps
SparseCPU, SparseCUDA: trunc_sparse_out
- SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: trunc_sparse_csr_out
tags: pointwise
# Alias for trunc
- func: fix(Tensor self) -> Tensor
variants: function, method
@@ -6441,10 +6477,11 @@
- func: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
dispatch:
CPU, CUDA, MPS: where
+ NestedTensorCPU, NestedTensorCUDA: NestedTensor_where
tags: [core, pointwise]
- func: where.self_out(Tensor condition, Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
dispatch:
@@ -6778,11 +6815,11 @@
- func: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
variants: function, method
dispatch:
CompositeExplicitAutograd: clone
SparseCPU, SparseCUDA: clone_sparse
- SparseCsrCPU, SparseCsrCUDA: clone_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: clone_sparse_compressed
MkldnnCPU: mkldnn_clone
QuantizedCPU, QuantizedCUDA: quantized_clone
NestedTensorCPU, NestedTensorCUDA: clone_nested
autogen: clone.out
tags: [core, pointwise]
@@ -6802,11 +6839,11 @@
- func: resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!)
use_const_ref_for_mutable_tensors: True
variants: function, method
dispatch:
SparseCPU, SparseCUDA: resize_as_sparse_
- SparseCsrCPU, SparseCsrCUDA: resize_as_sparse_compressed_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: resize_as_sparse_compressed_
autogen: resize_as_sparse, resize_as_sparse.out
- func: zero_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method, function
@@ -6960,11 +6997,11 @@
structured_delegate: addmm.out
variants: function, method
dispatch:
SparseCPU: addmm_sparse_dense_cpu
SparseCUDA: addmm_sparse_dense_cuda
- SparseCsrCPU, SparseCsrCUDA: addmm_sparse_compressed_dense
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: addmm_sparse_compressed_dense
tags: core
- func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
structured_delegate: addmm.out
variants: method
@@ -6982,16 +7019,16 @@
- func: _addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor
structured_delegate: _addmm_activation.out
variants: function, method
-- func: _scaled_mm(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False) -> (Tensor, Tensor)
+- func: _scaled_mm(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False) -> Tensor
variants: function
dispatch:
CUDA: _scaled_mm_cuda
-- func: _scaled_mm.out(Tensor self, Tensor mat2, *, Tensor? bias=None, ScalarType? out_dtype=None, Tensor? scale_a=None, Tensor? scale_b=None, Tensor? scale_result=None, bool use_fast_accum=False, Tensor(a!) out, Tensor(b!) out_amax) -> (Tensor(a!), Tensor(b!))
+- func: _scaled_mm.out(Tensor self, Tensor mat2, Tensor scale_a, Tensor scale_b, Tensor? bias=None, Tensor? scale_result=None, ScalarType? out_dtype=None, bool use_fast_accum=False, *, Tensor(a!) out) -> Tensor(a!)
variants: function
dispatch:
CUDA: _scaled_mm_out_cuda
# NOTE [ Sparse: autograd and API ]
@@ -7182,11 +7219,11 @@
- func: sparse_mask(Tensor self, Tensor mask) -> Tensor
variants: method
dispatch:
SparseCPU, SparseCUDA: sparse_mask
- SparseCsrCPU, SparseCsrCUDA: sparse_mask_sparse_compressed
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_mask_sparse_compressed
autogen: sparse_mask.out
- func: _sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor
variants: method
dispatch:
@@ -7202,11 +7239,11 @@
# Special case of to_dense with custom derivative
- func: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
variants: method
dispatch:
SparseCPU, SparseCUDA: sparse_to_dense
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_dense
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_dense
MkldnnCPU: mkldnn_to_dense
autogen: _to_dense.out
- func: to_dense_backward(Tensor grad, Tensor input, bool? masked_grad=None) -> Tensor
@@ -7383,11 +7420,11 @@
- func: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
variants: method
dispatch:
CPU, CUDA: dense_to_sparse
SparseCPU, SparseCUDA: sparse_coo_to_sparse
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
autogen: _to_sparse.sparse_dim_out
- func: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
variants: method
@@ -7395,11 +7432,11 @@
- func: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
variants: method
dispatch:
CPU, CUDA: dense_to_sparse
SparseCPU, SparseCUDA: sparse_coo_to_sparse
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse
autogen: _to_sparse.out
- func: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
variants: method
@@ -7407,11 +7444,11 @@
- func: _to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
variants: method
dispatch:
CPU, CUDA: dense_to_sparse_csr
SparseCPU, SparseCUDA: coo_to_sparse_csr
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csr
autogen: _to_sparse_csr.out
- func: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
variants: method
@@ -7419,11 +7456,11 @@
- func: _to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
variants: method
dispatch:
CPU, CUDA: dense_to_sparse_csc
SparseCPU, SparseCUDA: coo_to_sparse_csc
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_csc
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_csc
autogen: _to_sparse_csc.out
- func: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
variants: method
@@ -7431,11 +7468,11 @@
- func: _to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
variants: method
dispatch:
CPU, CUDA: dense_to_sparse_bsr
SparseCPU, SparseCUDA: coo_to_sparse_bsr
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsr
autogen: _to_sparse_bsr.out
- func: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
variants: method
@@ -7443,11 +7480,11 @@
- func: _to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
variants: method
dispatch:
CPU, CUDA: dense_to_sparse_bsc
SparseCPU, SparseCUDA: coo_to_sparse_bsc
- SparseCsrCPU, SparseCsrCUDA: sparse_compressed_to_sparse_bsc
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sparse_compressed_to_sparse_bsc
autogen: _to_sparse_bsc.out
- func: _to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor)
variants: function
dispatch:
@@ -8429,33 +8466,33 @@
- func: __lshift__.Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
- CPU, CUDA: __lshift__
+ CPU, CUDA, MPS: __lshift__
tags: pointwise
- func: __lshift__.Tensor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
- CPU, CUDA: __lshift__
+ CPU, CUDA, MPS: __lshift__
tags: pointwise
- func: __ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
- CPU, CUDA: __ilshift__
+ CPU, CUDA, MPS: __ilshift__
autogen: __lshift__.Scalar_out
tags: pointwise
- func: __ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
- CPU, CUDA: __ilshift__
+ CPU, CUDA, MPS: __ilshift__
autogen: __lshift__.Tensor_out
tags: pointwise
- func: bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
@@ -8472,11 +8509,11 @@
- func: bitwise_left_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
- CPU, CUDA: bitwise_left_shift_out
+ CPU, CUDA, MPS: bitwise_left_shift_out
tags: pointwise
- func: bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
@@ -8508,32 +8545,32 @@
- func: __rshift__.Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
- CPU, CUDA: __rshift__
+ CPU, CUDA, MPS: __rshift__
tags: pointwise
- func: __rshift__.Tensor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
- CPU, CUDA: __rshift__
+ CPU, CUDA, MPS: __rshift__
tags: pointwise
- func: __irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
- CPU, CUDA: __irshift__
+ CPU, CUDA, MPS: __irshift__
autogen: __rshift__.Scalar_out
- func: __irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
device_check: NoCheck # TensorIterator
variants: method
dispatch:
- CPU, CUDA: __irshift__
+ CPU, CUDA, MPS: __irshift__
autogen: __rshift__.Tensor_out
- func: bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
variants: function, method
@@ -8549,11 +8586,11 @@
- func: bitwise_right_shift.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
- CPU, CUDA: bitwise_right_shift_out
+ CPU, CUDA, MPS: bitwise_right_shift_out
tags: pointwise
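The shift operators in this block all pick up an MPS kernel. A small guarded example; only the device guard is an addition here, the operators themselves are the ordinary Python shift operators:

    import torch

    if torch.backends.mps.is_available():
        x = torch.tensor([1, 2, 4], device="mps")
        print(x << 1)    # __lshift__ now dispatches to the MPS backend
        print(x >> 1)    # __rshift__ likewise
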
- func: bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
@@ -8856,10 +8893,11 @@
structured_delegate: eq.Tensor_out
device_check: NoCheck # TensorIterator
variants: method, function
dispatch:
QuantizedCPU: eq_quantized_cpu
+ NestedTensorCPU, NestedTensorCUDA: eq_tensor_nested
tags: [core, pointwise]
- func: ge.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
@@ -9500,31 +9538,31 @@
device_check: NoCheck # TensorIterator
structured_delegate: erfinv.out
variants: method, function
dispatch:
SparseCPU, SparseCUDA: erfinv_sparse
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr
tags: pointwise
- func: erfinv_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: erfinv.out
variants: method
dispatch:
SparseCPU, SparseCUDA: erfinv_sparse_
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_
tags: pointwise
- func: erfinv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: erfinv_out
MPS: erfinv_out_mps
SparseCPU, SparseCUDA: erfinv_sparse_out
- SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: erfinv_sparse_csr_out
tags: pointwise
- func: i0(Tensor self) -> Tensor
structured_delegate: i0.out
variants: function, method
@@ -9546,50 +9584,50 @@
device_check: NoCheck # TensorIterator
structured_delegate: sign.out
variants: function, method
dispatch:
SparseCPU, SparseCUDA: sign_sparse
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr
tags: [core, pointwise]
- func: sign_(Tensor(a!) self) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured_delegate: sign.out
variants: method
dispatch:
SparseCPU, SparseCUDA: sign_sparse_
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_
tags: pointwise
- func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
device_check: NoCheck # TensorIterator
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: sign_out
MPS: sign_out_mps
SparseCPU, SparseCUDA: sign_sparse_out
- SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sign_sparse_csr_out
tags: pointwise
- func: signbit(Tensor self) -> Tensor
variants: function, method
structured_delegate: signbit.out
dispatch:
SparseCPU, SparseCUDA: signbit_sparse
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr
tags: pointwise
- func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU: signbit_out
CUDA: signbit_out
MPS: signbit_out_mps
SparseCPU, SparseCUDA: signbit_sparse_out
- SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: signbit_sparse_csr_out
tags: pointwise
- func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
@@ -10036,14 +10074,15 @@
variants: method, function
- func: argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor
device_check: NoCheck # TensorIterator
variants: method, function
- dispatch:
- CPU, CUDA, MPS: argsort_stable
- autogen: argsort.stable_out
+- func: argsort.stable_out(Tensor self, *, bool stable, int dim=-1, bool descending=False, Tensor(a!) out) -> Tensor(a!)
+ device_check: NoCheck # TensorIterator
+ variants: function
+
- func: argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor
variants: method, function
- func: topk.values(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
structured: True
@@ -10218,11 +10257,11 @@
variants: method
dispatch:
CPU, CUDA: normal_
MPS: normal_mps_
Meta: normal_meta_
- SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: normal_sparse_csr_
NestedTensorCPU, NestedTensorCUDA: normal_nested_
autogen: normal.out
# Only used by the functionalization pass.
# Normally, the codegen would be able to generate a normal() NativeFunction,
@@ -13022,11 +13061,11 @@
device_guard: False
dispatch:
CompositeExplicitAutograd: isinf
SparseCPU, SparseCUDA: isinf_sparse
SparseMeta: isinf_sparse_meta
- SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isinf_sparse_csr
autogen: isinf.out
tags: [core, pointwise]
- func: record_stream(Tensor(a!) self, Stream s) -> ()
variants: method
@@ -13036,37 +13075,37 @@
- func: isposinf(Tensor self) -> Tensor
variants: function, method
structured_delegate: isposinf.out
dispatch:
SparseCPU, SparseCUDA: isposinf_sparse
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr
tags: pointwise
- func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: isposinf_out
SparseCPU, SparseCUDA: isposinf_sparse_out
- SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isposinf_sparse_csr_out
tags: pointwise
- func: isneginf(Tensor self) -> Tensor
variants: function, method
structured_delegate: isneginf.out
dispatch:
SparseCPU, SparseCUDA: isneginf_sparse
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
tags: pointwise
- func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
structured: True
structured_inherits: TensorIteratorBase
dispatch:
CPU, CUDA: isneginf_out
SparseCPU, SparseCUDA: isneginf_sparse_out
- SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr_out
tags: pointwise
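A minimal sketch of the two signed-infinity predicates on a dense tensor (the sparse dispatch entries above are not exercised here):

    import torch

    x = torch.tensor([float("inf"), float("-inf"), float("nan"), 1.0])
    print(torch.isposinf(x))   # tensor([ True, False, False, False])
    print(torch.isneginf(x))   # tensor([False,  True, False, False])
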
# NOTE [_add_batch_dim and _remove_batch_dim]
# _add_batch_dim and _remove_batch_dim are meant to be used in the implementation
# of the vmap frontend API (see torch/_vmap_internals.py). They are not
@@ -13785,14 +13824,20 @@
# linalg.lu_factor
- func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
python_module: linalg
variants: function
+ dispatch:
+ CompositeImplicitAutograd: linalg_lu_factor
+ MPS: linalg_lu_factor_mps
- func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
python_module: linalg
variants: function
+ dispatch:
+ CompositeImplicitAutograd: linalg_lu_factor_out
+ MPS: linalg_lu_factor_out_mps
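A minimal Python sketch of LU factorisation through torch.linalg.lu_factor, run on CPU; whether the mps entries above are reachable depends on the build:

    import torch

    A = torch.tensor([[4.0, 3.0], [6.0, 3.0]])
    LU, pivots = torch.linalg.lu_factor(A)
    P, L, U = torch.lu_unpack(LU, pivots)   # expand the packed factors
    print(torch.allclose(P @ L @ U, A))     # True
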
- func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
python_module: linalg
structured_delegate: linalg_lu_factor_ex.out
variants: function
@@ -14174,10 +14219,15 @@
python_module: linalg
- func: linalg_solve(Tensor A, Tensor B, *, bool left=True) -> Tensor
python_module: linalg
+- func: _spsolve(Tensor A, Tensor B, *, bool left=True) -> Tensor
+ python_module: sparse
+ dispatch:
+ SparseCsrCUDA: _sparse_csr_linear_solve
+
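_spsolve is a private entry point with only a CUDA sparse-CSR kernel. A hedged sketch that falls back to a dense solve when a GPU or that kernel is unavailable; the torch.ops.aten._spsolve spelling is an assumption about how private schemas are surfaced:

    import torch

    A = torch.tensor([[3.0, 0.0], [0.0, 2.0]])
    b = torch.tensor([[6.0], [4.0]])
    try:
        # May raise if there is no GPU or the build lacks the sparse solver.
        x = torch.ops.aten._spsolve(A.to_sparse_csr().cuda(), b.cuda()).cpu()
    except (RuntimeError, NotImplementedError):
        x = torch.linalg.solve(A, b)        # dense fallback
    print(x.squeeze())                      # tensor([2., 2.])
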
- func: linalg_solve.out(Tensor A, Tensor B, *, bool left=True, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
- func: linalg_tensorinv(Tensor self, int ind=2) -> Tensor
python_module: linalg
@@ -14350,11 +14400,11 @@
variants: function
dispatch:
CPU, CUDA: _segment_reduce_backward_kernel
autogen: _segment_reduce_backward.out
-- func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0) -> Tensor
+- func: pad_sequence(Tensor[] sequences, bool batch_first=False, float padding_value=0.0, str padding_side="right") -> Tensor
python_module: nn
variants: function
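A minimal sketch of the padding_side argument, assuming torch.nn.utils.rnn.pad_sequence forwards the keyword shown in the schema above:

    import torch
    from torch.nn.utils.rnn import pad_sequence

    seqs = [torch.tensor([1, 2, 3]), torch.tensor([4])]
    left = pad_sequence(seqs, batch_first=True, padding_value=0,
                        padding_side="left")
    print(left)   # tensor([[1, 2, 3],
                  #         [0, 0, 4]])
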
- func: flatten_dense_tensors(Tensor[] tensors) -> Tensor
variants: function
@@ -14456,11 +14506,11 @@
- func: select_copy.int(Tensor self, int dim, SymInt index) -> Tensor
variants: function
dispatch:
CompositeExplicitAutogradNonFunctional: select_copy_symint
- SparseCsrCPU, SparseCsrCUDA: select_copy_sparse_csr
+ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: select_copy_sparse_csr
tags: view_copy
autogen: select_copy.int_out
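select_copy behaves like indexing along a dimension but always materialises a fresh tensor instead of returning a view; a minimal dense sketch (the CSR key above is not exercised):

    import torch

    x = torch.arange(6).reshape(2, 3)
    row = torch.select_copy(x, 0, 1)   # like x[1], but a copy
    print(row)                         # tensor([3, 4, 5])
    x[1, 0] = 99
    print(row)                         # still tensor([3, 4, 5]); independent of x
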
- func: detach_copy(Tensor self) -> Tensor
variants: function
@@ -14646,22 +14696,29 @@
- func: _jagged_to_padded_dense_forward(Tensor values, Tensor[] offsets, SymInt[] max_lengths, float padding_value=0.0) -> Tensor
variants: function
dispatch:
CUDA: _fbgemm_jagged_to_padded_dense_forward
+ CPU: _jagged_to_padded_dense_forward_cpu
- func: _padded_dense_to_jagged_forward(Tensor dense, Tensor[] offsets, SymInt? total_L=None) -> Tensor
variants: function
dispatch:
CUDA: _fbgemm_dense_to_jagged_forward_symint
+ CPU: _padded_dense_to_jagged_forward_cpu
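A minimal sketch of converting a jagged nested tensor to a padded dense tensor through the public API, shown on CPU:

    import torch

    nt = torch.nested.nested_tensor(
        [torch.tensor([1.0, 2.0]), torch.tensor([3.0])], layout=torch.jagged
    )
    padded = torch.nested.to_padded_tensor(nt, 0.0)
    print(padded)   # tensor([[1., 2.],
                    #         [3., 0.]])
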
- func: _nested_tensor_softmax_with_shape(Tensor self, Tensor query) -> Tensor
dispatch:
NestedTensorCPU: NestedTensor_softmax_dropout
NestedTensorCUDA: NestedTensor_softmax_dropout_cuda
tags: nondeterministic_seeded
+- func: _safe_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ dispatch:
+ CompositeExplicitAutograd: _safe_softmax
+ NestedTensorCPU, NestedTensorCUDA: _safe_softmax
+
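_safe_softmax is a private helper used by the attention code paths; its point is to turn rows that are entirely masked to -inf into zeros rather than NaNs. A hedged sketch, where both the torch.ops.aten spelling and the zero-fill behaviour are assumptions:

    import torch

    scores = torch.tensor([[0.5, 1.0],
                           [float("-inf"), float("-inf")]])
    print(torch.softmax(scores, dim=-1))             # second row becomes NaNs
    print(torch.ops.aten._safe_softmax(scores, -1))  # second row becomes zeros
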
# Apparently, putting "forward" in the name will cause Python bindings to be skipped, so "fwd" it is.
- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
variants: function
dispatch:
CPU, CUDA, NestedTensorCPU, NestedTensorCUDA: transformer_encoder_layer_forward
@@ -14672,28 +14729,33 @@
dispatch:
CPU, NestedTensorCPU: native_multi_head_attention_cpu
CUDA, NestedTensorCUDA: native_multi_head_attention_cuda
autogen: _native_multi_head_attention.out
-- func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> Tensor
+- func: scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> Tensor
python_module: nn
variants: function
autogen: scaled_dot_product_attention.out
tags: nondeterministic_seeded
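A minimal sketch of grouped-query attention, assuming the enable_gqa flag declared above is surfaced by torch.nn.functional.scaled_dot_product_attention; here 8 query heads share 2 key/value heads:

    import torch
    import torch.nn.functional as F

    q = torch.randn(1, 8, 16, 64)   # (batch, heads, seq, head_dim)
    k = torch.randn(1, 2, 16, 64)   # fewer key/value heads than query heads
    v = torch.randn(1, 2, 16, 64)
    out = F.scaled_dot_product_attention(q, k, v, enable_gqa=True)
    print(out.shape)                # torch.Size([1, 8, 16, 64])
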
# This aten function is kept so that we can test the choice function from Python
-- func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> int
+- func: _fused_sdp_choice(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, *, float? scale=None, bool enable_gqa=False) -> int
dispatch:
Meta: _fused_sdp_choice_meta
CPU, NestedTensorCPU: _fused_sdp_choice_cpp
CUDA, NestedTensorCUDA: _fused_sdp_choice_cuda
tags: nondeterministic_seeded
-- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+- func: _scaled_dot_product_attention_math(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None, bool enable_gqa=False) -> (Tensor, Tensor)
variants: function
tags: nondeterministic_seeded
+- func: _scaled_dot_product_attention_math_for_mps(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float dropout_p=0.0, bool is_causal=False, Tensor? dropout_mask=None, *, float? scale=None) -> (Tensor, Tensor)
+ dispatch:
+ MPS: _scaled_dot_product_attention_math_mps
+ tags: nondeterministic_seeded
+
- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
dispatch:
CUDA: _scaled_dot_product_flash_attention_cuda
NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
tags: nondeterministic_seeded
@@ -14701,10 +14763,15 @@
- func: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
dispatch:
CPU: _scaled_dot_product_flash_attention_cpu
tags: nondeterministic_seeded
+- func: _scaled_dot_product_fused_attention_overrideable(Tensor query, Tensor key, Tensor value, Tensor? attn_bias=None, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+ dispatch:
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable
+ tags: nondeterministic_seeded
+
- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
device_check: NoCheck
variants: function
dispatch:
CUDA: _scaled_dot_product_flash_attention_backward_cuda
@@ -14714,10 +14781,16 @@
device_check: NoCheck
variants: function
dispatch:
CPU: _scaled_dot_product_flash_attention_cpu_backward
+- func: _scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)
+ device_check: NoCheck
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: _scaled_dot_product_fused_attention_overrideable_backward
+
- func: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
dispatch:
CUDA: _scaled_dot_product_efficient_attention_cuda
NestedTensorCUDA: _scaled_dot_product_efficient_attention_nestedtensor_cuda
tags: nondeterministic_seeded
@@ -14726,16 +14799,16 @@
device_check: NoCheck
dispatch:
CUDA: _scaled_dot_product_efficient_attention_backward_cuda
tags: nondeterministic_seeded
-- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+- func: _scaled_dot_product_cudnn_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
dispatch:
CUDA: _scaled_dot_product_cudnn_attention_cuda
tags: nondeterministic_seeded
-- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _scaled_dot_product_cudnn_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor attn_bias, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, *, float? scale=None) -> (Tensor, Tensor, Tensor)
dispatch:
CUDA: _scaled_dot_product_cudnn_attention_backward_cuda
tags: nondeterministic_seeded
- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None, SymInt? window_size_left=None, SymInt? window_size_right=None, Tensor? seqused_k=None, Tensor? alibi_slopes=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
@@ -15561,55 +15634,61 @@
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
variants: function
dispatch:
CPU: _fused_adam_kernel_cpu_
CUDA: _fused_adam_kernel_cuda_
+ MPS: _fused_adam_kernel_mps_
autogen: _fused_adam, _fused_adam.out
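A minimal sketch of the fused Adam path from the optimiser API, assuming torch.optim.Adam's fused flag routes to the kernels listed above on the chosen device (falling back to CPU when MPS is absent):

    import torch

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    model = torch.nn.Linear(4, 4).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3, fused=True)
    loss = model(torch.randn(2, 4, device=device)).sum()
    loss.backward()
    opt.step()
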
- func: _fused_adam_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
# but still skip the device check as the Tensor LR can be on CPU
device_check: NoCheck
variants: function
dispatch:
CPU: _fused_adam_kernel_cpu_
CUDA: _fused_adam_kernel_cuda_
+ MPS: _fused_adam_kernel_mps_
autogen: _fused_adam.tensor_lr, _fused_adam.tensor_lr_out
- func: _fused_adamw_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, float lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
variants: function
dispatch:
CPU: _fused_adamw_kernel_cpu_
CUDA: _fused_adamw_kernel_cuda_
+ MPS: _fused_adamw_kernel_mps_
autogen: _fused_adamw, _fused_adamw.out
- func: _fused_adamw_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] exp_avgs, Tensor(d!)[] exp_avg_sqs, Tensor(e!)[] max_exp_avg_sqs, Tensor[] state_steps, *, Tensor lr, float beta1, float beta2, float weight_decay, float eps, bool amsgrad, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now),
# but still skip the device check as the Tensor LR can be on CPU
device_check: NoCheck
variants: function
dispatch:
CPU: _fused_adamw_kernel_cpu_
CUDA: _fused_adamw_kernel_cuda_
+ MPS: _fused_adamw_kernel_mps_
autogen: _fused_adamw.tensor_lr, _fused_adamw.tensor_lr_out
- func: _fused_sgd_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, float lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
variants: function
dispatch:
CPU: _fused_sgd_kernel_cpu_
CUDA: _fused_sgd_kernel_cuda_
+ MPS: _fused_sgd_kernel_mps_
autogen: _fused_sgd, _fused_sgd.out
- func: _fused_sgd_.tensor_lr(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] momentum_buffer_list, *, float weight_decay, float momentum, Tensor lr, float dampening, bool nesterov, bool maximize, bool is_first_step, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
# Unlike "foreach" functions, lists of tensors should be guaranteed to be on the same device (for now).
# but still skip the device check as the Tensor LR can be on CPU
device_check: NoCheck
variants: function
dispatch:
CPU: _fused_sgd_kernel_cpu_
CUDA: _fused_sgd_kernel_cuda_
+ MPS: _fused_sgd_kernel_mps_
autogen: _fused_sgd.tensor_lr, _fused_sgd.tensor_lr_out
- func: _fused_adagrad_(Tensor(a!)[] self, Tensor(b!)[] grads, Tensor(c!)[] state_sums, Tensor(d!)[] state_steps, *, float lr, float lr_decay, float weight_decay, float eps, bool maximize, Tensor? grad_scale=None, Tensor? found_inf=None) -> ()
variants: function
dispatch: