codegen/native_functions.yaml in torch-rb-0.9.2 vs codegen/native_functions.yaml in torch-rb-0.10.0

- old (torch-rb 0.9.2)
+ new (torch-rb 0.10.0)

@@ -98,14 +98,53 @@
   dispatch:
     CompositeExplicitAutograd: _fw_primal

 - func: _make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
   variants: function
+  dispatch:
+    CompositeExplicitAutograd: _make_dual

 - func: _unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
   variants: function

+# NOTE: [_new_zeros_with_same_feature_meta]
+# This function creates a new tensor with the layout and TensorOptions
+# of `other` but also takes into account the batch dimensions of `self`
+#
+# This function has a couple extra constraints because it is also used for `jvp`
+# in functorch.
+# - is used for forward AD because there is the restriction
+#   that the primal and tangent must have the same layout
+# - We cannot assume that `self` and `other` have the same sizes or even dim
+#   because in the inplace over view case, `other` is the base tensor, and
+#   `self` is the forward grad with respect to the view, which can have an
+#   entirely different shape
+# - takes the number of batch dims for `self` because we also handle
+#   some batching logic. We handle that here instead of a batching rule because
+#   we'd like to avoid calling as_strided in the batching rule (as to enable
+#   nested vmap in functorch).
+# - needs to be CompositeExplicitAutograd for jvp support in functorch.
+#   functorch currently relies on TensorWrapper which does not have storage
+#   CompositeExplicitAutograd makes sure the TensorWrapper is unwrapped.
+# - this function may eventually take on another int argument to store the
+#   the number of batch dims for other once we support that use case
+- func: _new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _new_zeros_with_same_feature_meta
+
+# This function compares the storage numel of self with that of other, where
+# storage numel is cumputed as: `other.storage().nbytes() / other.itemsize()`.
+# We create this function for composite compliance purposes. The batching rule
+# always returns true because vmapped as_strided does not support accessing
+# storage locations not indexable by the input tensor.
+# See the note above for more information.
+- func: _has_same_storage_numel(Tensor self, Tensor other) -> bool
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: _has_same_storage_numel
+
 - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
   variants: method

 - func: rename(Tensor(a) self, Dimname[]? names) -> Tensor(a)
   variants: method
@@ -174,10 +213,21 @@
 - func: _masked_scale(Tensor self, Tensor mask, float scale) -> Tensor
   variants: function
   dispatch:
     CUDA: masked_scale_cuda

+- func: native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)
+  variants: function
+  dispatch:
+    CPU: native_dropout_cpu
+    CUDA: native_dropout_cuda
+
+- func: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
+  dispatch:
+    CPU: native_dropout_backward_cpu
+    CUDA: native_dropout_backward_cuda
+
 - func: _sobol_engine_draw(Tensor quasi, int n, Tensor sobolstate, int dimension, int num_generated, ScalarType? dtype) -> (Tensor, Tensor)

 - func: _sobol_engine_ff_(Tensor(a!) self, int n, Tensor sobolstate, int dimension, int num_generated) -> Tensor(a!)

 - func: _sobol_engine_scramble_(Tensor(a!) self, Tensor ltm, int dimension) -> Tensor(a!)
@@ -207,21 +257,27 @@
 - func: abs(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: abs
+    SparseCPU, SparseCUDA: abs_sparse
+    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr

 - func: abs_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: abs_
+    SparseCPU, SparseCUDA: abs_sparse_
+    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_

 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: abs_out
+    SparseCPU, SparseCUDA: abs_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: abs_sparse_csr_out

 # Note [Adding an alias]
 # To add an alias do the following:
 #
 # 1) Copy the original functions native_functions.yaml entry, but replace the
@@ -229,22 +285,19 @@
 #    keys for the aliases. Specifying a dispatch key will prevent
 #    autograd from recording the operations the alias performs, which
 #    will stop it from "inheriting" the original operation's autograd behavior.
 # 2) Implement the corresponding functions and have them redispatch to the
 #    original function.
-# 3) Add entries for the alias (and original function, if needed) to
-#    aten/src/ATen/core/interned_strings.h
-#    (This may require removing an entry from ATen/core/aten_interned_strings.h.)
-# 4) Add docstrings to the new function that reference the original function,
+# 3) Add docstrings to the new function that reference the original function,
 #    and document the method as usual (if it exists.)
 #    (See torch/_torch_docs.py and docs/source/torch.rst if adding a function,
 #    torch/_tensor_docs.py and docs/source/tensors.rst if adding a method,
 #    or module-specific doc bindings (like torch/linalg/__init__.py) if
 #    adding an alias in a namespace.)
-# 5) Update torch/overrides.py consistent with the original function.
-# 6) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
-# 7) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
+# 4) Update torch/overrides.py consistent with the original function.
+# 5) Update the alias_map in torch/csrc/jit/passes/normalize_ops.cpp.
+# 6) Add aliases argument to existing OpInfo/UnaryUfuncInfo or create new OpInfo/UnaryUfuncInfo entry
 #    in op_db list in torch/testing/_internal/common_methods_invocations.py
 #
 # See torch.absolute, an alias for torch.abs, as an example.

 # Absolute, alias for abs
@@ -262,15 +315,17 @@
 - func: angle(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
     CPU, CUDA: angle
+    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr

 - func: angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: angle_out
+    SparseCsrCPU, SparseCsrCUDA: angle_sparse_csr_out

 - func: view_as_real(Tensor(a) self) -> Tensor(a)
   variants: function
   dispatch:
     CPU, CUDA: view_as_real
@@ -281,20 +336,28 @@
     CPU, CUDA: view_as_complex

 - func: sgn(Tensor self) -> Tensor
   variants: function, method
   structured_delegate: sgn.out
+  dispatch:
+    SparseCPU, SparseCUDA: sgn_sparse
+    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr

 - func: sgn_(Tensor(a!) self) -> Tensor(a!)
   variants: method
   structured_delegate: sgn.out
+  dispatch:
+    SparseCPU, SparseCUDA: sgn_sparse_
+    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_

 - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sgn_out
+    SparseCPU, SparseCUDA: sgn_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: sgn_sparse_csr_out

 - func: real(Tensor(a) self) -> Tensor(a)
   device_check: NoCheck # TensorIterator
   variants: function
@@ -313,23 +376,26 @@
 - func: _conj_physical(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: _conj_physical
+    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr

 - func: conj_physical(Tensor self) -> Tensor
   variants: function, method

 - func: conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: conj_physical_out
     SparseCPU, SparseCUDA: conj_physical_out_sparse
+    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_out

 - func: conj_physical_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: conj_physical_
+    SparseCsrCPU, SparseCsrCUDA: conj_physical_sparse_csr_

 - func: resolve_conj(Tensor(a) self) -> Tensor(a)
   variants: function, method

 - func: resolve_neg(Tensor(a) self) -> Tensor(a)
@@ -379,10 +445,11 @@
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: add_sparse
     SparseCsrCPU, SparseCsrCUDA: add_sparse_csr
     MkldnnCPU: mkldnn_add
+    ZeroTensor: add_zerotensor

 - func: add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   structured_delegate: add.out
@@ -452,10 +519,12 @@
 - func: addmv.out(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
     CPU: addmv_out_cpu
     CUDA: addmv_out_cuda
+    SparseCsrCPU: addmv_out_sparse_csr
+    SparseCsrCUDA: addmv_out_sparse_csr_cuda

 - func: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   variants: function, method
   dispatch:
     CPU, CUDA: addr
@@ -530,11 +599,11 @@
 - func: arange.out(Scalar end, *, Tensor(a!) out) -> Tensor(a!)

 - func: arange.start_out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU: arange_cpu_out
+    CPU, Meta: arange_out
     CUDA: arange_cuda_out

 # This function is a temporary hack to allow tracing of arange like constructs with dynamic
 # bounds on arange. Normal arange is not traceable because it does not take any tensor inputs;
 # if the range you need is based on another tensor, calling this function directly will
@@ -586,20 +655,28 @@
 - func: arccosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

 - func: asinh(Tensor self) -> Tensor
   variants: function, method
   structured_delegate: asinh.out
+  dispatch:
+    SparseCPU, SparseCUDA: asinh_sparse
+    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr

 - func: asinh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   structured_delegate: asinh.out
+  dispatch:
+    SparseCPU, SparseCUDA: asinh_sparse_
+    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_

 - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: asinh_out
+    SparseCPU, SparseCUDA: asinh_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: asinh_sparse_csr_out

 # arcsinh, alias for asinh
 - func: arcsinh(Tensor self) -> Tensor
   variants: function, method
@@ -609,20 +686,29 @@
 - func: arcsinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

 - func: atanh(Tensor self) -> Tensor
   structured_delegate: atanh.out
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: atanh
+    SparseCPU, SparseCUDA: atanh_sparse
+    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr

 - func: atanh_(Tensor(a!) self) -> Tensor(a!)
   structured_delegate: atanh.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: atanh_sparse_
+    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_

 - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atanh_out
+    SparseCPU, SparseCUDA: atanh_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: atanh_sparse_csr_out

 # arctanh, alias for atanh
 - func: arctanh(Tensor self) -> Tensor
   variants: function, method
@@ -632,44 +718,48 @@
 - func: arctanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

 - func: as_strided(Tensor(a) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a)
   variants: function, method
   dispatch:
-    CPU, CUDA, Meta: as_strided_tensorimpl
+    ZeroTensor, CPU, CUDA, Meta: as_strided_tensorimpl
     QuantizedCPU, QuantizedCUDA: as_strided_qtensorimpl
   device_check: NoCheck
   device_guard: False

 - func: as_strided_(Tensor(a!) self, int[] size, int[] stride, int? storage_offset=None) -> Tensor(a!)
   use_const_ref_for_mutable_tensors: True
   variants: function, method
   device_check: NoCheck
   device_guard: False
+  tags: inplace_view
   dispatch:
     CompositeExplicitAutograd: as_strided_

 - func: asin(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse
+    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr

 - func: asin_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: asin.out
   dispatch:
     SparseCPU, SparseCUDA: asin_sparse_
+    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_

 - func: asin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: asin_out
-    SparseCPU, SparseCUDA: asin_out_sparse
+    SparseCPU, SparseCUDA: asin_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: asin_sparse_csr_out

 # arcsin, alias of asin
 - func: arcsin(Tensor self) -> Tensor
   variants: function, method
@@ -680,22 +770,30 @@
 - func: atan(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: atan.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: atan_sparse
+    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr

 - func: atan_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: atan.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: atan_sparse_
+    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_

 - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: atan_out
+    SparseCPU, SparseCUDA: atan_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: atan_sparse_csr_out

 # arctan, alias of atan
 - func: arctan(Tensor self) -> Tensor
   variants: function, method
@@ -721,28 +819,23 @@
 - func: atleast_3d.Sequence(Tensor[] tensors) -> Tensor[]
   variants: function

 - func: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   variants: function, method
-  dispatch:
-    CPU: baddbmm_cpu
-    CUDA: baddbmm_cuda
+  structured_delegate: baddbmm.out

 - func: baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   variants: method
-  dispatch:
-    CPU: baddbmm__cpu
-    CUDA: baddbmm__cuda
+  structured_delegate: baddbmm.out

-- func: _baddbmm_mkl_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
-  variants: function
-
 - func: baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
+  structured: True
   variants: function
   dispatch:
     CPU: baddbmm_out_cpu
     CUDA: baddbmm_out_cuda
+    SparseCsrCUDA: baddbmm_out_sparse_csr_cuda

 - func: bartlett_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

 - func: bartlett_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -786,11 +879,11 @@
 # with `bernoulli(Tensor self, *, Generator? generator=None)` declaration.
 - func: bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method

-- func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias) -> Tensor
+- func: bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor

 - func: binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
   device_check: NoCheck # TensorIterator
   python_module: nn
   variants: function
@@ -884,53 +977,69 @@
     CompositeExplicitAutograd: copysign_out

 - func: logical_not(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: logical_not

 - func: logical_not_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: logical_not_

 - func: logical_not.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: logical_not_out

 - func: logical_xor(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: logical_xor

 - func: logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: logical_xor_

 - func: logical_xor.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: logical_xor_out

 - func: logical_and(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: logical_and

 - func: logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: logical_and_

 - func: logical_and.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: logical_and_out

 - func: logical_or(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: logical_or

 - func: logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: logical_or_

 - func: logical_or.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   dispatch:
     CPU, CUDA: logical_or_out
@@ -938,32 +1047,38 @@
 - func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

 - func: blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

 - func: bmm(Tensor self, Tensor mat2) -> Tensor
+  structured_delegate: bmm.out
   variants: function, method
   dispatch:
-    CPU: bmm_cpu
-    CUDA: bmm_cuda
     SparseCPU: bmm_sparse_cpu
     SparseCUDA: bmm_sparse_cuda

 - func: bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
   variants: function
   dispatch:
     CPU: bmm_out_cpu
     CUDA: bmm_out_cuda
     SparseCPU: bmm_out_sparse_cpu
     SparseCUDA: bmm_out_sparse_cuda
+    SparseCsrCUDA: bmm_out_sparse_csr_cuda

 - func: broadcast_tensors(Tensor[] tensors) -> Tensor[]
   device_check: NoCheck
   device_guard: False

 - func: broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
   variants: function, method

+- func: _sparse_broadcast_to(Tensor(a) self, int[] size) -> Tensor(a)
+  variants: function
+  dispatch:
+    SparseCPU, SparseCUDA: sparse_broadcast_to
+
 - func: cat(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
     CompositeExplicitAutograd: cat

 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
@@ -990,24 +1105,30 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: ceil.out
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: ceil
+    SparseCPU, SparseCUDA: ceil_sparse
+    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr

 - func: ceil_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: ceil.out
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: ceil_
+    SparseCPU, SparseCUDA: ceil_sparse_
+    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_

 - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: ceil_out
+    SparseCPU, SparseCUDA: ceil_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: ceil_sparse_csr_out

 # alias for torch.linalg.multi_dot
 - func: chain_matmul(Tensor[] matrices) -> Tensor
   variants: function
@@ -1017,22 +1138,22 @@
 - func: unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[]
   variants: function, method
   device_check: NoCheck
   device_guard: False

-- func: chunk(Tensor(a) self, int chunks, int dim=0) -> Tensor(a)[]
+- func: chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[]
   variants: function, method
   device_check: NoCheck
   device_guard: False

-- func: tensor_split.sections(Tensor(a) self, int sections, int dim=0) -> Tensor(a)[]
+- func: tensor_split.sections(Tensor(a -> *) self, int sections, int dim=0) -> Tensor(a)[]
   variants: function, method

-- func: tensor_split.indices(Tensor(a) self, int[] indices, int dim=0) -> Tensor(a)[]
+- func: tensor_split.indices(Tensor(a -> *) self, int[] indices, int dim=0) -> Tensor(a)[]
   variants: function, method

-- func: tensor_split.tensor_indices_or_sections(Tensor(a) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
+- func: tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[]
   variants: function, method

 - func: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
@@ -1184,29 +1305,35 @@
 - func: contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)
   variants: method
   manual_cpp_binding: True

 - func: convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: convolution

+- func: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, int[]? bias_sizes, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  dispatch:
+    CompositeExplicitAutograd, CUDA: convolution_backward
+
 - func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: convolution_overrideable

 - func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
     CompositeExplicitAutograd: convolution_backward_overrideable

 - func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+  dispatch:
+    CompositeExplicitAutograd: _convolution

 - func: _convolution.deprecated(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled) -> Tensor

 - func: _convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[] dilation, int groups) -> Tensor

-- func: _convolution_nogroup(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding) -> Tensor
+- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)

-- func: _convolution_double_backward(Tensor? ggI, Tensor? ggW, Tensor? ggb, Tensor gO, Tensor weight, Tensor self, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-
 - func: conv1d(Tensor input, Tensor weight, Tensor? bias=None, int[1] stride=1, int[1] padding=0, int[1] dilation=1, int groups=1) -> Tensor

 - func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor

 - func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
@@ -1237,11 +1364,13 @@
   variants: method
   device_check: NoCheck
   device_guard: False
   dispatch:
     MkldnnCPU: copy_mkldnn_
+    SparseCPU, SparseCUDA, SparseHIP: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
+    SparseCsrCPU, SparseCsrCUDA: copy_sparse_csr_

 - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
   dispatch: {}

 # We need this to be able to properly copy from a CPU to an XLA tensor with different sizes.
@@ -1318,60 +1447,18 @@
 # NB: You can only use this if you used cudnn_batch_norm training=True
 - func: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
   dispatch:
     CUDA: cudnn_batch_norm_backward

-- func: cudnn_convolution.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_deprecated
-
-- func: cudnn_convolution.deprecated2(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_deprecated2
-
 - func: cudnn_convolution(Tensor self, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution

-- func: cudnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_backward_input
-
-- func: cudnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
-  dispatch:
-    CUDA: cudnn_convolution_backward
-
-- func: cudnn_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_backward_weight
-
-- func: cudnn_convolution_transpose.deprecated(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_transpose_deprecated
-
-- func: cudnn_convolution_transpose.deprecated2(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_transpose_deprecated2
-
 - func: cudnn_convolution_transpose(Tensor self, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_transpose

-# NB: output_padding not strictly needed here, but it's helpful for the float
-# backwards
-- func: cudnn_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32, bool[2] output_mask) -> (Tensor, Tensor)
-  dispatch:
-    CUDA: cudnn_convolution_transpose_backward
-
-- func: cudnn_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_transpose_backward_input
-
-- func: cudnn_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
-  dispatch:
-    CUDA: cudnn_convolution_transpose_backward_weight
-
 - func: cudnn_convolution_relu(Tensor self, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
   dispatch:
     CUDA: cudnn_convolution_relu

 - func: cudnn_convolution_add_relu(Tensor self, Tensor weight, Tensor z, Scalar? alpha, Tensor? bias, int[] stride, int[] padding, int[] dilation, int groups) -> Tensor
@@ -1514,19 +1601,25 @@
     CPU: ctc_loss_backward_cpu
     CUDA: ctc_loss_backward_gpu

 - func: diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: diag_embed

 - func: diagflat(Tensor self, int offset=0) -> Tensor
   variants: function, method

 - func: diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: diagonal

+- func: linalg_diagonal(Tensor(a) A, *, int offset=0, int dim1=-2, int dim2=-1) -> Tensor(a)
+  python_module: linalg
+  variants: function
+
 - func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
   variants: function, method

 - func: diagonal_backward(Tensor grad_output, int[] input_sizes, int offset, int dim1, int dim2) -> Tensor
   variants: function
@@ -1569,10 +1662,11 @@
   device_check: NoCheck # TensorIterator
   variants: function, method
   structured_delegate: div.out
   dispatch:
     SparseCPU, SparseCUDA: div_sparse
+    ZeroTensor: div_zerotensor

 - func: div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: method
   structured_delegate: div.out
@@ -1779,16 +1873,23 @@
     CPU: empty_cpu
     CUDA: empty_cuda
     Meta: empty_meta
     MkldnnCPU: empty_mkldnn
     SparseCPU, SparseCUDA: empty_sparse
+    SparseCsrCPU, SparseCsrCUDA: empty_sparse_csr

+# We do not make new_empty a composite that calls into new_empty_strided, as the strided version
+# is significantly more difficult to implement by different backends
 - func: new_empty(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: new_empty

 - func: new_empty_strided(Tensor self, int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   variants: method
+  dispatch:
+    CompositeExplicitAutograd: new_empty_strided

 - func: new_full(Tensor self, int[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   variants: method

 - func: new_zeros(Tensor self, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -1818,10 +1919,11 @@
   device_guard: False
   dispatch:
     CPU, Meta: resize_
     CUDA: resize_cuda_
     QuantizedCPU: quantized_resize_cpu_
+    SparseCsrCPU, SparseCsrCUDA: resize_sparse_csr_

 - func: empty_quantized(int[] size, Tensor qtensor, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   category_override: factory
   variants: function
   dispatch:
@@ -1832,10 +1934,14 @@
   device_guard: False

 - func: empty_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor
   device_check: NoCheck
   device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: empty_like
+    SparseCPU, SparseCUDA: empty_like_sparse_coo
+    SparseCsrCPU, SparseCsrCUDA: empty_like_sparse_csr

 - func: empty_strided(int[] size, int[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
     CPU: empty_strided_cpu
     CUDA: empty_strided_cuda
@@ -1843,22 +1949,30 @@
 - func: erf(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: erf.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: erf_sparse
+    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr

 - func: erf_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: erf.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: erf_sparse_
+    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_

 - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: erf_out
+    SparseCPU, SparseCUDA: erf_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: erf_sparse_csr_out

 - func: erfc(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: erfc.out
   variants: function, method
@@ -1908,22 +2022,30 @@
 - func: expm1(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: expm1.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: expm1_sparse
+    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr

 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: expm1.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: expm1_sparse_
+    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_

 - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: expm1_out
+    SparseCPU, SparseCUDA: expm1_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: expm1_sparse_csr_out

 - func: expand(Tensor(a) self, int[] size, *, bool implicit=False) -> Tensor(a)
   variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
   device_check: NoCheck
   device_guard: False
@@ -1969,40 +2091,48 @@
 - func: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
+    CPU, CUDA: fill_
+    QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_

 - func: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
-    CPU, CUDA, QuantizedCPU, QuantizedCUDA: fill_
+    CPU, CUDA: fill_
+    QuantizedCPU, QuantizedCUDA: fill_quantized_
     Meta: fill_meta_

 - func: floor(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: floor.out
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: floor
+    SparseCPU, SparseCUDA: floor_sparse
+    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr

 - func: floor_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: floor.out
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: floor_
+    SparseCPU, SparseCUDA: floor_sparse_
+    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_

 - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: floor_out
+    SparseCPU, SparseCUDA: floor_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: floor_sparse_csr_out

 - func: floor_divide(Tensor self, Tensor other) -> Tensor
   device_check: NoCheck # TensorIterator
   variants: function, method
   dispatch:
@@ -2106,14 +2236,17 @@
 #   `align_corners = True`.
 - func: grid_sampler(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor

 - func: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
   dispatch:
-    CPU: grid_sampler_2d_cpu
+    CPU, QuantizedCPU: grid_sampler_2d_cpu
     CUDA: grid_sampler_2d_cuda

-- func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
+# `grid_sampler_2d_backward` takes in `output_mask` to optimize performance for
+# the case where `input` doesn't require gradient. Gradient for `grid` is always
+# computed (only `output_mask[0]` is checked by the implementations).
+- func: grid_sampler_2d_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, bool[2] output_mask) -> (Tensor, Tensor)
   dispatch:
     CPU: grid_sampler_2d_backward_cpu
     CUDA: grid_sampler_2d_backward_cuda

 # See NOTE [ grid_sample CPU fallback ]
@@ -2227,10 +2360,12 @@
   dispatch:
     CompositeExplicitAutograd: index_copy_

 - func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: index_copy

 - func: index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!)
   variants: method

 - func: index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor
@@ -2248,10 +2383,12 @@
 # - Tensor & Tensor::index_put_(std::initializer_list<TensorIndex> indices, Scalar v)
 - func: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
   device_check: NoCheck # delegate to _index_put_impl_ after clone, which leverages TensorIterator
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: index_put

 - func: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   variants: function
   dispatch:
@@ -2267,16 +2404,10 @@
 - func: inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: inverse_out

-- func: _inverse_helper(Tensor self) -> Tensor
-  variants: function
-  dispatch:
-    CPU: _inverse_helper_cpu
-    CUDA: _inverse_helper_cuda
-
 - func: isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor
   variants: function, method

 - func: isin.Tensor_Tensor_out(Tensor elements, Tensor test_elements, *, bool assume_unique=False, bool invert=False, Tensor(a!) out) -> Tensor(a!)
   variants: function
@@ -2313,10 +2444,11 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
     CPU, CUDA: isnan
     SparseCPU, SparseCUDA: isnan_sparse
+    SparseCsrCPU, SparseCsrCUDA: isnan_sparse_csr

 - func: is_distributed(Tensor self) -> bool
   variants: function, method
   device_check: NoCheck
   device_guard: False
@@ -2336,10 +2468,15 @@
 - func: is_conj(Tensor self) -> bool
   variants: function, method
   device_guard: False
   manual_cpp_binding: True

+- func: _is_zerotensor(Tensor self) -> bool
+  variants: function, method
+  device_guard: False
+  manual_cpp_binding: True
+
 - func: is_neg(Tensor self) -> bool
   variants: function, method
   device_guard: False
   manual_cpp_binding: True
@@ -2403,28 +2540,36 @@
   dispatch:
     CPU: layer_norm_cpu
     CUDA: layer_norm_cuda
     CompositeImplicitAutograd: math_native_layer_norm

+- func: _native_multi_head_self_attention(Tensor query, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, Tensor? mask=None) -> Tensor
+  dispatch:
+    CPU: multi_head_self_attention_cpu
+    CUDA: multi_head_self_attention_cuda
+
 - func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
     CPU: layer_norm_backward_cpu
     CUDA: layer_norm_backward_cuda

 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: nan_to_num
+    SparseCPU, SparseCUDA: nan_to_num_sparse

 - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: nan_to_num_
+    SparseCPU, SparseCUDA: nan_to_num_sparse_

 - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CPU, CUDA: nan_to_num_out
+    SparseCPU, SparseCUDA: nan_to_num_sparse_out

 - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
   python_module: nn

 - func: linear.out(Tensor input, Tensor weight, Tensor? bias=None, *, Tensor(a!) out) -> Tensor(a!)
@@ -2469,15 +2614,15 @@
 - func: ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: function, method

 - func: ldexp.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

-- func: linspace(Scalar start, Scalar end, int? steps=None, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: linspace(Scalar start, Scalar end, int steps, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

-- func: linspace.out(Scalar start, Scalar end, int? steps=None, *, Tensor(a!) out) -> Tensor(a!)
+- func: linspace.out(Scalar start, Scalar end, int steps, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU: linspace_cpu_out
+    CPU, Meta: linspace_out
     CUDA: linspace_cuda_out

 - func: log(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log.out
@@ -2497,10 +2642,12 @@
 - func: log10(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log10.out
   variants: function, method
+  dispatch:
+    CompositeExplicitAutograd: log10

 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: log10.out
   variants: function, method
@@ -2516,25 +2663,28 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: log1p.out
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse
+    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr

 - func: log1p_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: log1p.out
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: log1p_sparse_
+    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_

 - func: log1p.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: log1p_out
-    SparseCPU, SparseCUDA: log1p_out_sparse
+    SparseCPU, SparseCUDA: log1p_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: log1p_sparse_csr_out

 - func: log2(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: log2.out
   variants: function, method
@@ -2628,15 +2778,15 @@
 - func: logdet(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: logdet

-- func: logspace(Scalar start, Scalar end, int? steps=None, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: logspace(Scalar start, Scalar end, int steps, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

-- func: logspace.out(Scalar start, Scalar end, int? steps=None, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
+- func: logspace.out(Scalar start, Scalar end, int steps, float base=10.0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU: logspace_cpu_out
+    CPU, Meta: logspace_out
     CUDA: logspace_cuda_out

 # log_softmax allows positional dtype, unlike most operators, because kwonly is BC-breaking when loading jit models.
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -2651,14 +2801,14 @@
   structured: True
   dispatch:
     CPU: log_softmax_cpu_out
     CUDA: log_softmax_cuda_out

-- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+- func: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
   structured_delegate: _log_softmax_backward_data.out

-- func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+- func: _log_softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
     CPU: log_softmax_backward_cpu_out
     CUDA: log_softmax_backward_cuda_out
@@ -2720,15 +2870,15 @@
   variants: function, method

 # Alias to linalg.matrix_power
 - func: matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)

+# Alias to linalg.matrix_exp
 - func: matrix_exp(Tensor self) -> Tensor
   variants: function, method
-  dispatch:
-    CPU, CUDA: matrix_exp

+# This function should be deprecated in favor of differential_analytic_matrix_function in FunctionsManual.cpp
 - func: matrix_exp_backward(Tensor self, Tensor grad) -> Tensor

 # DEPRECATED: Use torch.aminmax instead
 - func: _aminmax(Tensor self) -> (Tensor, Tensor)
   dispatch:
@@ -2758,16 +2908,20 @@
   dispatch:
     CPU, CUDA: _compute_linear_combination_out

 - func: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
+  structured_delegate: max.dim_max
   variants: function, method
   dispatch:
-    CPU, CUDA, QuantizedCPU, QuantizedCUDA: max
+    QuantizedCPU, QuantizedCUDA: qmax

 - func: max.dim_max(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) max, Tensor(b!) max_values) -> (Tensor(a!) values, Tensor(b!) indices)
   device_check: NoCheck # TensorIterator
+  structured: True
+  precomputed:
+  - dim -> int dim
   dispatch:
     CPU, CUDA: max_out

 - func: max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
@@ -2901,16 +3055,20 @@
 - func: nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)

 - func: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
+  structured_delegate: min.dim_min
   variants: function, method
   dispatch:
-    CPU, CUDA, QuantizedCPU, QuantizedCUDA: min
+    QuantizedCPU, QuantizedCUDA: qmin

 - func: min.dim_min(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) min, Tensor(b!) min_indices) -> (Tensor(a!) values, Tensor(b!) indices)
   device_check: NoCheck # TensorIterator
+  structured: True
+  precomputed:
+  - dim -> int dim
   dispatch:
     CPU, CUDA: min_out

 - func: min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   device_check: NoCheck # TensorIterator
@@ -2930,18 +3088,10 @@
 - func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
   dispatch:
     CompositeExplicitAutograd: mkldnn_convolution

-- func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
-
-- func: mkldnn_convolution_backward_weights(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> (Tensor, Tensor)
-
-- func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-  dispatch:
-    CompositeExplicitAutograd: mkldnn_convolution_backward
-
 - func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
   dispatch:
     CUDA: miopen_batch_norm

 - func: miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)
@@ -2950,60 +3100,18 @@
 - func: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution

-- func: miopen_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: miopen_convolution_backward_input
-
-- func: miopen_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-  dispatch:
-    CUDA: miopen_convolution_backward
-
-- func: miopen_convolution_backward_bias(Tensor grad_output) -> Tensor
-  dispatch:
-    CUDA: miopen_convolution_backward_bias
-
-- func: miopen_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: miopen_convolution_backward_weight
-
 - func: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_convolution_transpose

-# NB: output_padding not strictly needed here, but it's helpful for the float
-# backwards
-- func: miopen_convolution_transpose_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] output_padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-  dispatch:
-    CUDA: miopen_convolution_transpose_backward
-
-- func: miopen_convolution_transpose_backward_input(Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: miopen_convolution_transpose_backward_input
-
-- func: miopen_convolution_transpose_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: miopen_convolution_transpose_backward_weight
-
 - func: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
   dispatch:
     CUDA: miopen_depthwise_convolution

-- func: miopen_depthwise_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: miopen_depthwise_convolution_backward_input
-
-- func: miopen_depthwise_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-  dispatch:
-    CUDA: miopen_depthwise_convolution_backward
-
-- func: miopen_depthwise_convolution_backward_weight(int[] weight_size, Tensor grad_output, Tensor self, int[] padding, int[] stride, int[] dilation, int groups, bool benchmark, bool deterministic) -> Tensor
-  dispatch:
-    CUDA: miopen_depthwise_convolution_backward_weight
-
 - func: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
   dispatch:
     CUDA: miopen_rnn

 - func: miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
@@ -3012,11 +3120,12 @@
 - func: mm(Tensor self, Tensor mat2) -> Tensor
   structured_delegate: mm.out
   variants: function, method
   dispatch:
-    SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: _sparse_mm
+    SparseCPU, SparseCUDA: _sparse_mm
+    SparseCsrCPU, SparseCsrCUDA: _sparse_csr_mm

 - func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
   structured: True
   dispatch:
     CPU: mm_out_cpu
@@ -3055,10 +3164,11 @@
   structured_delegate: mul.out
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: mul_sparse
     MkldnnCPU: mkldnn_mul
+    ZeroTensor: mul_zerotensor

 - func: mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: mul.out
   variants: method
@@ -3105,12 +3215,12 @@
   variants: method

 - func: mv(Tensor self, Tensor vec) -> Tensor
   variants: function, method
   dispatch:
-    CPU, CUDA: mv
-    SparseCPU, SparseCUDA, SparseCsrCPU, SparseCsrCUDA: mv_sparse
+    CompositeExplicitAutograd: mv
+    SparseCPU, SparseCUDA: mv_sparse

 - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: mv_out
@@ -3208,19 +3318,10 @@
 - func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor
   variants: function
   dispatch:
     CompositeExplicitAutograd: _nnpack_spatial_convolution

-- func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
-  variants: function
-
-- func: _nnpack_spatial_convolution_backward_input(Tensor input, Tensor grad_output, Tensor weight, int[2] padding) -> Tensor
-  variants: function
-
-- func: _nnpack_spatial_convolution_backward_weight(Tensor input, int[] weightsize, Tensor grad_output, int[2] padding) -> Tensor
-  variants: function
-
 - func: ones.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   device_check: NoCheck
   device_guard: False

 - func: ones(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@@ -3284,19 +3385,39 @@
 # behavior on Windows, for reasons I don't understand
 # (maybe related to capital letter collation somehow...)
 - func: numpy_T(Tensor(a) self) -> Tensor(a)
   variants: method

+# Exposed on Python as an attribute 'H'
+- func: matrix_H(Tensor(a) self) -> Tensor(a)
+  variants: method
+
+# Exposed on Python as an attribute 'mT'
+- func: mT(Tensor(a) self) -> Tensor(a)
+  variants: method
+
+# Exposed on Python as an attribute 'mH'
+- func: mH(Tensor(a) self) -> Tensor(a)
+  variants: method
+
+- func: adjoint(Tensor(a) self) -> Tensor(a)
+  variants: function, method
+
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor

 - func: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor

 - func: channel_shuffle(Tensor self, int groups) -> Tensor
   dispatch:
     CPU: channel_shuffle
     QuantizedCPU: channel_shuffle_quantized_cpu

+- func: native_channel_shuffle(Tensor self, int groups) -> Tensor
+  dispatch:
+    CPU: channel_shuffle_cpu
+    CompositeImplicitAutograd: math_channel_shuffle
+
 - func: is_pinned(Tensor self, Device? device=None) -> bool
   variants: method
   dispatch:
     CUDA: is_pinned_cuda
     CompositeExplicitAutograd: is_pinned_default
@@ -3319,19 +3440,22 @@
 - func: rad2deg(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg
+    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr

 - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: rad2deg_
+    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_

 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
     CompositeExplicitAutograd: rad2deg_out
+    SparseCsrCPU, SparseCsrCUDA: rad2deg_sparse_csr_out

 - func: deg2rad(Tensor self) -> Tensor
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: deg2rad
@@ -3418,11 +3542,11 @@
 - func: range(Scalar start, Scalar end, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor

 - func: range.out(Scalar start, Scalar end, Scalar step=1, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    CPU: range_cpu_out
+    CPU, Meta: range_out
     CUDA: range_cuda_out

 - func: ravel(Tensor(a) self) -> Tensor(a)
   variants: function, method
@@ -3447,25 +3571,28 @@
   device_check: NoCheck # TensorIterator
   structured_delegate: neg.out
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse
+    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr

 - func: neg_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: neg.out
   variants: function, method
   dispatch:
     SparseCPU, SparseCUDA: neg_sparse_
+    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_

 - func: neg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: neg_out
     SparseCPU, SparseCUDA: neg_out_sparse
+    SparseCsrCPU, SparseCsrCUDA: neg_sparse_csr_out

 # Alias for neg
 - func: negative(Tensor self) -> Tensor
   variants: function, method
@@ -3502,11 +3629,11 @@
 - func: _reshape_alias(Tensor(a) self, int[] size, int[] stride) -> Tensor(a)
   variants: function, method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: _reshape_alias
+    CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, ZeroTensor: _reshape_alias

 # We don't need to support mkldnn since this is handled explicitly by the reshape operator.
 - func: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
   device_check: NoCheck
   device_guard: False
@@ -3520,24 +3647,50 @@
 - func: round(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: round.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: round_sparse
+    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr

 - func: round_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: round.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: round_sparse_
+    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_

 - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU: round_out
     CUDA: round_out
+    SparseCPU, SparseCUDA: round_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: round_sparse_csr_out

+- func: round.decimals(Tensor self, *, int decimals) -> Tensor
+  device_check: NoCheck # TensorIterator
+  structured_delegate: round.decimals_out
+  variants: function, method
+
+- func: round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured_delegate: round.decimals_out
+  variants: function, method
+
+- func: round.decimals_out(Tensor self, *, int decimals, Tensor(a!) out) -> Tensor(a!)
+  device_check: NoCheck # TensorIterator
+  structured: True
+  structured_inherits: TensorIteratorBase
+  dispatch:
+    CPU: round_decimals_out
+    CUDA: round_decimals_out
+
 - func: rrelu(Tensor self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
   device_check: NoCheck # TensorIterator

 - func: rrelu_(Tensor(a!) self, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
   device_check: NoCheck # TensorIterator

@@ -3589,10 +3742,11 @@
   structured_delegate: gelu.out
   device_check: NoCheck # TensorIterator
   python_module: nn
   dispatch:
     MkldnnCPU: mkldnn_gelu
+    QuantizedCPU: gelu_quantized_cpu

 - func: gelu_backward.grad_input(Tensor grad, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
   structured_inherits: TensorIteratorBase
   python_module: nn
@@ -3781,22 +3935,30 @@
 - func: sin(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: sin.out
   variants: function, method
+  dispatch:
+    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr
+    SparseCPU, SparseCUDA: sin_sparse

 - func: sin_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: sin.out
   variants: function, method
+  dispatch:
+    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_
+    SparseCPU, SparseCUDA: sin_sparse_

 - func: sin.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sin_out
+    SparseCsrCPU, SparseCsrCUDA: sin_sparse_csr_out
+    SparseCPU, SparseCUDA: sin_sparse_out

 - func: sinc(Tensor self) -> Tensor
   structured_delegate: sinc.out
   variants: function, method
@@ -3812,22 +3974,30 @@
 - func: sinh(Tensor self) -> Tensor
   device_check: NoCheck # TensorIterator
   structured_delegate: sinh.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: sinh_sparse
+    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr

 - func: sinh_(Tensor(a!) self) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured_delegate: sinh.out
   variants: function, method
+  dispatch:
+    SparseCPU, SparseCUDA: sinh_sparse_
+    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_

 - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   device_check: NoCheck # TensorIterator
   structured: True
   structured_inherits: TensorIteratorBase
   dispatch:
     CPU, CUDA: sinh_out
+    SparseCPU, SparseCUDA: sinh_sparse_out
+    SparseCsrCPU, SparseCsrCUDA: sinh_sparse_csr_out

 # Returns a copy of this `Variable` that is detached from its autograd graph.
 # This method is OK to call if the `Variable` is a view.
 #
 # NOTE: Previously, if we change the tensor metadata (e.g. sizes / strides /
@@ -3846,10 +4016,11 @@
 # Like `detach()`, but modifies this `Variable` in-place. This method may
 # only be called on non-view `Variable`s. You can use `is_view()` to check
 # this. If this `Variable` is a view, throws an `std::runtime_error()`.
 - func: detach_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
+  tags: inplace_view
   dispatch:
     CompositeExplicitAutograd: detach_

 - func: size.int(Tensor self, int dim) -> int
   variants: function
@@ -3874,10 +4045,31 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: slice_backward

+- func: slice_scatter(Tensor self, Tensor src, int dim=0, int? start=None, int? end=None, int step=1) -> Tensor
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: slice_scatter
+
+- func: select_scatter(Tensor self, Tensor src, int dim, int index) -> Tensor
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: select_scatter
+
+- func: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
+  variants: function, method
+  device_check: NoCheck
+  device_guard: False
+  dispatch:
+    CompositeExplicitAutograd: diagonal_scatter
+
 - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
   variants: function, method
   dispatch:
     CompositeExplicitAutograd: slogdet
@@ -3900,14 +4092,14 @@
   structured: True
   dispatch:
     CPU: softmax_cpu_out
     CUDA: softmax_cuda_out

-- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor
+- func: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
   structured_delegate: _softmax_backward_data.out

-- func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, Tensor self, *, Tensor(a!) grad_input) -> Tensor(a!)
+- func: _softmax_backward_data.out(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype, *, Tensor(a!) grad_input) -> Tensor(a!)
   structured: True
   dispatch:
     CPU: softmax_backward_cpu_out
     CUDA: softmax_backward_cuda_out
@@ -3916,11 +4108,11 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: unsafe_split

-- func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
+- func: split.Tensor(Tensor(a -> *) self, int split_size, int dim=0) -> Tensor(a)[]
   variants: function, method
   device_check: NoCheck
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: split
@@ -3930,72 +4122,77 @@
   device_check: NoCheck
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: unsafe_split_with_sizes

-- func: split_with_sizes(Tensor(a) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
+- func: split_with_sizes(Tensor(a -> *) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
   variants: function, method
   device_check: NoCheck
   device_guard: False
   dispatch:
     CompositeExplicitAutograd: split_with_sizes

-- func: hsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
+- func: hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
   variants: function, method

-- func: hsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
+- func: hsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
   variants: function, method

-- func: vsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
+- func: vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
   variants: function, method

-- func: vsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
+- func: vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
   variants: function, method

-- func: dsplit.int(Tensor(a) self, int sections) -> Tensor(a)[]
+- func: dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[]
   variants: function, method

-- func: dsplit.array(Tensor(a) self, int[] indices) -> Tensor(a)[]
+- func: dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[]
   variants: function, method

 - func: squeeze(Tensor(a) self) -> Tensor(a)
   variants: function, method
   device_check: NoCheck
   device_guard: False
   dispatch:
-    CompositeExplicitAutograd: squeeze
+    CPU, CUDA: squeeze
+    QuantizedCPU, QuantizedCUDA: squeeze_quantized

 - func: squeeze.dim(Tensor(a) self, int dim) ->
Tensor(a) variants: function, method device_check: NoCheck device_guard: False dispatch: - CompositeExplicitAutograd: squeeze + CPU, CUDA: squeeze + QuantizedCPU, QuantizedCUDA: squeeze_quantized - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a) variants: function, method device_check: NoCheck device_guard: False - func: squeeze_(Tensor(a!) self) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view dispatch: CompositeExplicitAutograd: squeeze_ - func: squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view dispatch: CompositeExplicitAutograd: squeeze_ - func: squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view - func: sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor variants: function, method - func: sspaddmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) @@ -4103,23 +4300,28 @@ device_check: NoCheck # TensorIterator structured_delegate: sqrt.out variants: function, method dispatch: SparseCPU, SparseCUDA: sqrt_sparse + SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr - func: sqrt_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck # TensorIterator structured_delegate: sqrt.out variants: function, method + dispatch: + SparseCPU, SparseCUDA: sqrt_sparse_ + SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_ - func: sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator structured: True structured_inherits: TensorIteratorBase dispatch: CPU, CUDA: sqrt_out - SparseCPU, SparseCUDA: sqrt_out_sparse + SparseCPU, SparseCUDA: sqrt_sparse_out + SparseCsrCPU, SparseCsrCUDA: sqrt_sparse_csr_out - func: square(Tensor self) -> Tensor device_check: NoCheck # TensorIterator variants: function, method @@ -4223,50 +4425,66 @@ - func: t_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck device_guard: False variants: method + tags: inplace_view dispatch: CompositeExplicitAutograd: t_ - func: tan(Tensor self) -> Tensor device_check: NoCheck # TensorIterator structured_delegate: tan.out variants: function, method + dispatch: + SparseCPU, SparseCUDA: tan_sparse + SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr - func: tan_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck # TensorIterator structured_delegate: tan.out variants: function, method + dispatch: + SparseCPU, SparseCUDA: tan_sparse_ + SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_ - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator structured: True structured_inherits: TensorIteratorBase dispatch: CPU, CUDA: tan_out + SparseCPU, SparseCUDA: tan_sparse_out + SparseCsrCPU, SparseCsrCUDA: tan_sparse_csr_out - func: tanh(Tensor self) -> Tensor device_check: NoCheck # TensorIterator structured_delegate: tanh.out variants: function, method dispatch: QuantizedCPU: tanh_quantized_cpu MkldnnCPU: mkldnn_tanh + SparseCPU, SparseCUDA: tanh_sparse + SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr - func: tanh_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck # TensorIterator structured_delegate: tanh.out variants: function, method dispatch: MkldnnCPU: mkldnn_tanh_ + SparseCPU, SparseCUDA: tanh_sparse_ + SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_ + - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
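# NOTE: [sparse CSR dispatch for pointwise unary ops]
# tan/tanh above (like sqrt, sin, sign, and other zero-preserving unary ops
# throughout this file) gain SparseCsrCPU/SparseCsrCUDA kernels, so these
# ops can run directly on CSR tensors. A hedged sketch, assuming
# Tensor.to_sparse_csr() is available in this build:
#
#   >>> import torch
#   >>> d = torch.tensor([[0.0, 0.5], [0.0, 0.0]])
#   >>> csr = d.to_sparse_csr()      # assumes CSR conversion is supported
#   >>> csr.tanh().to_dense()        # dispatches to tanh_sparse_csr
#   tensor([[0.0000, 0.4621],
#           [0.0000, 0.0000]])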
device_check: NoCheck # TensorIterator structured: True structured_inherits: TensorIteratorBase dispatch: CPU, CUDA: tanh_out + SparseCPU, SparseCUDA: tanh_sparse_out + SparseCsrCPU, SparseCsrCUDA: tanh_sparse_csr_out - func: tensordot(Tensor self, Tensor other, int[] dims_self, int[] dims_other) -> Tensor variants: function - func: tensordot.out(Tensor self, Tensor other, int[] dims_self, int[] dims_other, *, Tensor(a!) out) -> Tensor(a!) @@ -4329,10 +4547,11 @@ - func: transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view dispatch: CompositeExplicitAutograd: transpose_ - func: _mkldnn_transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) device_check: NoCheck @@ -4386,24 +4605,30 @@ structured_delegate: trunc.out device_check: NoCheck # TensorIterator variants: function, method dispatch: CompositeExplicitAutograd: trunc + SparseCPU, SparseCUDA: trunc_sparse + SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr - func: trunc_(Tensor(a!) self) -> Tensor(a!) structured_delegate: trunc.out device_check: NoCheck # TensorIterator variants: function, method dispatch: CompositeExplicitAutograd: trunc_ + SparseCPU, SparseCUDA: trunc_sparse_ + SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_ - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) structured: True structured_inherits: TensorIteratorBase device_check: NoCheck # TensorIterator dispatch: CPU, CUDA: trunc_out + SparseCPU, SparseCUDA: trunc_sparse_out + SparseCsrCPU, SparseCsrCUDA: trunc_sparse_csr_out # Alias for trunc - func: fix(Tensor self) -> Tensor variants: function, method @@ -4459,16 +4684,19 @@ - func: unsqueeze(Tensor(a) self, int dim) -> Tensor(a) variants: function, method device_check: NoCheck device_guard: False dispatch: - CompositeExplicitAutograd: unsqueeze + CPU, CUDA: unsqueeze + SparseCPU, SparseCUDA: unsqueeze_sparse + QuantizedCPU, QuantizedCUDA: unsqueeze_quantized - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view dispatch: CompositeExplicitAutograd: unsqueeze_ - func: vander(Tensor x, int? N=None, bool increasing=False) -> Tensor @@ -4584,10 +4812,15 @@ - func: zeros.names(int[] size, *, Dimname[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor device_check: NoCheck device_guard: False +- func: _efficientzerotensor(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor + dispatch: + CPU: _efficientzerotensor + CUDA: _efficientzerotensor_cuda + - func: zeros(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor - func: zeros.out(int[] size, *, Tensor(a!) out) -> Tensor(a!) - func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor @@ -4653,32 +4886,38 @@ dispatch: SparseCPU: _sparse_sum_backward_cpu SparseCUDA: _sparse_sum_backward_cuda - func: _sparse_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor + python_module: sparse variants: function - func: _sparse_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? 
dtype=None) -> Tensor + python_module: sparse variants: function - func: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor + python_module: sparse dispatch: SparseCPU: softmax_sparse_cpu SparseCUDA: softmax_sparse_cuda - func: _sparse_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor dispatch: SparseCPU: softmax_backward_sparse_cpu SparseCUDA: softmax_backward_sparse_cuda - func: _sparse_log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor + python_module: sparse variants: function - func: _sparse_log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor + python_module: sparse variants: function - func: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor + python_module: sparse dispatch: SparseCPU: log_softmax_sparse_cpu SparseCUDA: log_softmax_sparse_cuda - func: _sparse_log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, Tensor self) -> Tensor @@ -4772,10 +5011,11 @@ - func: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor variants: function, method dispatch: CompositeExplicitAutograd: clone SparseCPU, SparseCUDA: clone_sparse + SparseCsrCPU, SparseCsrCUDA: clone_sparse_csr MkldnnCPU: mkldnn_clone QuantizedCPU, QuantizedCUDA: quantized_clone - func: positive(Tensor(a) self) -> Tensor(a) variants: function, method @@ -4884,22 +5124,33 @@ CompositeExplicitAutograd: rsub # Functionally the same as addmm, but we give it a different derivative formula # that doesn't propagate gradients to non-present entries on sparse. - func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor + python_module: sparse dispatch: CompositeExplicitAutograd: _sparse_addmm +- func: sparse_sampled_addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) + python_module: sparse + dispatch: + SparseCsrCUDA: sparse_sampled_addmm_out_sparse_csr_cuda + +- func: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor + python_module: sparse + dispatch: + SparseCsrCUDA: sparse_sampled_addmm_sparse_csr_cuda + - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) 
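# NOTE: [sparse_sampled_addmm usage sketch]
# sparse_sampled_addmm above evaluates alpha * (mat1 @ mat2) only at the
# sparsity pattern of the CSR input, then adds beta * self; note the
# dispatch table registers a CUDA kernel only. A hedged sketch, assuming
# the binding lands under torch.sparse per `python_module: sparse`:
#
#   >>> import torch
#   >>> s = torch.eye(3, device="cuda").to_sparse_csr()  # assumes CUDA + CSR conversion
#   >>> a = torch.randn(3, 4, device="cuda")
#   >>> b = torch.randn(4, 3, device="cuda")
#   >>> torch.sparse.sampled_addmm(s, a, b)  # CSR result, same pattern as s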
structured: True dispatch: CPU: addmm_out_cpu CUDA: addmm_out_cuda SparseCPU: addmm_out_sparse_dense_cpu SparseCUDA: addmm_out_sparse_dense_cuda - SparseCsrCPU: addmm_out_sparse_csr_dense_cpu - SparseCsrCUDA: addmm_out_sparse_csr_dense_cuda + SparseCsrCPU: addmm_out_sparse_csr_cpu + SparseCsrCUDA: addmm_out_sparse_csr_cuda - func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor structured_delegate: addmm.out variants: function, method dispatch: @@ -5207,16 +5458,16 @@ device_check: NoCheck # Allows copy into different device variants: function dispatch: SparseCPU, SparseCUDA: copy_sparse_ -- func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[] +- func: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[] variants: function, method dispatch: CompositeExplicitAutograd: unbind -- func: unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[] +- func: unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[] variants: function, method - func: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor variants: method dispatch: @@ -5244,10 +5495,15 @@ dispatch: MkldnnCPU: mkldnn_reorder_conv3d_weight - func: to_mkldnn_backward(Tensor grad, Tensor input) -> Tensor +- func: quantize_per_tensor_dynamic(Tensor self, ScalarType dtype, bool reduce_range) -> Tensor + variants: function + dispatch: + CPU, CUDA: quantize_per_tensor_dynamic + - func: quantize_per_tensor(Tensor self, float scale, int zero_point, ScalarType dtype) -> Tensor variants: function dispatch: CPU, CUDA: quantize_per_tensor @@ -5267,11 +5523,11 @@ CPU, CUDA: quantize_per_channel - func: dequantize.self(Tensor self) -> Tensor variants: function, method dispatch: - CPU: dequantize_cpu + CPU, CUDA: dequantize_cpu_or_cuda QuantizedCPU, QuantizedCUDA: dequantize_quantized - func: dequantize.tensors(Tensor[] tensors) -> Tensor[] variants: function dispatch: @@ -5389,10 +5645,18 @@ variants: function - func: choose_qparams_optimized(Tensor input, int numel, int n_bins, float ratio, int bit_width) -> (Tensor, Tensor) variants: function +- func: _autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a) + variants: method + device_guard: False + +- func: _autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a) + variants: method + device_guard: False + - func: _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor device_check: NoCheck device_guard: False dispatch: CompositeExplicitAutograd: _to_copy @@ -5587,10 +5851,12 @@ CUDA: masked_fill__cuda - func: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor device_check: NoCheck # TensorIterator variants: function, method + dispatch: + CompositeExplicitAutograd: masked_fill - func: masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!) device_check: NoCheck # TensorIterator variants: method dispatch: @@ -5598,26 +5864,35 @@ CUDA: masked_fill__cuda - func: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor device_check: NoCheck # TensorIterator variants: function, method + dispatch: + CompositeExplicitAutograd: masked_fill - func: masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!) 
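# NOTE: [quantize_per_tensor_dynamic usage sketch]
# quantize_per_tensor_dynamic above chooses scale and zero_point from the
# input's observed min/max at call time, instead of taking them as
# arguments the way quantize_per_tensor does. A hedged sketch, assuming the
# function is exposed as torch.quantize_per_tensor_dynamic:
#
#   >>> import torch
#   >>> x = torch.randn(4)
#   >>> q = torch.quantize_per_tensor_dynamic(x, torch.quint8, reduce_range=False)
#   >>> q.q_scale(), q.q_zero_point()  # qparams derived from x's min/max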
variants: method dispatch: CPU: masked_scatter__cpu CUDA: masked_scatter__cuda - func: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor variants: function, method + dispatch: + CompositeExplicitAutograd: masked_scatter +- func: _masked_softmax(Tensor self, Tensor mask) -> Tensor + dispatch: + CUDA: masked_softmax_cuda + CPU: masked_softmax_cpu + - func: view(Tensor(a) self, int[] size) -> Tensor(a) variants: method device_check: NoCheck device_guard: False dispatch: - CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view + ZeroTensor, CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA: view MkldnnCPU: mkldnn_view # Warning: If you want to change the name or overload name of this # operator, you might also want to change the `isBlockListedSchema` # function in `torch/csrc/jit/frontend/schema_catching.cpp`. @@ -5637,25 +5912,27 @@ CPU, CUDA: put_ - func: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor variants: function, method -- func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!) - variants: method +- func: index_add.out(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) + structured: True + variants: function + precomputed: + - dim -> int dim + dispatch: + CPU: index_add_cpu_out + CUDA: index_add_cuda_out -- func: index_add_.alpha(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor(a!) +- func: index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!) + structured_delegate: index_add.out variants: method - dispatch: - CPU: index_add_cpu_ - CUDA: index_add_cuda_ -- func: index_add(Tensor self, int dim, Tensor index, Tensor source) -> Tensor +- func: index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor + structured_delegate: index_add.out variants: function, method -- func: index_add.alpha(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha) -> Tensor - variants: function, method - - func: index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor variants: function, method - func: index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) device_check: NoCheck # TensorIterator @@ -5665,20 +5942,24 @@ CUDA: index_fill_ - func: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor device_check: NoCheck # TensorIterator variants: function, method + dispatch: + CompositeExplicitAutograd: index_fill - func: index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!) device_check: NoCheck # TensorIterator variants: method dispatch: CPU, CUDA: index_fill_ - func: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor device_check: NoCheck # TensorIterator variants: function, method + dispatch: + CompositeExplicitAutograd: index_fill - func: index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!) device_check: NoCheck # TensorIterator variants: method @@ -5771,10 +6052,15 @@ CPU, CUDA: scatter_add - func: scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor variants: function, method +- func: scatter_reduce.two(Tensor self, int dim, Tensor index, str reduce, *, int? output_size=None) -> Tensor + variants: function, method + dispatch: + CPU: scatter_reduce_two_cpu + - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
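# NOTE: [index_add alpha consolidation]
# The index_add entries above fold the old separate .alpha overload into a
# single structured signature with `Scalar alpha=1`, delegating to the new
# index_add.out kernel. A minimal sketch, assuming the standard torch API:
#
#   >>> import torch
#   >>> t = torch.zeros(3, 2)
#   >>> idx = torch.tensor([0, 2])
#   >>> src = torch.ones(2, 2)
#   >>> t.index_add_(0, idx, src, alpha=2.0)  # rows 0 and 2 each get +2.0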
structured_delegate: eq.Scalar_out device_check: NoCheck # TensorIterator variants: method dispatch: @@ -6062,39 +6348,31 @@ variants: function dispatch: CPU, CUDA: bitwise_right_shift - func: tril_(Tensor(a!) self, int diagonal=0) -> Tensor(a!) + structured_delegate: tril.out variants: method - dispatch: - CPU: tril_cpu_ - CUDA: tril_cuda_ - func: triu_(Tensor(a!) self, int diagonal=0) -> Tensor(a!) + structured_delegate: triu.out variants: method - dispatch: - CPU: triu_cpu_ - CUDA: triu_cuda_ - func: digamma_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck # TensorIterator structured_delegate: digamma.out variants: method - func: lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!) device_check: NoCheck # TensorIterator variants: method - dispatch: - CPU: lerp_cpu_scalar_ - CUDA: lerp_cuda_scalar_ + structured_delegate: lerp.Scalar_out - func: lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!) device_check: NoCheck # TensorIterator variants: method - dispatch: - CPU: lerp_cpu_tensor_ - CUDA: lerp_cuda_tensor_ + structured_delegate: lerp.Tensor_out - func: addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) variants: method dispatch: CPU, CUDA: addbmm_ @@ -6176,37 +6454,33 @@ variants: function device_check: NoCheck device_guard: False - func: cross.out(Tensor self, Tensor other, int? dim=None, *, Tensor(a!) out) -> Tensor(a!) - dispatch: - CPU, CUDA: cross_out - func: cross(Tensor self, Tensor other, int? dim=None) -> Tensor variants: method, function - dispatch: - CPU, CUDA: cross - func: triu.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!) + structured: True dispatch: - CPU: triu_cpu_out - CUDA: triu_cuda_out + CPU: triu_cpu + CUDA: triu_cuda - func: triu(Tensor self, int diagonal=0) -> Tensor + structured_delegate: triu.out variants: method, function - dispatch: - CompositeExplicitAutograd: triu - func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!) + structured: True dispatch: - CPU: tril_cpu_out - CUDA: tril_cuda_out + CPU: tril_cpu + CUDA: tril_cuda - func: tril(Tensor self, int diagonal=0) -> Tensor + structured_delegate: tril.out variants: method, function - dispatch: - CompositeExplicitAutograd: tril - func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor dispatch: CPU: tril_indices_cpu CUDA: tril_indices_cuda @@ -6582,11 +6856,12 @@ CUDA, QuantizedCUDA: index_select_out_cuda - func: index_select(Tensor self, int dim, Tensor index) -> Tensor variants: method, function dispatch: - CPU, QuantizedCPU: index_select_cpu_ + CPU: index_select_cpu_ + QuantizedCPU: index_select_quantized_cpu_ CUDA, QuantizedCUDA: index_select_cuda SparseCPU: index_select_sparse SparseCUDA: index_select_sparse - func: index_select.dimname_out(Tensor self, Dimname dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!) @@ -6627,10 +6902,13 @@ CUDA: nonzero_cuda - func: nonzero_numpy(Tensor self) -> Tensor[] variants: method, function +- func: argwhere(Tensor self) -> Tensor + variants: method, function + - func: gather.out(Tensor self, int dim, Tensor index, *, bool sparse_grad=False, Tensor(a!) out) -> Tensor(a!) structured: True dispatch: CPU, CUDA: gather_out @@ -6697,18 +6975,35 @@ dispatch: CPU: legacy_lstsq CUDA: legacy_lstsq_cuda - func: triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) 
M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient) + structured: True dispatch: CPU, CUDA: triangular_solve_out + SparseCsrCPU: triangular_solve_out_sparse_csr_cpu + SparseCsrCUDA: triangular_solve_out_sparse_csr_cuda - func: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient) + structured_delegate: triangular_solve.X variants: method, function + +- func: _linalg_check_errors(Tensor info, str api_name, *, bool is_matrix) -> () dispatch: - CPU, CUDA: triangular_solve + CompositeExplicitAutograd: _linalg_check_errors +- func: linalg_solve_triangular.out(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False, Tensor(a!) out) -> Tensor(a!) + python_module: linalg + dispatch: + CPU, CUDA: linalg_solve_triangular_out + +- func: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor + python_module: linalg + variants: method, function + dispatch: + CPU, CUDA: linalg_solve_triangular + - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors) dispatch: CompositeExplicitAutograd: symeig_out - func: symeig(Tensor self, bool eigenvectors=False, bool upper=True) -> (Tensor eigenvalues, Tensor eigenvectors) @@ -6734,26 +7029,21 @@ - func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) - func: svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V) variants: method, function -- func: _svd_helper(Tensor self, bool some, bool compute_uv) -> (Tensor U, Tensor S, Tensor V) - variants: function - dispatch: - CPU: _svd_helper_cpu - CUDA: _svd_helper_cuda - # swapaxes, alias for transpose - func: swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a) variants: function, method device_check: NoCheck device_guard: False - func: swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view # swapdims, alias for transpose - func: swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a) variants: function, method device_check: NoCheck @@ -6761,10 +7051,11 @@ - func: swapdims_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) variants: method device_check: NoCheck device_guard: False + tags: inplace_view - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!) dispatch: CPU, CUDA: cholesky_out @@ -6841,12 +7132,10 @@ dispatch: CPU, CUDA: ormqr - func: _lu_with_info(Tensor self, bool pivot=True, bool check_errors=True) -> (Tensor LU, Tensor pivots, Tensor info) variants: function - dispatch: - CPU, CUDA: _lu_with_info - func: lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) out) -> Tensor(a!) dispatch: CPU, CUDA: lu_solve_out @@ -6924,22 +7213,30 @@ - func: erfinv(Tensor self) -> Tensor device_check: NoCheck # TensorIterator structured_delegate: erfinv.out variants: method, function + dispatch: + SparseCPU, SparseCUDA: erfinv_sparse + SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr - func: erfinv_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck # TensorIterator structured_delegate: erfinv.out variants: method + dispatch: + SparseCPU, SparseCUDA: erfinv_sparse_ + SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_ - func: erfinv.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!) device_check: NoCheck # TensorIterator structured: True structured_inherits: TensorIteratorBase dispatch: CPU, CUDA: erfinv_out + SparseCPU, SparseCUDA: erfinv_sparse_out + SparseCsrCPU, SparseCsrCUDA: erfinv_sparse_csr_out - func: i0(Tensor self) -> Tensor structured_delegate: i0.out variants: function, method @@ -6957,35 +7254,46 @@ device_check: NoCheck # TensorIterator structured_delegate: sign.out variants: function, method dispatch: CompositeExplicitAutograd: sign + SparseCPU, SparseCUDA: sign_sparse + SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr - func: sign_(Tensor(a!) self) -> Tensor(a!) device_check: NoCheck # TensorIterator structured_delegate: sign.out variants: method dispatch: CompositeExplicitAutograd: sign_ + SparseCPU, SparseCUDA: sign_sparse_ + SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_ - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator structured: True structured_inherits: TensorIteratorBase dispatch: CPU, CUDA: sign_out + SparseCPU, SparseCUDA: sign_sparse_out + SparseCsrCPU, SparseCsrCUDA: sign_sparse_csr_out - func: signbit(Tensor self) -> Tensor variants: function, method structured_delegate: signbit.out + dispatch: + SparseCPU, SparseCUDA: signbit_sparse + SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr - func: signbit.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) structured: True structured_inherits: TensorIteratorBase dispatch: CPU: signbit_out CUDA: signbit_out + SparseCPU, SparseCUDA: signbit_sparse_out + SparseCsrCPU, SparseCsrCUDA: signbit_sparse_csr_out - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor device_check: NoCheck # TensorIterator variants: method, function dispatch: @@ -7006,35 +7314,43 @@ - func: atan2(Tensor self, Tensor other) -> Tensor device_check: NoCheck # TensorIterator structured_delegate: atan2.out variants: method, function +# arctan2, alias of atan2 +- func: arctan2(Tensor self, Tensor other) -> Tensor + variants: method, function + +- func: arctan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) + device_check: NoCheck # TensorIterator + +- func: arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) + variants: method + - func: lerp.Scalar_out(Tensor self, Tensor end, Scalar weight, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator + structured: True + structured_inherits: TensorIteratorBase dispatch: - CPU: lerp_cpu_scalar_out - CUDA: lerp_cuda_scalar_out + CPU, CUDA: lerp_Scalar - func: lerp.Tensor_out(Tensor self, Tensor end, Tensor weight, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator + structured: True + structured_inherits: TensorIteratorBase dispatch: - CPU: lerp_cpu_tensor_out - CUDA: lerp_cuda_tensor_out + CPU, CUDA: lerp_Tensor - func: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor device_check: NoCheck # TensorIterator variants: method, function - dispatch: - CPU: lerp_cpu_scalar - CUDA: lerp_cuda_scalar + structured_delegate: lerp.Scalar_out - func: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor device_check: NoCheck # TensorIterator variants: method, function - dispatch: - CPU: lerp_cpu_tensor - CUDA: lerp_cuda_tensor + structured_delegate: lerp.Tensor_out - func: histc.out(Tensor self, int bins=100, Scalar min=0, Scalar max=0, *, Tensor(a!) out) -> Tensor(a!) dispatch: CPU: histogram_histc_cpu_out CUDA: _histc_out_cuda @@ -7061,10 +7377,22 @@ - func: histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? 
weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges) variants: method, function dispatch: CPU: histogram_cpu +- func: _histogramdd_bin_edges(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor[] + dispatch: + CPU: histogramdd_bin_edges_cpu + +- func: _histogramdd_from_bin_cts(Tensor self, int[] bins, *, float[]? range=None, Tensor? weight=None, bool density=False) -> Tensor + dispatch: + CPU: histogramdd_cpu + +- func: _histogramdd_from_bin_tensors(Tensor self, Tensor[] bins, *, Tensor? weight=None, bool density=False) -> Tensor + dispatch: + CPU: histogramdd_cpu + - func: fmod.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator dispatch: CompositeExplicitAutograd: fmod_out @@ -7273,53 +7601,30 @@ - func: min.other(Tensor self, Tensor other) -> Tensor device_check: NoCheck # TensorIterator variants: method, function -# The following quantile signatures are DEPRECATED in favor of the new ones with the interpolation kwarg. -- func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) - -- func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor +- func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor variants: method, function -- func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +- func: quantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!) -- func: quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor +- func: quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor variants: method, function -- func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +- func: quantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!) -- func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False) -> Tensor +- func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor variants: method, function -- func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) +- func: nanquantile.out(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!) -- func: nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False) -> Tensor +- func: nanquantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor variants: method, function -# To keep backward and forward compatibility, and to avoid ambiguity with the original signatures, dim, keepdim and interpolation -# parameters are required for now. Once the deprecated signatures are removed they will be made optional. -- func: quantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!) +- func: nanquantile.scalar_out(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear', Tensor(a!) out) -> Tensor(a!) -- func: quantile.new_scalar(Tensor self, float q, int? 
dim, bool keepdim, *, str interpolation) -> Tensor - variants: method, function - -- func: quantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!) - -- func: quantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor - variants: method, function - -- func: nanquantile.new_scalar_out(Tensor self, float q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!) - -- func: nanquantile.new_scalar(Tensor self, float q, int? dim, bool keepdim, *, str interpolation) -> Tensor - variants: method, function - -- func: nanquantile.new_out(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation, Tensor(a!) out) -> Tensor(a!) - -- func: nanquantile.new(Tensor self, Tensor q, int? dim, bool keepdim, *, str interpolation) -> Tensor - variants: method, function - - func: sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) device_check: NoCheck # TensorIterator dispatch: CPU: sort_out_cpu CUDA: sort_out_cuda @@ -7509,10 +7814,11 @@ device_check: NoCheck # TensorIterator variants: method dispatch: CPU, CUDA: normal_ Meta: normal_meta_ + SparseCsrCPU, SparseCsrCUDA: normal_sparse_csr_ - func: normal.Tensor_float_out(Tensor mean, float std=1, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!) dispatch: CPU, CUDA: normal_out @@ -8207,10 +8513,17 @@ variants: function dispatch: CPU: foreach_tensor_minimum_slow CUDA: foreach_tensor_minimum_cuda +- func: _foreach_norm.Scalar(Tensor[] tensors, Scalar ord=2) -> Tensor[] + device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices + variants: function + dispatch: + CPU: foreach_tensor_norm_slow + CUDA: foreach_tensor_norm_cuda + - func: bucketize.Tensor(Tensor self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor dispatch: CPU: bucketize_cpu CUDA: bucketize_cuda @@ -8222,21 +8535,31 @@ - func: bucketize.Scalar(Scalar self, Tensor boundaries, *, bool out_int32=False, bool right=False) -> Tensor dispatch: CPU: bucketize_cpu CUDA: bucketize_cuda -- func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False) -> Tensor +- func: searchsorted.Tensor(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor dispatch: CPU: searchsorted_cpu CUDA: searchsorted_cuda -- func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, Tensor(a!) out) -> Tensor(a!) +# [Note about _torch_cuda_cu_linker_symbol_op and torch_cuda_cu] +# This is a DUMMY function to force the linking against torch_cuda_cu on Windows. +# Otherwise, the Windows linker will optimize and not include torch_cuda_cu even when we +# want it to be included. This is similar to what we do with warp_size for torch_cuda_cpp, +# described as the solution to this issue: https://github.com/pytorch/pytorch/issues/31611 +# This op should NOT be used or exposed or edited or else Windows builds (with BUILD_SPLIT_CUDA) will break. +- func: _torch_cuda_cu_linker_symbol_op(Tensor self) -> Tensor dispatch: + CUDA: _torch_cuda_cu_linker_symbol_op_cuda + +- func: searchsorted.Tensor_out(Tensor sorted_sequence, Tensor self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None, Tensor(a!) out) -> Tensor(a!) 
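# NOTE: [quantile and searchsorted signature changes]
# The quantile/nanquantile block above removes the deprecated ".new_*"
# overloads and makes `interpolation` a regular keyword defaulting to
# 'linear'; searchsorted gains optional `side` and `sorter` keywords.
# A minimal sketch, assuming the standard torch API:
#
#   >>> import torch
#   >>> x = torch.arange(5.)
#   >>> torch.quantile(x, 0.5, interpolation='nearest')
#   tensor(2.)
#   >>> torch.searchsorted(x, torch.tensor([2.0]), side='right')
#   tensor([3])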
+ dispatch: CPU: searchsorted_out_cpu CUDA: searchsorted_out_cuda -- func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False) -> Tensor +- func: searchsorted.Scalar(Tensor sorted_sequence, Scalar self, *, bool out_int32=False, bool right=False, str? side=None, Tensor? sorter=None) -> Tensor dispatch: CPU: searchsorted_cpu CUDA: searchsorted_cuda - func: _convert_indices_from_coo_to_csr(Tensor self, int size, *, bool out_int32=False) -> Tensor @@ -8246,10 +8569,19 @@ structured: True dispatch: CPU: _convert_indices_from_coo_to_csr_structured_cpu CUDA: _convert_indices_from_coo_to_csr_structured_cuda +- func: _convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor + structured_delegate: _convert_indices_from_csr_to_coo.out + +- func: _convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!) + structured: True + dispatch: + CPU: _convert_indices_from_csr_to_coo_structured_cpu + CUDA: _convert_indices_from_csr_to_coo_structured_cuda + ## NN wrappers - func: mse_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator python_module: nn @@ -8407,20 +8739,20 @@ CPU: nll_loss2d_backward_cpu CUDA: nll_loss2d_backward_cuda - func: smooth_l1_loss.out(Tensor self, Tensor target, int reduction=Mean, float beta=1.0, *, Tensor(a!) out) -> Tensor(a!) device_check: NoCheck # TensorIterator + structured: True + structured_inherits: TensorIteratorBase python_module: nn dispatch: - CPU: smooth_l1_loss_out - CUDA: smooth_l1_loss_out + CPU, CUDA: smooth_l1_loss_out - func: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor device_check: NoCheck # TensorIterator + structured_delegate: smooth_l1_loss.out python_module: nn - dispatch: - CPU, CUDA: smooth_l1_loss - func: smooth_l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn dispatch: CPU: smooth_l1_loss_backward_out @@ -8531,10 +8863,11 @@ structured_inherits: TensorIteratorBase device_check: NoCheck # TensorIterator python_module: nn dispatch: CPU, CUDA: hardsigmoid_out + QuantizedCPU: hardsigmoid_out_quantized_cpu - func: hardsigmoid(Tensor self) -> Tensor structured_delegate: hardsigmoid.out device_check: NoCheck # TensorIterator python_module: nn @@ -8713,18 +9046,18 @@ - func: softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor structured_delegate: softplus.out device_check: NoCheck # TensorIterator python_module: nn -- func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!) +- func: softplus_backward.grad_input(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, *, Tensor(a!) grad_input) -> Tensor(a!) structured: True structured_inherits: TensorIteratorBase python_module: nn dispatch: CPU, CUDA: softplus_backward_out -- func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold, Tensor output) -> Tensor +- func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor structured_delegate: softplus_backward.grad_input python_module: nn - func: softshrink.out(Tensor self, Scalar lambd=0.5, *, Tensor(a!) out) -> Tensor(a!) 
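# NOTE: [smooth_l1_loss structured port]
# smooth_l1_loss above moves from per-backend dispatch to a structured
# TensorIterator kernel; the user-facing call is unchanged. A minimal
# sketch, assuming the standard torch.nn.functional API:
#
#   >>> import torch
#   >>> import torch.nn.functional as F
#   >>> F.smooth_l1_loss(torch.tensor([0.2]), torch.tensor([0.0]), beta=1.0)
#   tensor(0.0200)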
structured: True @@ -8931,33 +9264,34 @@ python_module: nn structured_delegate: fractional_max_pool2d.output - func: fractional_max_pool2d_backward.grad_input(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn + structured: True dispatch: - CPU: fractional_max_pool2d_backward_out_cpu - CUDA: fractional_max_pool2d_backward_out_cuda + CPU: fractional_max_pool2d_backward_cpu + CUDA: fractional_max_pool2d_backward_cuda - func: fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor python_module: nn - dispatch: - CPU: fractional_max_pool2d_backward_cpu - CUDA: fractional_max_pool2d_backward_cuda + structured_delegate: fractional_max_pool2d_backward.grad_input # Return: (Tensor output, Tensor indices) - func: fractional_max_pool3d.output(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples, *, Tensor(a!) output, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) python_module: nn + structured: True + precomputed: + - kernel_size -> int poolSizeT, int poolSizeH, int poolSizeW + - output_size -> int outputT, int outputH, int outputW dispatch: CPU: fractional_max_pool3d_out_cpu CUDA: fractional_max_pool3d_out_cuda # Return: (Tensor output, Tensor indices) - func: fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor) python_module: nn - dispatch: - CPU: fractional_max_pool3d_cpu - CUDA: fractional_max_pool3d_cuda + structured_delegate: fractional_max_pool3d.output - func: fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn dispatch: CPU: fractional_max_pool3d_backward_out_cpu @@ -9223,10 +9557,20 @@ - func: upsample_bilinear2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_bilinear2d_backward +- func: _upsample_bilinear2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_bilinear2d_aa + +- func: _upsample_bilinear2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_bilinear2d_aa_backward + - func: upsample_trilinear3d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_trilinear3d @@ -9243,43 +9587,86 @@ - func: upsample_bicubic2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_bicubic2d_backward +- func: _upsample_bicubic2d_aa.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_bicubic2d_aa + +- func: _upsample_bicubic2d_aa_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? 
scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_bicubic2d_aa_backward + - func: upsample_nearest1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_nearest1d +- func: _upsample_nearest_exact1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_nearest_exact1d + - func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_nearest1d_backward +- func: _upsample_nearest_exact1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_nearest_exact1d_backward + - func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_nearest2d +- func: _upsample_nearest_exact2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_nearest_exact2d + - func: upsample_nearest2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor python_module: nn dispatch: CompositeExplicitAutograd: upsample_nearest2d_backward +- func: _upsample_nearest_exact2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CompositeExplicitAutograd: _upsample_nearest_exact2d_backward + - func: upsample_nearest3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor python_module: nn dispatch: CPU: upsample_nearest3d_cpu CUDA: upsample_nearest3d_cuda QuantizedCPU: upsample_nearest3d_quantized_cpu +- func: _upsample_nearest_exact3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CPU: _upsample_nearest_exact3d_cpu + CUDA: _upsample_nearest_exact3d_cuda + QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu + - func: upsample_nearest3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor python_module: nn dispatch: CPU: upsample_nearest3d_backward_cpu CUDA: upsample_nearest3d_backward_cuda +- func: _upsample_nearest_exact3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor + python_module: nn + dispatch: + CPU: _upsample_nearest_exact3d_backward_cpu + CUDA: _upsample_nearest_exact3d_backward_cuda + # NOTE: all of the non-"vec" upsample overloads are only kept for backward compatibility. - func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, float? scales=None, *, Tensor(a!) out) -> Tensor(a!) python_module: nn structured: True dispatch: @@ -9323,10 +9710,32 @@ - func: upsample_bilinear2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn structured_delegate: upsample_bilinear2d_backward.grad_input +- func: _upsample_bilinear2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) 
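# NOTE: [_upsample_*_aa = antialiased interpolation]
# The _upsample_bilinear2d_aa / _upsample_bicubic2d_aa entries above back
# an antialiasing option that mainly matters when downscaling. A hedged
# sketch, assuming these kernels are reached via the `antialias` keyword
# of torch.nn.functional.interpolate:
#
#   >>> import torch
#   >>> import torch.nn.functional as F
#   >>> x = torch.randn(1, 3, 64, 64)
#   >>> y = F.interpolate(x, scale_factor=0.5, mode='bilinear',
#   ...                   align_corners=False, antialias=True)
#   >>> y.shape
#   torch.Size([1, 3, 32, 32])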
+ python_module: nn + structured: True + dispatch: + CPU: _upsample_bilinear2d_aa_out_cpu + CUDA: _upsample_bilinear2d_aa_out_cuda + +- func: _upsample_bilinear2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_bilinear2d_aa.out + +- func: _upsample_bilinear2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_bilinear2d_aa_backward_out_cpu + CUDA: _upsample_bilinear2d_aa_backward_out_cuda + +- func: _upsample_bilinear2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_bilinear2d_aa_backward.grad_input + - func: upsample_bicubic2d.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) python_module: nn structured: True dispatch: CPU: upsample_bicubic2d_out_cpu @@ -9345,10 +9754,32 @@ - func: upsample_bicubic2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn structured_delegate: upsample_bicubic2d_backward.grad_input +- func: _upsample_bicubic2d_aa.out(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_bicubic2d_aa_out_cpu + CUDA: _upsample_bicubic2d_aa_out_cuda + +- func: _upsample_bicubic2d_aa(Tensor self, int[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_bicubic2d_aa.out + +- func: _upsample_bicubic2d_aa_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_bicubic2d_aa_backward_out_cpu + CUDA: _upsample_bicubic2d_aa_backward_out_cuda + +- func: _upsample_bicubic2d_aa_backward(Tensor grad_output, int[2] output_size, int[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_bicubic2d_aa_backward.grad_input + - func: upsample_trilinear3d.out(Tensor self, int[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) python_module: nn structured: True dispatch: CPU: upsample_trilinear3d_out_cpu @@ -9374,73 +9805,143 @@ structured: True dispatch: CPU: upsample_nearest1d_out_cpu CUDA: upsample_nearest1d_out_cuda +- func: _upsample_nearest_exact1d.out(Tensor self, int[1] output_size, float? scales=None, *, Tensor(a!) out) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_nearest_exact1d_out_cpu + CUDA: _upsample_nearest_exact1d_out_cuda + - func: upsample_nearest1d(Tensor self, int[1] output_size, float? scales=None) -> Tensor python_module: nn structured_delegate: upsample_nearest1d.out +- func: _upsample_nearest_exact1d(Tensor self, int[1] output_size, float? 
scales=None) -> Tensor + python_module: nn + structured_delegate: _upsample_nearest_exact1d.out + - func: upsample_nearest1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn structured: True dispatch: CPU: upsample_nearest1d_backward_out_cpu CUDA: upsample_nearest1d_backward_out_cuda +- func: _upsample_nearest_exact1d_backward.grad_input(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None, *, Tensor(a!) grad_input) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_nearest_exact1d_backward_out_cpu + CUDA: _upsample_nearest_exact1d_backward_out_cuda + - func: upsample_nearest1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor python_module: nn structured_delegate: upsample_nearest1d_backward.grad_input +- func: _upsample_nearest_exact1d_backward(Tensor grad_output, int[1] output_size, int[3] input_size, float? scales=None) -> Tensor + python_module: nn + structured_delegate: _upsample_nearest_exact1d_backward.grad_input + - func: upsample_nearest2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) python_module: nn structured: True dispatch: CPU: upsample_nearest2d_out_cpu CUDA: upsample_nearest2d_out_cuda +- func: _upsample_nearest_exact2d.out(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_nearest_exact2d_out_cpu + CUDA: _upsample_nearest_exact2d_out_cuda + - func: upsample_nearest2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn structured_delegate: upsample_nearest2d.out dispatch: QuantizedCPU: upsample_nearest2d_quantized_cpu +- func: _upsample_nearest_exact2d(Tensor self, int[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_nearest_exact2d.out + dispatch: + QuantizedCPU: _upsample_nearest_exact2d_quantized_cpu + - func: upsample_nearest2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn structured: True dispatch: CPU: upsample_nearest2d_backward_out_cpu CUDA: upsample_nearest2d_backward_out_cuda +- func: _upsample_nearest_exact2d_backward.grad_input(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_nearest_exact2d_backward_out_cpu + CUDA: _upsample_nearest_exact2d_backward_out_cuda + - func: upsample_nearest2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn structured_delegate: upsample_nearest2d_backward.grad_input +- func: _upsample_nearest_exact2d_backward(Tensor grad_output, int[2] output_size, int[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_nearest_exact2d_backward.grad_input + - func: upsample_nearest3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) 
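# NOTE: [_upsample_nearest_exact = 'nearest-exact' mode]
# The _upsample_nearest_exact* entries above implement a nearest-neighbor
# variant whose pixel mapping matches common image libraries (e.g. PIL);
# the legacy 'nearest' kernels are kept for backward compatibility. A
# hedged sketch, assuming these kernels are reached via the
# 'nearest-exact' mode string of torch.nn.functional.interpolate:
#
#   >>> import torch
#   >>> import torch.nn.functional as F
#   >>> x = torch.randn(1, 1, 4, 4)
#   >>> F.interpolate(x, scale_factor=2.0, mode='nearest-exact').shape
#   torch.Size([1, 1, 8, 8])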
python_module: nn structured: True dispatch: CPU: upsample_nearest3d_out_cpu CUDA: upsample_nearest3d_out_cuda +- func: _upsample_nearest_exact3d.out(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_nearest_exact3d_out_cpu + CUDA: _upsample_nearest_exact3d_out_cuda + - func: upsample_nearest3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn structured_delegate: upsample_nearest3d.out dispatch: QuantizedCPU: upsample_nearest3d_quantized_cpu +- func: _upsample_nearest_exact3d(Tensor self, int[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_nearest_exact3d.out + dispatch: + QuantizedCPU: _upsample_nearest_exact3d_quantized_cpu + - func: upsample_nearest3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn structured: True dispatch: CPU: upsample_nearest3d_backward_out_cpu CUDA: upsample_nearest3d_backward_out_cuda +- func: _upsample_nearest_exact3d_backward.grad_input(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None, *, Tensor(a!) grad_input) -> Tensor(a!) + python_module: nn + structured: True + dispatch: + CPU: _upsample_nearest_exact3d_backward_out_cpu + CUDA: _upsample_nearest_exact3d_backward_out_cuda + - func: upsample_nearest3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor python_module: nn structured_delegate: upsample_nearest3d_backward.grad_input +- func: _upsample_nearest_exact3d_backward(Tensor grad_output, int[3] output_size, int[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor + python_module: nn + structured_delegate: _upsample_nearest_exact3d_backward.grad_input + - func: sigmoid_backward.grad_input(Tensor grad_output, Tensor output, *, Tensor(a!) grad_input) -> Tensor(a!) python_module: nn structured: True structured_inherits: TensorIteratorBase dispatch: @@ -9499,22 +10000,10 @@ - func: slow_conv_transpose2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] output_padding=0, int[2] dilation=1) -> Tensor python_module: nn structured_delegate: slow_conv_transpose2d.out -- func: slow_conv_transpose2d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) 
grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) - python_module: nn - dispatch: - CPU: slow_conv_transpose2d_backward_out_cpu - CUDA: slow_conv_transpose2d_backward_out_cuda - -- func: slow_conv_transpose2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] output_padding, int[2] dilation, Tensor columns, Tensor ones, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) - python_module: nn - dispatch: - CPU: slow_conv_transpose2d_backward_cpu - CUDA: slow_conv_transpose2d_backward_cuda - - func: slow_conv_transpose3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] output_padding=0, int[3] dilation=1, *, Tensor(a!) out) -> Tensor(a!) python_module: nn dispatch: CPU: slow_conv_transpose3d_out_cpu CUDA: slow_conv_transpose3d_out_cuda @@ -9523,47 +10012,35 @@ python_module: nn dispatch: CPU: slow_conv_transpose3d_cpu CUDA: slow_conv_transpose3d_cuda -- func: slow_conv_transpose3d_backward.grad_output(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) - python_module: nn - dispatch: - CPU: slow_conv_transpose3d_backward_out_cpu - CUDA: slow_conv_transpose3d_backward_out_cuda - -- func: slow_conv_transpose3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] output_padding, int[3] dilation, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) - python_module: nn - dispatch: - CPU: slow_conv_transpose3d_backward_cpu - CUDA: slow_conv_transpose3d_backward_cuda - - func: thnn_conv2d.out(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn - func: thnn_conv2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0) -> Tensor python_module: nn -- func: thnn_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +- func: _slow_conv2d_forward.output(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, *, Tensor(a!) output) -> Tensor(a!) python_module: nn dispatch: CPU: slow_conv2d_forward_out_cpu CUDA: slow_conv2d_forward_out_cuda -- func: thnn_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input) +- func: _slow_conv2d_forward(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding) -> Tensor python_module: nn dispatch: CPU: slow_conv2d_forward_cpu CUDA: slow_conv2d_forward_cuda -- func: thnn_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) 
grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +- func: _slow_conv2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) python_module: nn dispatch: CPU: slow_conv2d_backward_out_cpu CUDA: slow_conv2d_backward_out_cuda -- func: thnn_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) +- func: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) python_module: nn dispatch: CPU: slow_conv2d_backward_cpu CUDA: slow_conv2d_backward_cuda @@ -9576,85 +10053,43 @@ - func: _conv_depthwise2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias, int[2] stride, int[2] padding, int[2] dilation) -> Tensor python_module: nn dispatch: CUDA: conv_depthwise2d_cuda -- func: _conv_depthwise2d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight) -> (Tensor(a!), Tensor(b!)) - python_module: nn - dispatch: - CUDA: conv_depthwise2d_backward_cuda_out - -- func: _conv_depthwise2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[2] output_mask) -> (Tensor grad_input, Tensor grad_weight) - python_module: nn - dispatch: - CUDA: conv_depthwise2d_backward_cuda - - func: conv_depthwise3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, int[3] dilation) -> Tensor python_module: nn dispatch: CUDA: conv_depthwise3d_cuda -- func: conv_depthwise3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) - python_module: nn - dispatch: - CUDA: conv_depthwise3d_backward_cuda_out - -- func: conv_depthwise3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) - python_module: nn - dispatch: - CUDA: conv_depthwise3d_backward_cuda - - func: slow_conv3d.out(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, *, Tensor(a!) out) -> Tensor(a!) python_module: nn - func: slow_conv3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0) -> Tensor python_module: nn -- func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output, Tensor(b!) finput, Tensor(c!) fgrad_input) -> (Tensor(a!), Tensor(b!), Tensor(c!)) +- func: slow_conv3d_forward.output(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding, *, Tensor(a!) output) -> Tensor(a!) 
python_module: nn dispatch: CPU: slow_conv3d_forward_out_cpu -- func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> (Tensor output, Tensor finput, Tensor fgrad_input) +- func: slow_conv3d_forward(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias, int[3] stride, int[3] padding) -> Tensor python_module: nn dispatch: CPU: slow_conv3d_forward_cpu -- func: slow_conv3d_backward.grad_input(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, *, Tensor(a!) grad_input, Tensor(b!) grad_weight, Tensor(c!) grad_bias) -> (Tensor(a!), Tensor(b!), Tensor(c!)) - python_module: nn - dispatch: - CPU: slow_conv3d_backward_out_cpu - -- func: slow_conv3d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, Tensor finput, Tensor fgrad_input, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) - python_module: nn - dispatch: - CPU: slow_conv3d_backward_cpu - - func: slow_conv_dilated2d(Tensor self, Tensor weight, int[2] kernel_size, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1) -> Tensor python_module: nn dispatch: CPU: slow_conv_dilated2d_cpu CUDA: slow_conv_dilated2d_cuda -- func: slow_conv_dilated2d_backward(Tensor grad_output, Tensor self, Tensor weight, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) - python_module: nn - dispatch: - CPU: slow_conv_dilated2d_backward_cpu - CUDA: slow_conv_dilated2d_backward_cuda - - func: slow_conv_dilated3d(Tensor self, Tensor weight, int[3] kernel_size, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1) -> Tensor python_module: nn dispatch: CPU: slow_conv_dilated3d_cpu CUDA: slow_conv_dilated3d_cuda -- func: slow_conv_dilated3d_backward(Tensor grad_output, Tensor self, Tensor weight, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias) - python_module: nn - dispatch: - CPU: slow_conv_dilated3d_backward_cpu - CUDA: slow_conv_dilated3d_backward_cuda - - func: col2im.out(Tensor self, int[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!) python_module: nn dispatch: CPU: col2im_out_cpu CUDA: col2im_out_cuda @@ -9712,35 +10147,49 @@ - func: isinf(Tensor self) -> Tensor variants: function, method device_check: NoCheck device_guard: False + dispatch: + CompositeExplicitAutograd: isinf + SparseCPU, SparseCUDA: isinf_sparse + SparseCsrCPU, SparseCsrCUDA: isinf_sparse_csr - func: record_stream(Tensor(a!) self, Stream s) -> () variants: method dispatch: CUDA: record_stream_cuda - func: isposinf(Tensor self) -> Tensor variants: function, method structured_delegate: isposinf.out + dispatch: + SparseCPU, SparseCUDA: isposinf_sparse + SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr - func: isposinf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
structured: True
 structured_inherits: TensorIteratorBase
 dispatch:
 CPU, CUDA: isposinf_out
+ SparseCPU, SparseCUDA: isposinf_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: isposinf_sparse_csr_out

- func: isneginf(Tensor self) -> Tensor
 variants: function, method
 structured_delegate: isneginf.out
+ dispatch:
+ SparseCPU, SparseCUDA: isneginf_sparse
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr

- func: isneginf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
 structured: True
 structured_inherits: TensorIteratorBase
 dispatch:
 CPU, CUDA: isneginf_out
+ SparseCPU, SparseCUDA: isneginf_sparse_out
+ SparseCsrCPU, SparseCsrCUDA: isneginf_sparse_csr_out

# NOTE [_add_batch_dim and _remove_batch_dim]
# _add_batch_dim and _remove_batch_dim are meant to be used in the implementation
# of the vmap frontend API (see torch/_vmap_internals.py). They are not
# user-facing, hence the leading underscore. Please don't use them anywhere else.
@@ -10063,15 +10512,15 @@

- func: special_sinc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
 python_module: special
 variants: function

-- func: special_round(Tensor self) -> Tensor
+- func: special_round(Tensor self, *, int decimals=0) -> Tensor
 python_module: special
 variants: function

-- func: special_round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+- func: special_round.out(Tensor self, *, int decimals=0, Tensor(a!) out) -> Tensor(a!)
 python_module: special
 variants: function

- func: special_log1p(Tensor self) -> Tensor
 python_module: special
@@ -10107,10 +10556,14 @@

- func: special_multigammaln.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!)
 python_module: special
 variants: function

+- func: special_softmax(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
+ python_module: special
+ variants: function
+
## Functions related to the fast Fourier transform and the torch.fft namespace
# Note [FFT namespace binding]
# Functions in the fft python module should have their names start with
# an "fft_" prefix and be bound to the desired Python name in
# torch/fft/__init__.py, and the desired C++ name in torch/csrc/api/include/torch/fft.h.
@@ -10198,10 +10651,30 @@

- func: fft_irfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
 python_module: fft
 variants: function

+- func: fft_hfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_hfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfft2(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfft2.out(Tensor self, int[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
- func: fft_fftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
 python_module: fft
 variants: function

- func: fft_fftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
@@ -10230,10 +10703,30 @@

- func: fft_irfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
 python_module: fft
 variants: function

+- func: fft_hfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_hfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfftn(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None) -> Tensor
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
+- func: fft_ihfftn.out(Tensor self, int[1]? s=None, int[1]? dim=None, str? norm=None, *, Tensor(a!) out) -> Tensor(a!)
+ use_const_ref_for_mutable_tensors: True
+ python_module: fft
+ variants: function
+
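The fft_hfft2/fft_ihfft2 and fft_hfftn/fft_ihfftn entries added above follow the namespace-binding note earlier in this section, so they surface as torch.fft.hfft2, torch.fft.ihfft2, torch.fft.hfftn, and torch.fft.ihfftn on the Python side. A minimal sketch of the round trip these transforms are documented to satisfy, assuming a PyTorch build new enough to ship them (the 1.11-era release this schema comes from):

    import torch

    # A real 2-D signal.
    x = torch.randn(8, 8, dtype=torch.float64)

    # ihfft2 produces the one-sided complex spectrum of a real signal;
    # hfft2 inverts it back to the real signal once told the original
    # size of the transformed dimensions via `s`.
    spec = torch.fft.ihfft2(x)
    assert torch.allclose(torch.fft.hfft2(spec, s=x.shape), x)
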
- func: fft_fftfreq(int n, float d=1.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
 python_module: fft
 variants: function

- func: fft_fftfreq.out(int n, float d=1.0, *, Tensor(a!) out) -> Tensor(a!)
@@ -10284,10 +10777,42 @@

- func: linalg_cholesky.out(Tensor self, *, bool upper=False, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg
 variants: function

+- func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
+ python_module: linalg
+ variants: function
+ dispatch:
+ CPU, CUDA: linalg_cross
+
+- func: linalg_cross.out(Tensor self, Tensor other, *, int dim=-1, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ dispatch:
+ CPU, CUDA: linalg_cross_out
+
+# linalg.lu_factor
+- func: linalg_lu_factor(Tensor A, *, bool pivot=True) -> (Tensor LU, Tensor pivots)
+ python_module: linalg
+ variants: function
+
+- func: linalg_lu_factor.out(Tensor A, *, bool pivot=True, Tensor(a!) LU, Tensor(b!) pivots) -> (Tensor(a!) LU, Tensor(b!) pivots)
+ python_module: linalg
+ variants: function
+
+- func: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
+ python_module: linalg
+ structured_delegate: linalg_lu_factor_ex.out
+ variants: function
+
+- func: linalg_lu_factor_ex.out(Tensor A, *, bool pivot=True, bool check_errors=False, Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info) -> (Tensor(a!) LU, Tensor(b!) pivots, Tensor(c!) info)
+ python_module: linalg
+ variants: function
+ structured: True
+ dispatch:
+ CPU, CUDA: linalg_lu_factor_ex_out
+
- func: linalg_det(Tensor self) -> Tensor
 python_module: linalg
 variants: function

- func: linalg_det.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
@@ -10325,10 +10850,16 @@
 variants: function

- func: linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg

+- func: linalg_matrix_exp(Tensor self) -> Tensor
+ python_module: linalg
+ variants: function
+ dispatch:
+ CPU, CUDA: linalg_matrix_exp
+
- func: linalg_slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
 python_module: linalg
 variants: function
 dispatch:
 CPU, CUDA: linalg_slogdet
@@ -10465,22 +10996,34 @@
 python_module: linalg

- func: linalg_matrix_norm.str_ord_out(Tensor self, str ord='fro', int[] dim=[-2,-1], bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg

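Among the additions above, linalg_lu_factor/linalg_lu_factor_ex expose LU factorization with partial pivoting under torch.linalg, and linalg_matrix_exp gives the matrix exponential a home in the same namespace. A short usage sketch of the generated bindings; reusing the packed (LU, pivots) pair with torch.lu_solve is an assumption based on the two APIs sharing the same packed format:

    import torch

    A = torch.randn(4, 4, dtype=torch.float64)
    b = torch.randn(4, 2, dtype=torch.float64)

    # Factor once, then reuse the packed factorization across solves.
    LU, pivots = torch.linalg.lu_factor(A)
    x = torch.lu_solve(b, LU, pivots)
    assert torch.allclose(A @ x, b)

    # The _ex variant reports LAPACK's `info` instead of raising on failure.
    LU, pivots, info = torch.linalg.lu_factor_ex(A)
    assert int(info) == 0  # zero means the factorization succeeded

    # Sanity check for linalg_matrix_exp: exp of the zero matrix is the identity.
    assert torch.allclose(torch.linalg.matrix_exp(torch.zeros(3, 3)), torch.eye(3))
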
-- func: linalg_svd.U(Tensor self, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
+# This function exposes the `compute_uv` flag, which is then used to implement `linalg.svd` and
+# `linalg.svdvals` as composite functions that call this one.
+- func: _linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor Vh)
+ variants: function
+ structured_delegate: _linalg_svd.U
+
+- func: _linalg_svd.U(Tensor A, bool full_matrices=False, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
+ structured: True
+ dispatch:
+ CPU, CUDA: _linalg_svd_out
+
+- func: linalg_svd(Tensor A, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
 python_module: linalg
+ variants: function

-- func: linalg_svd(Tensor self, bool full_matrices=True) -> (Tensor U, Tensor S, Tensor Vh)
+- func: linalg_svd.U(Tensor A, bool full_matrices=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) Vh)
 python_module: linalg
 variants: function

-- func: linalg_svdvals(Tensor input) -> Tensor
+- func: linalg_svdvals(Tensor A) -> Tensor
 python_module: linalg
 variants: function

-- func: linalg_svdvals.out(Tensor input, *, Tensor(a!) out) -> Tensor(a!)
+- func: linalg_svdvals.out(Tensor A, *, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg
 variants: function

- func: linalg_cond(Tensor self, Scalar? p=None) -> Tensor
 python_module: linalg
@@ -10496,19 +11039,41 @@

- func: linalg_cond.p_str_out(Tensor self, str p, *, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg
 variants: function

-- func: linalg_pinv(Tensor self, float rcond=1e-15, bool hermitian=False) -> Tensor
+- func: linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
 python_module: linalg
 variants: function
+ dispatch:
+ CompositeExplicitAutograd: linalg_pinv

+- func: linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ variants: function
+ dispatch:
+ CompositeExplicitAutograd: linalg_pinv_out
+
+- func: linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor
+ python_module: linalg
+ variants: function
+
- func: linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor
 python_module: linalg
 variants: function

-- func: linalg_pinv.out(Tensor self, float rcond=1e-15, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg
 variants: function

- func: linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg
@@ -10563,18 +11128,36 @@
 python_module: linalg

- func: linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg

-- func: linalg_matrix_rank(Tensor self, float? tol=None, bool hermitian=False) -> Tensor
+- func: linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
 python_module: linalg
 variants: function

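The comment on _linalg_svd above is the design note for this hunk: linalg.svd and linalg.svdvals are now thin composites over a single structured kernel, differing only in the compute_uv flag they pass. An illustrative Python check of the relationship that implies (the real composites live in ATen, not Python):

    import torch

    A = torch.randn(5, 3, dtype=torch.float64)

    # Both composites bottom out in the same _linalg_svd kernel, so the
    # singular values they return agree.
    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
    assert torch.allclose(S, torch.linalg.svdvals(A))

    # With full_matrices=False the reduced factors reconstruct A
    # (up to floating-point error).
    assert torch.allclose(U @ torch.diag(S) @ Vh, A)
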
-- func: linalg_matrix_rank.out(Tensor self, float? tol=None, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
+- func: linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
 python_module: linalg
 variants: function

+- func: linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)
+ cpp_no_default_args: ['atol', 'rtol']
+ python_module: linalg
+ variants: function
+
+- func: linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor
+ python_module: linalg
+ variants: function
+
+- func: linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
+ python_module: linalg
+ variants: function
+
- func: linalg_matrix_rank.tol_tensor(Tensor input, Tensor tol, bool hermitian=False) -> Tensor
 python_module: linalg
 variants: function

- func: linalg_matrix_rank.out_tol_tensor(Tensor input, Tensor tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)
@@ -10619,9 +11202,15 @@

# Note: this function is only for testing.
- func: _test_ambiguous_defaults.b(Tensor dummy, int a=2, str b="2") -> Tensor
 cpp_no_default_args: ['a', 'b']
 python_module: nn
+
+# Note: this function is only for testing.
+- func: _test_warn_in_autograd(Tensor self) -> Tensor
+ python_module: nn
+ dispatch:
+ CompositeExplicitAutograd: _test_warn_in_autograd

- func: segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor
 variants: function
 dispatch:
 CPU, CUDA: segment_reduce_kernel
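The atol/rtol overloads above (and the matching linalg_pinv overloads earlier in this section) replace the single tol/rcond knob with independent absolute and relative tolerances: a singular value s is treated as zero when s <= max(atol, rtol * s_max). A minimal sketch of the generated Python API, assuming a 1.11-era PyTorch:

    import torch

    # A rank-2 matrix: the third row is the sum of the first two.
    A = torch.tensor([[1., 0., 0.],
                      [0., 1., 0.],
                      [1., 1., 0.]])

    print(torch.linalg.matrix_rank(A))                       # tensor(2)
    print(torch.linalg.matrix_rank(A, atol=1e-5, rtol=0.0))  # tensor(2)

    # The same keyword pair appears on torch.linalg.pinv.
    P = torch.linalg.pinv(A, atol=1e-5, rtol=0.0)
    assert torch.allclose(A @ P @ A, A, atol=1e-5)
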