ext/ruby_gumath/gumath/libgumath/gumath.h in gumath-0.2.0dev5 vs ext/ruby_gumath/gumath/libgumath/gumath.h in gumath-0.2.0dev8
- old
+ new
@@ -32,10 +32,21 @@
#ifndef GUMATH_H
#define GUMATH_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+ #include <cstdint>
+#else
+ #include <stdint.h>
+#endif
+
#include "ndtypes.h"
#include "xnd.h"
#ifdef _MSC_VER
@@ -63,28 +74,39 @@
#define ALLOCA(type, name, nmemb) type name[nmemb]
#endif
-#define GM_MAX_KERNELS 512
+#define GM_MAX_KERNELS 8192
+#define GM_THREAD_CUTOFF 1000000
typedef float float32_t;
typedef double float64_t;
typedef int (* gm_xnd_kernel_t)(xnd_t stack[], ndt_context_t *ctx);
typedef int (* gm_strided_kernel_t)(char **args, intptr_t *dimensions, intptr_t *steps, void *data);
-/* Collection of specialized kernels for a single function signature. */
+/*
+ * Collection of specialized kernels for a single function signature.
+ *
+ * NOTE: The specialized kernel lookup scheme is transitional and may
+ * be replaced by something else.
+ *
+ * This should be considered as a first version of a kernel request
+ * protocol.
+ */
typedef struct {
- ndt_t *sig;
+ const ndt_t *sig;
const ndt_constraint_t *constraint;
/* Xnd signatures */
- gm_xnd_kernel_t Opt; /* dispatch ensures elementwise, at least 1D, contiguous in last dimensions */
- gm_xnd_kernel_t C; /* dispatch ensures c-contiguous in inner dimensions */
- gm_xnd_kernel_t Fortran; /* dispatch ensures f-contiguous in inner dimensions */
+ gm_xnd_kernel_t OptC; /* C in inner+1 dimensions */
+ gm_xnd_kernel_t OptZ; /* C in inner dimensions, C or zero stride in (inner+1)th. */
+ gm_xnd_kernel_t OptS; /* strided in (inner+1)th. */
+ gm_xnd_kernel_t C; /* C in inner dimensions */
+ gm_xnd_kernel_t Fortran; /* Fortran in inner dimensions */
gm_xnd_kernel_t Xnd; /* selected if non-contiguous or the other fields are NULL */
/* NumPy signature */
gm_strided_kernel_t Strided;
} gm_kernel_set_t;
@@ -97,15 +119,21 @@
typedef struct {
const char *name;
const char *sig;
const ndt_constraint_t *constraint;
+ uint32_t cap;
- gm_xnd_kernel_t Opt;
+ /* Xnd signatures */
+ gm_xnd_kernel_t OptC;
+ gm_xnd_kernel_t OptZ;
+ gm_xnd_kernel_t OptS;
gm_xnd_kernel_t C;
gm_xnd_kernel_t Fortran;
gm_xnd_kernel_t Xnd;
+
+ /* NumPy signature */
gm_strided_kernel_t Strided;
} gm_kernel_init_t;
/* Actual kernel selected for application */
typedef struct {
@@ -113,11 +141,14 @@
const gm_kernel_set_t *set;
} gm_kernel_t;
/* Multimethod with associated kernels */
typedef struct gm_func gm_func_t;
-typedef const gm_kernel_set_t *(*gm_typecheck_t)(ndt_apply_spec_t *spec, const gm_func_t *f, const ndt_t *in[], int nin, ndt_context_t *ctx);
+typedef const gm_kernel_set_t *(*gm_typecheck_t)(ndt_apply_spec_t *spec, const gm_func_t *f,
+ const ndt_t *in[], const int64_t li[],
+ int nin, int nout, bool check_broadcast,
+ ndt_context_t *ctx);
struct gm_func {
char *name;
gm_typecheck_t typecheck; /* Experimental optimized type-checking, may be NULL. */
int nkernels;
gm_kernel_set_t kernels[GM_MAX_KERNELS];
@@ -137,14 +168,14 @@
GM_API gm_func_t *gm_add_func(gm_tbl_t *tbl, const char *name, ndt_context_t *ctx);
GM_API int gm_add_kernel(gm_tbl_t *tbl, const gm_kernel_init_t *kernel, ndt_context_t *ctx);
GM_API int gm_add_kernel_typecheck(gm_tbl_t *tbl, const gm_kernel_init_t *kernel, ndt_context_t *ctx, gm_typecheck_t f);
GM_API gm_kernel_t gm_select(ndt_apply_spec_t *spec, const gm_tbl_t *tbl, const char *name,
- const ndt_t *in_types[], int nin, const xnd_t args[],
- ndt_context_t *ctx);
+ const ndt_t *types[], const int64_t li[], int nin, int nout,
+ bool check_broadcast, const xnd_t args[], ndt_context_t *ctx);
GM_API int gm_apply(const gm_kernel_t *kernel, xnd_t stack[], int outer_dims, ndt_context_t *ctx);
-GM_API int gm_apply_thread(const gm_kernel_t *kernel, xnd_t stack[], int outer_dims, uint32_t flags, const int64_t nthreads, ndt_context_t *ctx);
+GM_API int gm_apply_thread(const gm_kernel_t *kernel, xnd_t stack[], int outer_dims, const int64_t nthreads, ndt_context_t *ctx);
/******************************************************************************/
/* NumPy loops */
/******************************************************************************/
@@ -169,10 +200,11 @@
/******************************************************************************/
/* Xnd loops */
/******************************************************************************/
+GM_API int array_shape_check(xnd_t *x, const int64_t shape, ndt_context_t *ctx);
GM_API int gm_xnd_map(const gm_xnd_kernel_t f, xnd_t stack[], const int nargs,
const int outer_dims, ndt_context_t *ctx);
/******************************************************************************/
@@ -189,17 +221,26 @@
/******************************************************************************/
/* Library initialization and tables */
/******************************************************************************/
GM_API void gm_init(void);
-GM_API int gm_init_unary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
-GM_API int gm_init_binary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
+GM_API int gm_init_cpu_unary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
+GM_API int gm_init_cpu_binary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
+GM_API int gm_init_bitwise_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
+
+GM_API int gm_init_cuda_unary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
+GM_API int gm_init_cuda_binary_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
+
GM_API int gm_init_example_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
-GM_API int gm_init_bfloat16_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
GM_API int gm_init_graph_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
GM_API int gm_init_quaternion_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
GM_API int gm_init_pdist_kernels(gm_tbl_t *tbl, ndt_context_t *ctx);
GM_API void gm_finalize(void);
+
+
+#ifdef __cplusplus
+} /* END extern "C" */
+#endif
#endif /* GUMATH_H */