diff options
author | Justin Bedo <cu@cua0.org> | 2024-06-20 17:08:56 +1000 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2024-06-20 17:09:58 +1000 |
commit | 6d3684d1b32087b385bebce9ea0fa22cb522ab21 (patch) | |
tree | 58a5a803524108fa74a9d97483b763ff09fea6bc | |
parent | 56d3507d774357da1281fa3b0c49ed4e0466800d (diff) |
update futhark
-rw-r--r-- | pca.c | 27445 | ||||
-rw-r--r-- | pca.fut | 6 | ||||
-rw-r--r-- | pca.h | 7 |
3 files changed, 12710 insertions, 14748 deletions
@@ -1,4 +1,5 @@ -// Generated by Futhark 0.24.0 (prerelease - include info below when reporting bugs) +// Generated by Futhark 0.25.17. +// Compiled with GHC 9.6.5. // We need to define _GNU_SOURCE before // _any_ headers files are imported to get @@ -11,11 +12,14 @@ #ifdef __clang__ #pragma clang diagnostic ignored "-Wunused-function" #pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-const-variable" #pragma clang diagnostic ignored "-Wparentheses" #pragma clang diagnostic ignored "-Wunused-label" +#pragma clang diagnostic ignored "-Wunused-but-set-variable" #elif __GNUC__ #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wunused-const-variable" #pragma GCC diagnostic ignored "-Wparentheses" #pragma GCC diagnostic ignored "-Wunused-label" #pragma GCC diagnostic ignored "-Wunused-but-set-variable" @@ -51,7 +55,7 @@ const char *futhark_get_tuning_param_class(int); // Arrays struct futhark_f64_2d; struct futhark_f64_2d *futhark_new_f64_2d(struct futhark_context *ctx, const double *data, int64_t dim0, int64_t dim1); -struct futhark_f64_2d *futhark_new_raw_f64_2d(struct futhark_context *ctx, const unsigned char *data, int64_t offset, int64_t dim0, int64_t dim1); +struct futhark_f64_2d *futhark_new_raw_f64_2d(struct futhark_context *ctx, unsigned char *data, int64_t dim0, int64_t dim1); int futhark_free_f64_2d(struct futhark_context *ctx, struct futhark_f64_2d *arr); int futhark_values_f64_2d(struct futhark_context *ctx, struct futhark_f64_2d *arr, double *data); unsigned char *futhark_values_raw_f64_2d(struct futhark_context *ctx, struct futhark_f64_2d *arr); @@ -67,11 +71,11 @@ int futhark_entry_pcaWithQuantile(struct futhark_context *ctx, struct futhark_f6 // Miscellaneous int futhark_context_sync(struct futhark_context *ctx); void futhark_context_config_set_cache_file(struct futhark_context_config *cfg, const char *f); -char *futhark_context_report(struct futhark_context *ctx); char *futhark_context_get_error(struct futhark_context *ctx); void futhark_context_set_logging_file(struct futhark_context *ctx, FILE *f); void futhark_context_pause_profiling(struct futhark_context *ctx); void futhark_context_unpause_profiling(struct futhark_context *ctx); +char *futhark_context_report(struct futhark_context *ctx); int futhark_context_clear_caches(struct futhark_context *ctx); #define FUTHARK_BACKEND_multicore #define FUTHARK_SUCCESS 0 @@ -93,6 +97,7 @@ int futhark_context_clear_caches(struct futhark_context *ctx); #undef NDEBUG #include <assert.h> #include <stdarg.h> +#define SCALAR_FUN_ATTR static inline // Start of util.h. // // Various helper functions that are useful in all generated C code. @@ -231,6 +236,44 @@ static void str_builder(struct str_builder *b, const char *s, ...) { b->used += needed; } +static void str_builder_str(struct str_builder *b, const char *s) { + size_t needed = strlen(s); + if (b->capacity < b->used + needed + 1) { + b->capacity *= 2; + b->str = realloc(b->str, b->capacity); + } + strcpy(b->str+b->used, s); + b->used += needed; +} + +static void str_builder_char(struct str_builder *b, char c) { + size_t needed = 1; + if (b->capacity < b->used + needed + 1) { + b->capacity *= 2; + b->str = realloc(b->str, b->capacity); + } + b->str[b->used] = c; + b->str[b->used+1] = 0; + b->used += needed; +} + +static void str_builder_json_str(struct str_builder* sb, const char* s) { + str_builder_char(sb, '"'); + for (int j = 0; s[j]; j++) { + char c = s[j]; + switch (c) { + case '\n': + str_builder_str(sb, "\\n"); + break; + case '"': + str_builder_str(sb, "\\\""); + break; + default: + str_builder_char(sb, c); + } + } + str_builder_char(sb, '"'); +} static char *strclone(const char *str) { size_t size = strlen(str) + 1; @@ -243,6 +286,25 @@ static char *strclone(const char *str) { return copy; } +// Assumes NULL-terminated. +static char *strconcat(const char *src_fragments[]) { + size_t src_len = 0; + const char **p; + + for (p = src_fragments; *p; p++) { + src_len += strlen(*p); + } + + char *src = (char*) malloc(src_len + 1); + size_t n = 0; + for (p = src_fragments; *p; p++) { + strcpy(src + n, *p); + n += strlen(*p); + } + + return src; +} + // End of util.h. // Start of cache.h @@ -611,7 +673,7 @@ __constant static const unsigned short offset_table[64] = { 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; -static uint16_t float2halfbits(float value) { +SCALAR_FUN_ATTR uint16_t float2halfbits(float value) { union { float x; uint32_t y; } u; u.x = value; uint32_t bits = u.y; @@ -621,7 +683,7 @@ static uint16_t float2halfbits(float value) { return hbits; } -static float halfbits2float(uint16_t value) { +SCALAR_FUN_ATTR float halfbits2float(uint16_t value) { uint32_t bits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; union { uint32_t x; float y; } u; @@ -629,7 +691,7 @@ static float halfbits2float(uint16_t value) { return u.y; } -static uint16_t halfbitsnextafter(uint16_t from, uint16_t to) { +SCALAR_FUN_ATTR uint16_t halfbitsnextafter(uint16_t from, uint16_t to) { int fabs = from & 0x7FFF, tabs = to & 0x7FFF; if(fabs > 0x7C00 || tabs > 0x7C00) { return ((from&0x7FFF)>0x7C00) ? (from|0x200) : (to|0x200); @@ -932,6 +994,73 @@ static int free_list_first(struct free_list *l, fl_mem *mem_out) { } // End of free_list.h. +// Start of event_list.h + +typedef int (*event_report_fn)(struct str_builder*, void*); + +struct event { + void* data; + event_report_fn f; + const char* name; + char *description; +}; + +struct event_list { + struct event *events; + int num_events; + int capacity; +}; + +static void event_list_init(struct event_list *l) { + l->capacity = 100; + l->num_events = 0; + l->events = calloc(l->capacity, sizeof(struct event)); +} + +static void event_list_free(struct event_list *l) { + free(l->events); +} + +static void add_event_to_list(struct event_list *l, + const char* name, + char* description, + void* data, + event_report_fn f) { + if (l->num_events == l->capacity) { + l->capacity *= 2; + l->events = realloc(l->events, l->capacity * sizeof(struct event)); + } + l->events[l->num_events].name = name; + l->events[l->num_events].description = description; + l->events[l->num_events].data = data; + l->events[l->num_events].f = f; + l->num_events++; +} + +static int report_events_in_list(struct event_list *l, + struct str_builder* sb) { + int ret = 0; + for (int i = 0; i < l->num_events; i++) { + if (i != 0) { + str_builder_str(sb, ","); + } + str_builder_str(sb, "{\"name\":"); + str_builder_json_str(sb, l->events[i].name); + str_builder_str(sb, ",\"description\":"); + str_builder_json_str(sb, l->events[i].description); + free(l->events[i].description); + if (l->events[i].f(sb, l->events[i].data) != 0) { + ret = 1; + break; + } + str_builder(sb, "}"); + } + event_list_free(l); + event_list_init(l); + return ret; +} + +// End of event_list.h #ifdef _MSC_VER #define inline __inline @@ -965,57 +1094,60 @@ static int free_list_first(struct free_list *l, fl_mem *mem_out) { // Double-precision definitions are only included if the preprocessor // macro FUTHARK_F64_ENABLED is set. -static inline uint8_t add8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR int32_t futrts_to_bits32(float x); +SCALAR_FUN_ATTR float futrts_from_bits32(int32_t x); + +SCALAR_FUN_ATTR uint8_t add8(uint8_t x, uint8_t y) { return x + y; } -static inline uint16_t add16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t add16(uint16_t x, uint16_t y) { return x + y; } -static inline uint32_t add32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t add32(uint32_t x, uint32_t y) { return x + y; } -static inline uint64_t add64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t add64(uint64_t x, uint64_t y) { return x + y; } -static inline uint8_t sub8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t sub8(uint8_t x, uint8_t y) { return x - y; } -static inline uint16_t sub16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t sub16(uint16_t x, uint16_t y) { return x - y; } -static inline uint32_t sub32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t sub32(uint32_t x, uint32_t y) { return x - y; } -static inline uint64_t sub64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t sub64(uint64_t x, uint64_t y) { return x - y; } -static inline uint8_t mul8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t mul8(uint8_t x, uint8_t y) { return x * y; } -static inline uint16_t mul16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t mul16(uint16_t x, uint16_t y) { return x * y; } -static inline uint32_t mul32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t mul32(uint32_t x, uint32_t y) { return x * y; } -static inline uint64_t mul64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t mul64(uint64_t x, uint64_t y) { return x * y; } #if ISPC -static inline uint8_t udiv8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv8(uint8_t x, uint8_t y) { // This strange pattern is used to prevent the ISPC compiler from // causing SIGFPEs and bogus results on divisions where inactive lanes // have 0-valued divisors. It ensures that any inactive lane instead @@ -1028,242 +1160,242 @@ static inline uint8_t udiv8(uint8_t x, uint8_t y) { return x / ys; } -static inline uint16_t udiv16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv16(uint16_t x, uint16_t y) { uint16_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline uint32_t udiv32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv32(uint32_t x, uint32_t y) { uint32_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline uint64_t udiv64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv64(uint64_t x, uint64_t y) { uint64_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline uint8_t udiv_up8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv_up8(uint8_t x, uint8_t y) { uint8_t ys = 1; foreach_active(i){ ys = y; } - + return (x + y - 1) / ys; } -static inline uint16_t udiv_up16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv_up16(uint16_t x, uint16_t y) { uint16_t ys = 1; foreach_active(i){ ys = y; } - + return (x + y - 1) / ys; } -static inline uint32_t udiv_up32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv_up32(uint32_t x, uint32_t y) { uint32_t ys = 1; foreach_active(i){ ys = y; } - + return (x + y - 1) / ys; } -static inline uint64_t udiv_up64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv_up64(uint64_t x, uint64_t y) { uint64_t ys = 1; foreach_active(i){ ys = y; } - + return (x + y - 1) / ys; } -static inline uint8_t umod8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t umod8(uint8_t x, uint8_t y) { uint8_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline uint16_t umod16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t umod16(uint16_t x, uint16_t y) { uint16_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline uint32_t umod32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t umod32(uint32_t x, uint32_t y) { uint32_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline uint64_t umod64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t umod64(uint64_t x, uint64_t y) { uint64_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline uint8_t udiv_safe8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv_safe8(uint8_t x, uint8_t y) { uint8_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline uint16_t udiv_safe16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv_safe16(uint16_t x, uint16_t y) { uint16_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline uint32_t udiv_safe32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv_safe32(uint32_t x, uint32_t y) { uint32_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline uint64_t udiv_safe64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv_safe64(uint64_t x, uint64_t y) { uint64_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv_up_safe8(uint8_t x, uint8_t y) { uint8_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : (x + y - 1) / ys; } -static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv_up_safe16(uint16_t x, uint16_t y) { uint16_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : (x + y - 1) / ys; } -static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv_up_safe32(uint32_t x, uint32_t y) { uint32_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : (x + y - 1) / ys; } -static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv_up_safe64(uint64_t x, uint64_t y) { uint64_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : (x + y - 1) / ys; } -static inline uint8_t umod_safe8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t umod_safe8(uint8_t x, uint8_t y) { uint8_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline uint16_t umod_safe16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t umod_safe16(uint16_t x, uint16_t y) { uint16_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline uint32_t umod_safe32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t umod_safe32(uint32_t x, uint32_t y) { uint32_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline uint64_t umod_safe64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t umod_safe64(uint64_t x, uint64_t y) { uint64_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline int8_t sdiv8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv8(int8_t x, int8_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + int8_t q = x / ys; int8_t r = x % ys; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int16_t sdiv16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv16(int16_t x, int16_t y) { int16_t ys = 1; foreach_active(i){ ys = y; } - + int16_t q = x / ys; int16_t r = x % ys; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int32_t sdiv32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv32(int32_t x, int32_t y) { int32_t ys = 1; foreach_active(i){ ys = y; @@ -1274,775 +1406,775 @@ static inline int32_t sdiv32(int32_t x, int32_t y) { return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int64_t sdiv64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv64(int64_t x, int64_t y) { int64_t ys = 1; foreach_active(i){ ys = y; } - + int64_t q = x / ys; int64_t r = x % ys; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int8_t sdiv_up8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv_up8(int8_t x, int8_t y) { return sdiv8(x + y - 1, y); } -static inline int16_t sdiv_up16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv_up16(int16_t x, int16_t y) { return sdiv16(x + y - 1, y); } -static inline int32_t sdiv_up32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv_up32(int32_t x, int32_t y) { return sdiv32(x + y - 1, y); } -static inline int64_t sdiv_up64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv_up64(int64_t x, int64_t y) { return sdiv64(x + y - 1, y); } -static inline int8_t smod8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t smod8(int8_t x, int8_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + int8_t r = x % ys; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int16_t smod16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t smod16(int16_t x, int16_t y) { int16_t ys = 1; foreach_active(i){ ys = y; } - + int16_t r = x % ys; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int32_t smod32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t smod32(int32_t x, int32_t y) { int32_t ys = 1; foreach_active(i){ ys = y; } - + int32_t r = x % ys; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int64_t smod64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t smod64(int64_t x, int64_t y) { int64_t ys = 1; foreach_active(i){ ys = y; } - + int64_t r = x % ys; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int8_t sdiv_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : sdiv8(x, y); } -static inline int16_t sdiv_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : sdiv16(x, y); } -static inline int32_t sdiv_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : sdiv32(x, y); } -static inline int64_t sdiv_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : sdiv64(x, y); } -static inline int8_t sdiv_up_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv_up_safe8(int8_t x, int8_t y) { return sdiv_safe8(x + y - 1, y); } -static inline int16_t sdiv_up_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv_up_safe16(int16_t x, int16_t y) { return sdiv_safe16(x + y - 1, y); } -static inline int32_t sdiv_up_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv_up_safe32(int32_t x, int32_t y) { return sdiv_safe32(x + y - 1, y); } -static inline int64_t sdiv_up_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv_up_safe64(int64_t x, int64_t y) { return sdiv_safe64(x + y - 1, y); } -static inline int8_t smod_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t smod_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : smod8(x, y); } -static inline int16_t smod_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t smod_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : smod16(x, y); } -static inline int32_t smod_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t smod_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : smod32(x, y); } -static inline int64_t smod_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t smod_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : smod64(x, y); } -static inline int8_t squot8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t squot8(int8_t x, int8_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline int16_t squot16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t squot16(int16_t x, int16_t y) { int16_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline int32_t squot32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t squot32(int32_t x, int32_t y) { int32_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline int64_t squot64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t squot64(int64_t x, int64_t y) { int64_t ys = 1; foreach_active(i){ ys = y; } - + return x / ys; } -static inline int8_t srem8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t srem8(int8_t x, int8_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline int16_t srem16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t srem16(int16_t x, int16_t y) { int16_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline int32_t srem32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t srem32(int32_t x, int32_t y) { int32_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline int64_t srem64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t srem64(int64_t x, int64_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + return x % ys; } -static inline int8_t squot_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t squot_safe8(int8_t x, int8_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline int16_t squot_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t squot_safe16(int16_t x, int16_t y) { int16_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline int32_t squot_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t squot_safe32(int32_t x, int32_t y) { int32_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline int64_t squot_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t squot_safe64(int64_t x, int64_t y) { int64_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x / ys; } -static inline int8_t srem_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t srem_safe8(int8_t x, int8_t y) { int8_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline int16_t srem_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t srem_safe16(int16_t x, int16_t y) { int16_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline int32_t srem_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t srem_safe32(int32_t x, int32_t y) { int32_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } -static inline int64_t srem_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t srem_safe64(int64_t x, int64_t y) { int64_t ys = 1; foreach_active(i){ ys = y; } - + return y == 0 ? 0 : x % ys; } #else -static inline uint8_t udiv8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv8(uint8_t x, uint8_t y) { return x / y; } -static inline uint16_t udiv16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv16(uint16_t x, uint16_t y) { return x / y; } -static inline uint32_t udiv32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv32(uint32_t x, uint32_t y) { return x / y; } -static inline uint64_t udiv64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv64(uint64_t x, uint64_t y) { return x / y; } -static inline uint8_t udiv_up8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv_up8(uint8_t x, uint8_t y) { return (x + y - 1) / y; } -static inline uint16_t udiv_up16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv_up16(uint16_t x, uint16_t y) { return (x + y - 1) / y; } -static inline uint32_t udiv_up32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv_up32(uint32_t x, uint32_t y) { return (x + y - 1) / y; } -static inline uint64_t udiv_up64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv_up64(uint64_t x, uint64_t y) { return (x + y - 1) / y; } -static inline uint8_t umod8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t umod8(uint8_t x, uint8_t y) { return x % y; } -static inline uint16_t umod16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t umod16(uint16_t x, uint16_t y) { return x % y; } -static inline uint32_t umod32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t umod32(uint32_t x, uint32_t y) { return x % y; } -static inline uint64_t umod64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t umod64(uint64_t x, uint64_t y) { return x % y; } -static inline uint8_t udiv_safe8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv_safe8(uint8_t x, uint8_t y) { return y == 0 ? 0 : x / y; } -static inline uint16_t udiv_safe16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv_safe16(uint16_t x, uint16_t y) { return y == 0 ? 0 : x / y; } -static inline uint32_t udiv_safe32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv_safe32(uint32_t x, uint32_t y) { return y == 0 ? 0 : x / y; } -static inline uint64_t udiv_safe64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv_safe64(uint64_t x, uint64_t y) { return y == 0 ? 0 : x / y; } -static inline uint8_t udiv_up_safe8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t udiv_up_safe8(uint8_t x, uint8_t y) { return y == 0 ? 0 : (x + y - 1) / y; } -static inline uint16_t udiv_up_safe16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t udiv_up_safe16(uint16_t x, uint16_t y) { return y == 0 ? 0 : (x + y - 1) / y; } -static inline uint32_t udiv_up_safe32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t udiv_up_safe32(uint32_t x, uint32_t y) { return y == 0 ? 0 : (x + y - 1) / y; } -static inline uint64_t udiv_up_safe64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t udiv_up_safe64(uint64_t x, uint64_t y) { return y == 0 ? 0 : (x + y - 1) / y; } -static inline uint8_t umod_safe8(uint8_t x, uint8_t y) { +SCALAR_FUN_ATTR uint8_t umod_safe8(uint8_t x, uint8_t y) { return y == 0 ? 0 : x % y; } -static inline uint16_t umod_safe16(uint16_t x, uint16_t y) { +SCALAR_FUN_ATTR uint16_t umod_safe16(uint16_t x, uint16_t y) { return y == 0 ? 0 : x % y; } -static inline uint32_t umod_safe32(uint32_t x, uint32_t y) { +SCALAR_FUN_ATTR uint32_t umod_safe32(uint32_t x, uint32_t y) { return y == 0 ? 0 : x % y; } -static inline uint64_t umod_safe64(uint64_t x, uint64_t y) { +SCALAR_FUN_ATTR uint64_t umod_safe64(uint64_t x, uint64_t y) { return y == 0 ? 0 : x % y; } -static inline int8_t sdiv8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv8(int8_t x, int8_t y) { int8_t q = x / y; int8_t r = x % y; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int16_t sdiv16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv16(int16_t x, int16_t y) { int16_t q = x / y; int16_t r = x % y; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int32_t sdiv32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv32(int32_t x, int32_t y) { int32_t q = x / y; int32_t r = x % y; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int64_t sdiv64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv64(int64_t x, int64_t y) { int64_t q = x / y; int64_t r = x % y; return q - ((r != 0 && r < 0 != y < 0) ? 1 : 0); } -static inline int8_t sdiv_up8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv_up8(int8_t x, int8_t y) { return sdiv8(x + y - 1, y); } -static inline int16_t sdiv_up16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv_up16(int16_t x, int16_t y) { return sdiv16(x + y - 1, y); } -static inline int32_t sdiv_up32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv_up32(int32_t x, int32_t y) { return sdiv32(x + y - 1, y); } -static inline int64_t sdiv_up64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv_up64(int64_t x, int64_t y) { return sdiv64(x + y - 1, y); } -static inline int8_t smod8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t smod8(int8_t x, int8_t y) { int8_t r = x % y; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int16_t smod16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t smod16(int16_t x, int16_t y) { int16_t r = x % y; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int32_t smod32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t smod32(int32_t x, int32_t y) { int32_t r = x % y; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int64_t smod64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t smod64(int64_t x, int64_t y) { int64_t r = x % y; return r + (r == 0 || (x > 0 && y > 0) || (x < 0 && y < 0) ? 0 : y); } -static inline int8_t sdiv_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : sdiv8(x, y); } -static inline int16_t sdiv_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : sdiv16(x, y); } -static inline int32_t sdiv_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : sdiv32(x, y); } -static inline int64_t sdiv_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : sdiv64(x, y); } -static inline int8_t sdiv_up_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t sdiv_up_safe8(int8_t x, int8_t y) { return sdiv_safe8(x + y - 1, y); } -static inline int16_t sdiv_up_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t sdiv_up_safe16(int16_t x, int16_t y) { return sdiv_safe16(x + y - 1, y); } -static inline int32_t sdiv_up_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t sdiv_up_safe32(int32_t x, int32_t y) { return sdiv_safe32(x + y - 1, y); } -static inline int64_t sdiv_up_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t sdiv_up_safe64(int64_t x, int64_t y) { return sdiv_safe64(x + y - 1, y); } -static inline int8_t smod_safe8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t smod_safe8(int8_t x, int8_t y) { return y == 0 ? 0 : smod8(x, y); } -static inline int16_t smod_safe16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t smod_safe16(int16_t x, int16_t y) { return y == 0 ? 0 : smod16(x, y); } -static inline int32_t smod_safe32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t smod_safe32(int32_t x, int32_t y) { return y == 0 ? 0 : smod32(x, y); } -static inline int64_t smod_safe64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t smod_safe64(int64_t x, int64_t y) { return y == 0 ? 0 : smod64(x, y); } -static inline int8_t squot8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t squot8(int8_t x, int8_t y) { return x / y; } -static inline int16_t squot16(int16_t x, int16_t y) { +SCALAR_FUN_ATTR int16_t squot16(int16_t x, int16_t y) { return x / y; } -static inline int32_t squot32(int32_t x, int32_t y) { +SCALAR_FUN_ATTR int32_t squot32(int32_t x, int32_t y) { return x / y; } -static inline int64_t squot64(int64_t x, int64_t y) { +SCALAR_FUN_ATTR int64_t squot64(int64_t x, int64_t y) { return x / y; } -static inline int8_t srem8(int8_t x, int8_t y) { +SCALAR_FUN_ATTR int8_t srem8(int8_t x, int8_t y) { return x % y; } -static inline int16_t srem16(int16_t x, int16_t y) { |