ggml-hexagon: create generalized functions for cpu side op (llama/17500)
* refactor: replace ggml_hexagon_mul_mat with template-based binary operation for improved flexibility * refactor: replace ggml_hexagon_mul_mat_id with template-based binary operation for improved flexibility * refactor: initialize buffer types and streamline dspqueue_buffers_init calls for clarity * add comment * refactor: remove redundant buffer checks in hexagon supported operations * wip * add missing include to fix weak symbol warning * add ggml_hexagon_op_generic * refactor: simplify tensor operation initialization and buffer management in hexagon implementation * refactor: streamline hexagon operation initialization and buffer management * refactor: update function signatures and streamline request handling in hexagon operations * wip * ggml-hexagon: clean up code formatting and improve unary operation handling * wip * rename * fix: add support for permuted F16 tensors and enhance quantization checks in matrix operations * refactor: replace ggml_hexagon_mul_mat with template-based binary operation for improved flexibility refactor: replace ggml_hexagon_mul_mat_id with template-based binary operation for improved flexibility refactor: initialize buffer types and streamline dspqueue_buffers_init calls for clarity refactor: remove redundant buffer checks in hexagon supported operations add missing include to fix weak symbol warning add ggml_hexagon_op_generic refactor: simplify tensor operation initialization and buffer management in hexagon implementation refactor: streamline hexagon operation initialization and buffer management refactor: update function signatures and streamline request handling in hexagon operations ggml-hexagon: clean up code formatting and improve unary operation handling fix: add support for permuted F16 tensors and enhance quantization checks in matrix operations # Conflicts: # ggml/src/ggml-hexagon/ggml-hexagon.cpp * hexagon: fix merge conflicts * hexagon: minor cleanup for buffer support checks * hexagon: factor out op_desc and the overal op logging * hexagon: further simplify and cleanup op dispatch logic * snapdragon: update adb scripts to use llama-cli and llama-completion * fix pipeline failure --------- Co-authored-by: Max Krasnyansky <maxk@qti.qualcomm.com>
This commit is contained in:
parent
2f33395197
commit
e4c89612cd
File diff suppressed because it is too large
Load Diff
|
|
@ -8,6 +8,7 @@ extern "C" {
|
|||
#include <AEEStdErr.h>
|
||||
#include <inttypes.h>
|
||||
#include <remote.h>
|
||||
#include <rpcmem.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Offset to differentiate HLOS and Hexagon error codes.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,153 @@
|
|||
#ifndef OP_DESC_H
|
||||
#define OP_DESC_H
|
||||
|
||||
#define GGML_COMMON_IMPL_CPP
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
|
||||
struct op_desc {
|
||||
char strides[64 * GGML_MAX_SRC];
|
||||
char dims[64 * GGML_MAX_SRC];
|
||||
char types[16 * GGML_MAX_SRC];
|
||||
char buffs[64 * GGML_MAX_SRC];
|
||||
char names[64 * GGML_MAX_SRC];
|
||||
|
||||
int format_tensor_dims(char * str, const struct ggml_tensor * t) {
|
||||
if (t->ne[2] == 1 && t->ne[3] == 1) {
|
||||
return sprintf(str, "%d:%d", (int) t->ne[0], (int) t->ne[1]);
|
||||
} else {
|
||||
return sprintf(str, "%d:%d:%d:%d", (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
|
||||
}
|
||||
}
|
||||
|
||||
void format_op_dims(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += format_tensor_dims(p, t->src[0]);
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += format_tensor_dims(p, t->src[i]);
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
// format self dims separately for better visual alignment
|
||||
char self[64];
|
||||
format_tensor_dims(self, t);
|
||||
|
||||
p += sprintf(p, "%s", self);
|
||||
}
|
||||
|
||||
int format_tensor_strides(char * str, const struct ggml_tensor * t) {
|
||||
const char * c = ggml_is_contiguous(t) ? "" : "!";
|
||||
|
||||
if (t->ne[2] == 1 && t->ne[3] == 1) {
|
||||
return sprintf(str, "%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], c);
|
||||
} else {
|
||||
return sprintf(str, "%zu:%zu:%zu:%zu%s", (size_t) t->nb[0], (size_t) t->nb[1], (size_t) t->nb[2], (size_t) t->nb[3], c);
|
||||
}
|
||||
}
|
||||
|
||||
void format_op_strides(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += format_tensor_strides(p, t->src[0]);
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += format_tensor_strides(p, t->src[i]);
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
// format self dims separately for better visual alignment
|
||||
char self[64];
|
||||
format_tensor_strides(self, t);
|
||||
|
||||
p += sprintf(p, "%s", self);
|
||||
}
|
||||
|
||||
void format_op_types(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += sprintf(p, "%s", ggml_type_name(t->src[0]->type));
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += sprintf(p, "%s", ggml_type_name(t->src[i]->type));
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
p += sprintf(p, "%s", ggml_type_name(t->type));
|
||||
}
|
||||
|
||||
const char * tensor_buff_name(const struct ggml_tensor * t) {
|
||||
if (t->buffer) {
|
||||
return ggml_backend_buffer_name(t->buffer);
|
||||
}
|
||||
return "NONE";
|
||||
}
|
||||
|
||||
void format_op_buffs(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += sprintf(p, "%s", tensor_buff_name(t->src[0]));
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += sprintf(p, "%s", tensor_buff_name(t->src[i]));
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
p += sprintf(p, "%s", tensor_buff_name(t));
|
||||
}
|
||||
|
||||
void format_op_names(char * str, const struct ggml_tensor * t) {
|
||||
char * p = str;
|
||||
|
||||
// append src0 and src1 (if any)
|
||||
if (t->src[0]) {
|
||||
p += sprintf(p, "%s", t->src[0]->name);
|
||||
|
||||
for (int i = 1; i < GGML_MAX_SRC && t->src[i]; i++) {
|
||||
p += sprintf(p, " x ");
|
||||
p += sprintf(p, "%s", t->src[i]->name);
|
||||
}
|
||||
|
||||
p += sprintf(p, " -> ");
|
||||
}
|
||||
|
||||
p += sprintf(p, "%s", t->name);
|
||||
}
|
||||
|
||||
void format(const ggml_tensor * op) {
|
||||
format_op_dims(dims, op);
|
||||
format_op_strides(strides, op);
|
||||
format_op_types(types, op);
|
||||
format_op_buffs(buffs, op);
|
||||
format_op_names(names, op);
|
||||
}
|
||||
|
||||
op_desc() {}
|
||||
op_desc(const ggml_tensor * op) { format(op); }
|
||||
};
|
||||
|
||||
#endif // OP_DESC_H
|
||||
Loading…
Reference in New Issue