talk-llama : sync llama.cpp
This commit is contained in:
parent
69500f5502
commit
46ca43d639
|
|
@ -2475,11 +2475,29 @@ public:
|
|||
}
|
||||
|
||||
if (need_alloc) {
|
||||
mbuf_cur = std::move(mbuf);
|
||||
if (!mbuf_cur.buf || mbuf_cur.total_size != mbuf.total_size) {
|
||||
mbuf_cur = std::move(mbuf);
|
||||
|
||||
mbuf_cur.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(mbuf_cur.ctx.get(), buft));
|
||||
mbuf_cur.buf.reset(ggml_backend_alloc_ctx_tensors_from_buft(mbuf_cur.ctx.get(), buft));
|
||||
|
||||
LLAMA_LOG_INFO("%s: allocated '%s' buffer %.3f MiB\n", __func__, ggml_backend_buft_name(buft), mbuf.total_size/1024.0/1024.0);
|
||||
LLAMA_LOG_INFO("%s: allocated '%s' buffer %.3f MiB\n", __func__, ggml_backend_buft_name(buft), mbuf.total_size/1024.0/1024.0);
|
||||
} else {
|
||||
//LLAMA_LOG_INFO("%s: reallocating tensors in '%s' buffer %.3f MiB\n", __func__, ggml_backend_buft_name(buft), mbuf.total_size/1024.0/1024.0);
|
||||
|
||||
// save the old buffer and allocate the new tensors in it
|
||||
auto buf = std::move(mbuf_cur.buf);
|
||||
|
||||
mbuf_cur = std::move(mbuf);
|
||||
|
||||
ggml_tallocr talloc = ggml_tallocr_new(buf.get());
|
||||
|
||||
for (size_t i = 0; i < mbuf_cur.org.size(); ++i) {
|
||||
ggml_backend_view_init(mbuf_cur.org[i]);
|
||||
ggml_tallocr_alloc(&talloc, mbuf_cur.cpy[i]);
|
||||
}
|
||||
|
||||
mbuf_cur.buf = std::move(buf);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < mbuf_cur.org.size(); ++i) {
|
||||
|
|
@ -2559,8 +2577,7 @@ public:
|
|||
|
||||
mbuf.org.push_back(ggml_view_1d(mbuf.ctx.get(), rinfo.tensor, n, rinfo.offset));
|
||||
|
||||
auto & view = mbuf.org.back();
|
||||
view->buffer = rinfo.tensor->buffer;
|
||||
ggml_backend_view_init(mbuf.org.back());
|
||||
}
|
||||
|
||||
for (auto & [buft, mbuf] : mbufs_new) {
|
||||
|
|
|
|||
|
|
@ -858,6 +858,8 @@ extern "C" {
|
|||
size_t n_token_capacity,
|
||||
size_t * n_token_count_out);
|
||||
|
||||
#define LLAMA_STATE_SEQ_FLAGS_NONE 0
|
||||
|
||||
// for backwards-compat
|
||||
#define LLAMA_STATE_SEQ_FLAGS_SWA_ONLY 1
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue