sycl : support to malloc memory on device more than 4GB, update the doc and script (llama/17566)
Co-authored-by: Neo Zhang Jianyu <jianyu.zhang@intel.com>
This commit is contained in:
parent
28dff06555
commit
a3459484bf
|
|
@ -91,7 +91,10 @@ if (GGML_SYCL_F16)
|
|||
add_compile_definitions(GGML_SYCL_F16)
|
||||
endif()
|
||||
|
||||
if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
||||
if (GGML_SYCL_TARGET STREQUAL "INTEL")
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
|
||||
target_link_options(ggml-sycl PRIVATE -Xs -ze-intel-greater-than-4GB-buffer-required)
|
||||
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
||||
elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
||||
# INFO: Allowed Sub_group_sizes are not consistent through all
|
||||
|
|
@ -100,7 +103,8 @@ elseif (GGML_SYCL_TARGET STREQUAL "AMD")
|
|||
# Target archs tested working: gfx1030, gfx1031, (Only tested sub_group_size = 32)
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
||||
else()
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
|
||||
# default for other target
|
||||
add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
|
||||
endif()
|
||||
|
||||
if (GGML_SYCL_GRAPH)
|
||||
|
|
|
|||
|
|
@ -515,9 +515,6 @@ void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
|
|||
const int64_t ne = ggml_nelements(src0);
|
||||
GGML_ASSERT(ne == ggml_nelements(src1));
|
||||
|
||||
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
|
||||
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS01;
|
||||
|
||||
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
||||
|
|
|
|||
Loading…
Reference in New Issue