opencl: batch profiling to improve speed and prevent memory leaks (llama/23495)
This commit is contained in:
parent
b84d03487c
commit
1435988ab3
|
|
@ -661,11 +661,10 @@ struct ggml_backend_opencl_context {
|
|||
cl_kernel kernel_mul_mm_iq4_nl_f32_l4_lm;
|
||||
|
||||
std::vector<ProfilingInfo> profiling_info;
|
||||
std::vector<ProfilingInfo> profiling_results;
|
||||
|
||||
void write_profiling_info() {
|
||||
FILE * fperf = fopen("cl_profiling.csv", "w");
|
||||
if (!fperf) {
|
||||
GGML_LOG_ERROR("Failed to open cl_profiling.csv\n");
|
||||
void flush_profiling_batch() {
|
||||
if (profiling_info.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -689,6 +688,7 @@ struct ggml_backend_opencl_context {
|
|||
CL_CHECK(clGetEventProfilingInfo(
|
||||
info.evt, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &cmd_complete, NULL));
|
||||
CL_CHECK(clReleaseEvent(info.evt));
|
||||
info.evt = nullptr;
|
||||
|
||||
char kernel_name[512];
|
||||
CL_CHECK(clGetKernelInfo(info.kernel, CL_KERNEL_FUNCTION_NAME,
|
||||
|
|
@ -706,10 +706,26 @@ struct ggml_backend_opencl_context {
|
|||
info.cmd_complete_duration_ns = cmd_complete - cmd_end;
|
||||
info.cmd_total_duration_ns = cmd_complete - cmd_queued;
|
||||
}
|
||||
profiling_results.insert(profiling_results.end(),
|
||||
std::make_move_iterator(profiling_info.begin()),
|
||||
std::make_move_iterator(profiling_info.end()));
|
||||
profiling_info.clear();
|
||||
}
|
||||
|
||||
void write_profiling_info() {
|
||||
if (profiling_results.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Dump a csv
|
||||
FILE * fperf = fopen("cl_profiling.csv", "w");
|
||||
if (!fperf) {
|
||||
GGML_LOG_ERROR("Failed to open cl_profiling.csv\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(fperf, "op name, kernel name, exec duration (ms), global size, local size, output size\n");
|
||||
for (const ProfilingInfo & info : profiling_info) {
|
||||
for (const ProfilingInfo & info : profiling_results) {
|
||||
fprintf(fperf, "%s,%s,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n",
|
||||
info.op_name.c_str(), info.kernel_name.c_str(),
|
||||
info.cmd_duration_ns/1.e6f,
|
||||
|
|
@ -720,14 +736,14 @@ struct ggml_backend_opencl_context {
|
|||
fclose(fperf);
|
||||
|
||||
// Dump a simple chrome trace
|
||||
FILE* ftrace = fopen("cl_trace.json", "w");
|
||||
FILE * ftrace = fopen("cl_trace.json", "w");
|
||||
if (!ftrace) {
|
||||
GGML_LOG_ERROR("Failed to open cl_trace.json\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(ftrace, "[\n");
|
||||
for (const ProfilingInfo & info : profiling_info) {
|
||||
for (const ProfilingInfo & info : profiling_results) {
|
||||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
|
||||
info.kernel_name.c_str(), info.cmd_queued/1000);
|
||||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Host\"},\n",
|
||||
|
|
@ -738,6 +754,7 @@ struct ggml_backend_opencl_context {
|
|||
fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %" PRIu64 ", \"pid\": \"\", \"tid\": \"Device\"},\n",
|
||||
info.kernel_name.c_str(), info.cmd_end/1000);
|
||||
}
|
||||
fprintf(ftrace, "]\n");
|
||||
fclose(ftrace);
|
||||
}
|
||||
|
||||
|
|
@ -758,6 +775,9 @@ struct ggml_backend_opencl_context {
|
|||
|
||||
profiling_info.emplace_back();
|
||||
populateProfilingInfo(profiling_info.back(), evt, kernel, work_dim, global_work_size, local_work_size, tensor);
|
||||
if (profiling_info.size() >= 2048) {
|
||||
flush_profiling_batch();
|
||||
}
|
||||
#else
|
||||
GGML_UNUSED(tensor);
|
||||
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL));
|
||||
|
|
@ -804,7 +824,7 @@ struct ggml_backend_opencl_context {
|
|||
if (ref_count == 0) {
|
||||
#ifdef GGML_OPENCL_PROFILING
|
||||
write_profiling_info();
|
||||
profiling_info.clear();
|
||||
profiling_results.clear();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue