diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
index 0101b276..5abf2290 100644
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@@ -224,7 +224,7 @@ struct sycl_device_info {
                        // cudaOccupancyMaxActiveBlocksPerMultiprocessor
     bool    vmm;                // virtual memory support
     size_t  total_vram;
-    //sycl_hw_info hw_info;     \\ device id and aarch, currently not used
+    sycl_hw_info hw_info;
     optimize_feature opt_feature;
 };
 
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index 36923160..1eead625 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -104,6 +104,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
 
         info.max_work_group_sizes[i] = prop.get_max_work_group_size();
         info.devices[i].max_wg_per_cu = info.max_work_group_sizes[i] / prop.get_max_compute_units();
+        info.devices[i].hw_info = get_device_hw_info(&device);
 
     }
 
@@ -3703,9 +3704,16 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
     // Dispatch becomes obscure with the reorder, MMVQ when the reorder optimization
     // is enabled takes precedence over DMMV, the current if-else implementation
     // requires disabling DMMV if both conditions are met
+
     if (!g_ggml_sycl_prioritize_dmmv && ((should_reorder_tensor(ctx, dst) &&
                                           ggml_sycl_supports_reorder_mmvq(src0->type)))) {
-        use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
+      // Arc770 get benefit with Q4_0 by skipping it.
+      if (!(ggml_sycl_info().devices[ctx.device].hw_info.arch ==
+                gpu_arch::intel_gpu_acm_g10 &&
+            src0->type == GGML_TYPE_Q4_0)) {
+        use_dequantize_mul_mat_vec =
+            use_dequantize_mul_mat_vec && !use_mul_mat_vec_q;
+      }
     }
 
     if (!split && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
diff --git a/ggml/src/ggml-sycl/sycl_hw.cpp b/ggml/src/ggml-sycl/sycl_hw.cpp
index 70411400..03b0c37a 100644
--- a/ggml/src/ggml-sycl/sycl_hw.cpp
+++ b/ggml/src/ggml-sycl/sycl_hw.cpp
@@ -1,15 +1,67 @@
 #include "sycl_hw.hpp"
 
-// TODO: currently not used
-/*
-sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
-  sycl_hw_info res;
-  int32_t id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
-  res.device_id = id;
+using namespace std;
 
-  syclex::architecture arch = device_ptr->get_info<syclex::info::device::architecture>();
-  res.arch = arch;
-
-  return res;
-}
+/*defined in
+* /opt/intel/oneapi/compiler/latest/include/sycl/ext/oneapi/experimental/device_architecture.def
 */
+static map<gpu_arch, std::pair<const char*, sycl_intel_gpu_family>> arch2name = {
+    {gpu_arch::intel_gpu_bdw,     {"intel_gpu_bdw",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_skl,     {"intel_gpu_skl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_kbl,     {"intel_gpu_kbl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_cfl,     {"intel_gpu_cfl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_apl,     {"intel_gpu_apl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_glk,     {"intel_gpu_glk",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_whl,     {"intel_gpu_whl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_aml,     {"intel_gpu_aml",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_cml,     {"intel_gpu_cml",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_icllp,   {"intel_gpu_icllp",   GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_ehl,     {"intel_gpu_ehl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_tgllp,   {"intel_gpu_tgllp",   GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_rkl,     {"intel_gpu_rkl",     GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_adl_s,   {"intel_gpu_adl_s",   GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_adl_p,   {"intel_gpu_adl_p",   GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_adl_n,   {"intel_gpu_adl_n",   GPU_FAMILY_IGPU_NON_XE}},
+    {gpu_arch::intel_gpu_dg1,     {"intel_gpu_dg1",     GPU_FAMILY_DGPU_CLIENT_GAME}},
+    {gpu_arch::intel_gpu_acm_g10, {"intel_gpu_acm_g10", GPU_FAMILY_DGPU_CLIENT_GAME}},
+    {gpu_arch::intel_gpu_acm_g11, {"intel_gpu_acm_g11", GPU_FAMILY_DGPU_CLIENT_GAME}},
+    {gpu_arch::intel_gpu_acm_g12, {"intel_gpu_acm_g12", GPU_FAMILY_DGPU_CLIENT_GAME}},
+    {gpu_arch::intel_gpu_pvc,     {"intel_gpu_pvc",     GPU_FAMILY_DGPU_CLOUD}},
+    {gpu_arch::intel_gpu_pvc_vg,  {"intel_gpu_pvc_vg",  GPU_FAMILY_DGPU_CLOUD}},
+    {gpu_arch::intel_gpu_mtl_u,   {"intel_gpu_mtl_u",   GPU_FAMILY_IGPU_XE}},
+    {gpu_arch::intel_gpu_mtl_h,   {"intel_gpu_mtl_h",   GPU_FAMILY_IGPU_XE}},
+    {gpu_arch::intel_gpu_arl_h,   {"intel_gpu_arl_h",   GPU_FAMILY_IGPU_XE}},
+    {gpu_arch::intel_gpu_bmg_g21, {"intel_gpu_bmg_g21", GPU_FAMILY_DGPU_CLIENT_GAME}},
+    {gpu_arch::intel_gpu_bmg_g31, {"intel_gpu_bmg_g31", GPU_FAMILY_DGPU_CLIENT_GAME}},
+    {gpu_arch::intel_gpu_lnl_m,   {"intel_gpu_lnl_m",   GPU_FAMILY_IGPU_XE}},
+    {gpu_arch::intel_gpu_ptl_h,   {"intel_gpu_ptl_h",   GPU_FAMILY_IGPU_XE}},
+    {gpu_arch::intel_gpu_ptl_u,   {"intel_gpu_ptl_u",   GPU_FAMILY_IGPU_XE}},
+    {gpu_arch::intel_gpu_wcl,     {"intel_gpu_wcl",     GPU_FAMILY_IGPU_XE}}
+};
+
+
+sycl_hw_info get_device_hw_info(sycl::device* device_ptr) {
+    sycl_hw_info res;
+    int32_t id =
+        device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
+    res.device_id = id;
+
+    res.name = device_ptr->get_info<sycl::info::device::name>();
+
+    syclex::architecture arch =
+        device_ptr->get_info<syclex::info::device::architecture>();
+    res.arch = arch;
+
+    map<syclex::architecture,
+        std::pair<const char*, sycl_intel_gpu_family>>::iterator it =
+        arch2name.find(res.arch);
+    if (it != arch2name.end()) {
+        res.arch_name = it->second.first;
+        res.gpu_family = it->second.second;
+    } else {
+        res.arch_name = "unknown";
+        res.gpu_family = GPU_FAMILY_UKNOWN;
+    }
+
+    return res;
+}
diff --git a/ggml/src/ggml-sycl/sycl_hw.hpp b/ggml/src/ggml-sycl/sycl_hw.hpp
index 36b140bf..a5d20462 100644
--- a/ggml/src/ggml-sycl/sycl_hw.hpp
+++ b/ggml/src/ggml-sycl/sycl_hw.hpp
@@ -9,18 +9,30 @@
 #include <sycl/sycl.hpp>
 
 namespace syclex = sycl::ext::oneapi::experimental;
+using gpu_arch = sycl::ext::oneapi::experimental::architecture;
 
-// TODO: currently not used
-/*
-struct sycl_hw_info {
-  syclex::architecture arch;
-  int32_t device_id;
+// It's used to mark the GPU computing capacity
+// The value must flow the order of performance.
+enum sycl_intel_gpu_family {
+  GPU_FAMILY_UKNOWN = -1,
+  // iGPU without Xe core, before Meteor Lake iGPU(Xe)
+  GPU_FAMILY_IGPU_NON_XE = 0,
+  // iGPU with Xe core, Meteor Lake iGPU or newer.
+  GPU_FAMILY_IGPU_XE = 1,
+  // dGPU for gaming in client/data center (DG1/FLex 140 or newer).
+  GPU_FAMILY_DGPU_CLIENT_GAME = 2,
+  // dGPU for AI in cloud, PVC or newer.
+  GPU_FAMILY_DGPU_CLOUD = 3
 };
 
-bool is_in_vector(std::vector<int> &vec, int item);
+struct sycl_hw_info {
+  syclex::architecture arch;
+  const char* arch_name;
+  int32_t device_id;
+  std::string name;
+  sycl_intel_gpu_family gpu_family;
+};
 
 sycl_hw_info get_device_hw_info(sycl::device *device_ptr);
-*/
-
 
 #endif // SYCL_HW_HPP