diff --git a/src/cpu/x64/jit_uni_tbb_batch_normalization.cpp b/src/cpu/x64/jit_uni_tbb_batch_normalization.cpp index 5e3d72be4fa..57492122ee5 100644 --- a/src/cpu/x64/jit_uni_tbb_batch_normalization.cpp +++ b/src/cpu/x64/jit_uni_tbb_batch_normalization.cpp @@ -2169,7 +2169,7 @@ struct driver_t : public c_compatible { dim_t total_size = size_src_dst + size_stats_ss_tensors; // Try to create at least nthr_ chunks for realtime inference - const int n_chunks_min = nthr_ <= 4 ? nstl::min(4, nthr_) : 1; + const int n_chunks_min = nthr_ <= 8 ? nthr_ : 1; const size_t l2_per_core = platform::get_per_core_cache_size(2); dim_t n_chunks = nstl::max(n_chunks_min, total_size / l2_per_core);