loop unrolling

This commit is contained in:
Joe Fioti
2025-09-17 07:37:37 -07:00
parent 8a6afcc28c
commit 355378ddda
2 changed files with 9 additions and 1 deletions

View File

@@ -399,7 +399,6 @@
; Swapping
; Fission
; TensorCore
(ruleset tc)

View File

@@ -469,6 +469,9 @@ fn make_kernel(
.next()
.unwrap();
// Use a single loop with correct striding from the input
if loads.to_usize().map(|i| i <= 16).unwrap_or_default() {
kernel_lines.push(format!("{spacing}#pragma unroll")); // Loop unroll short loops
}
kernel_lines.push(format!(
"{spacing}for (int load = 0; load < {}; ++load) {{",
loads.to_kernel()
@@ -546,6 +549,9 @@ fn make_kernel(
// for
*prev_max_var += 1;
let loop_var = var_to_char(*prev_max_var);
if range.to_usize().map(|i| i <= 16).unwrap_or_default() {
kernel_lines.push(format!("{spacing}#pragma unroll")); // Loop unroll short loops
}
kernel_lines.push(format!("{spacing}for (int loop_{loop_var} = 0; loop_{loop_var} < {}; ++loop_{loop_var}) {{", range.to_kernel()));
};
let loop_var = var_to_char(*prev_max_var);
@@ -737,6 +743,9 @@ fn make_kernel(
);
current_elem_size *= range;
}
if size.to_usize().map(|i| i <= 16).unwrap_or_default() {
kernel_lines.push(format!("{spacing}#pragma unroll")); // Loop unroll short loops
}
kernel_lines.push(format!(
"{spacing}for (int save = 0; save < {}; ++save) {{",
size.to_kernel()