mirror of
https://github.com/jafioti/luminal.git
synced 2026-06-01 21:49:47 +09:00
loop unrolling
This commit is contained in:
@@ -399,7 +399,6 @@
|
||||
|
||||
; Swapping
|
||||
|
||||
; Fission
|
||||
|
||||
; TensorCore
|
||||
(ruleset tc)
|
||||
|
||||
@@ -469,6 +469,9 @@ fn make_kernel(
|
||||
.next()
|
||||
.unwrap();
|
||||
// Use a single loop with correct striding from the input
|
||||
if loads.to_usize().map(|i| i <= 16).unwrap_or_default() {
|
||||
kernel_lines.push(format!("{spacing}#pragma unroll")); // Loop unroll short loops
|
||||
}
|
||||
kernel_lines.push(format!(
|
||||
"{spacing}for (int load = 0; load < {}; ++load) {{",
|
||||
loads.to_kernel()
|
||||
@@ -546,6 +549,9 @@ fn make_kernel(
|
||||
// for
|
||||
*prev_max_var += 1;
|
||||
let loop_var = var_to_char(*prev_max_var);
|
||||
if range.to_usize().map(|i| i <= 16).unwrap_or_default() {
|
||||
kernel_lines.push(format!("{spacing}#pragma unroll")); // Loop unroll short loops
|
||||
}
|
||||
kernel_lines.push(format!("{spacing}for (int loop_{loop_var} = 0; loop_{loop_var} < {}; ++loop_{loop_var}) {{", range.to_kernel()));
|
||||
};
|
||||
let loop_var = var_to_char(*prev_max_var);
|
||||
@@ -737,6 +743,9 @@ fn make_kernel(
|
||||
);
|
||||
current_elem_size *= range;
|
||||
}
|
||||
if size.to_usize().map(|i| i <= 16).unwrap_or_default() {
|
||||
kernel_lines.push(format!("{spacing}#pragma unroll")); // Loop unroll short loops
|
||||
}
|
||||
kernel_lines.push(format!(
|
||||
"{spacing}for (int save = 0; save < {}; ++save) {{",
|
||||
size.to_kernel()
|
||||
|
||||
Reference in New Issue
Block a user