From 37eae8066f140ae3bd1acfcdad24a4bca6754424 Mon Sep 17 00:00:00 2001
From: drisspg
Date: Tue, 11 Feb 2025 17:06:52 -0800
Subject: [PATCH] misc

stack-info: PR: https://github.com/drisspg/transformer_nuggets/pull/44, branch: drisspg/stack/7
---
 transformer_nuggets/flex/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/transformer_nuggets/flex/__init__.py b/transformer_nuggets/flex/__init__.py
index d9b0d36..600a188 100644
--- a/transformer_nuggets/flex/__init__.py
+++ b/transformer_nuggets/flex/__init__.py
@@ -10,16 +10,16 @@ class FlexAttentionKernelArgs:
     # Performance tuning options
     num_warps: Optional[int] = None
-    """Number of warps to use in the CUDA kernel. If None, will be autotuned."""
+    """Number of warps to use in the CUDA kernel."""
     num_stages: Optional[int] = None
-    """Number of pipeline stages to use in the CUDA kernel. If None, will be autotuned."""
+    """Number of pipeline stages to use in the CUDA kernel."""
     BLOCK_M: Optional[int] = None
-    """Thread block size across the seqlen dim of Q. If None, will be autotuned."""
+    """Thread block size across the seqlen dim of Q."""
     BLOCK_N: Optional[int] = None
-    """Block size to iterate over across the seqlen dim of K/V in each thread block. If None, will be autotuned."""
+    """Block size to iterate over across the seqlen dim of K/V in each thread block."""
 
     # Numerical behavior options
     PRESCALE_QK: bool = False
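
Usage sketch (not part of the diff): the fields above are kernel tuning knobs, and one plausible way to consume them is to collect the explicitly-set (non-None) values into a dict, e.g. for the kernel_options argument of torch's flex_attention. The local mirror class and the to_kernel_options helper below are assumptions for illustration, not the library's actual API.

from dataclasses import dataclass, fields
from typing import Optional


@dataclass
class FlexAttentionKernelArgs:
    # Local mirror of the tuning fields touched in this patch; illustrative only.
    num_warps: Optional[int] = None
    num_stages: Optional[int] = None
    BLOCK_M: Optional[int] = None
    BLOCK_N: Optional[int] = None
    PRESCALE_QK: bool = False

    def to_kernel_options(self) -> dict:
        # Drop unset (None) fields so the kernel's own defaults apply to them.
        return {
            f.name: getattr(self, f.name)
            for f in fields(self)
            if getattr(self, f.name) is not None
        }


args = FlexAttentionKernelArgs(num_warps=4, BLOCK_M=64, BLOCK_N=64)
print(args.to_kernel_options())
# {'num_warps': 4, 'BLOCK_M': 64, 'BLOCK_N': 64, 'PRESCALE_QK': False}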