
Commit

forget comma
eliebak authored Sep 13, 2024
1 parent 67115a5 commit 6249264
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/nanotron/models/llama.py
@@ -919,7 +919,7 @@ def get_block_compute_costs(self):
             # CausalSelfAttention (qkv proj + attn out) + MLP
             LlamaDecoderLayer: 2 * model_config.num_attention_heads * d_qkv * model_config.hidden_size  # Q output projection
             + 2 * model_config.num_key_value_heads * d_qkv * model_config.hidden_size  # KV
-            + 3 * d_ff * model_config.hidden_size  # for the MLP (3 because of the gated mechanism)
+            + 3 * d_ff * model_config.hidden_size,  # for the MLP (3 because of the gated mechanism)
             # This is the last lm_head
             TensorParallelColumnLinear: model_config.vocab_size * model_config.hidden_size,
         }
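For context, the comma matters because the surrounding code builds a dict literal: without a trailing comma after the MLP term, the next key (TensorParallelColumnLinear) is parsed as a continuation of the same expression and the module fails to import with a SyntaxError. Below is a minimal standalone sketch of the cost formula as it reads after this fix. The DummyConfig values are hypothetical Llama-style sizes chosen only for illustration, and string keys stand in for nanotron's LlamaDecoderLayer and TensorParallelColumnLinear classes.

    from dataclasses import dataclass


    @dataclass
    class DummyConfig:
        # Hypothetical Llama-style sizes, for illustration only.
        hidden_size: int = 4096
        intermediate_size: int = 14336
        num_attention_heads: int = 32
        num_key_value_heads: int = 8
        vocab_size: int = 128256


    def block_compute_costs(model_config: DummyConfig) -> dict:
        """Standalone restatement of the cost formula touched by this diff.

        String keys stand in for nanotron's LlamaDecoderLayer and
        TensorParallelColumnLinear classes.
        """
        d_ff = model_config.intermediate_size
        d_qkv = model_config.hidden_size // model_config.num_attention_heads
        return {
            # CausalSelfAttention (qkv proj + attn out) + MLP
            "LlamaDecoderLayer": 2 * model_config.num_attention_heads * d_qkv * model_config.hidden_size  # Q + attn out proj
            + 2 * model_config.num_key_value_heads * d_qkv * model_config.hidden_size  # K and V proj
            + 3 * d_ff * model_config.hidden_size,  # gated MLP: gate, up and down projections
            # The final lm_head
            "TensorParallelColumnLinear": model_config.vocab_size * model_config.hidden_size,
        }


    if __name__ == "__main__":
        for name, cost in block_compute_costs(DummyConfig()).items():
            print(f"{name}: {cost:,}")

Running the sketch prints the relative per-block costs that this method feeds into nanotron's load balancing of layers across pipeline stages, per the docstring of get_block_compute_costs.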

