#15061: Expose replicate and 1D shard mappers #18720

Draft · 76 commits to merge into base: main

Changes from all commits

Commits (76)
1001e95
expose classes to python
jjiangTT Feb 7, 2025
c80d173
one type error left
jjiangTT Feb 8, 2025
8f59edc
move class definitions from from distributed_tensor.cpp to.hpp so the…
jjiangTT Feb 10, 2025
ff90ba9
fix naming errors, add tests, add imports - TODO, fix weird aliasing …
jjiangTT Feb 10, 2025
f4cb249
fix mesh device conflict, add aggregate/distribute and config pybinds…
jjiangTT Feb 14, 2025
8fc6e5f
add aggregate/distribute imports to init
jjiangTT Feb 14, 2025
f45b660
add configs to pybind
jjiangTT Feb 14, 2025
a5759f5
change test cases to use distribute/aggregate
jjiangTT Feb 14, 2025
cc14dd2
fix test mappers, convert to cpu_tensor
jjiangTT Feb 14, 2025
bd1b931
clean up imports, fix test cases and change them to use mapper/compos…
jjiangTT Feb 18, 2025
3f26cb2
remove python implementations
jjiangTT Feb 18, 2025
cb23266
fix rebase
jjiangTT Feb 18, 2025
b059826
clean up deprecated imports
jjiangTT Feb 18, 2025
24dead9
add shard2dconfig, concat2dconfig methods and map/compose constructors
jjiangTT Feb 19, 2025
381de5d
Replace none types, expose configs, fix tuple errors
jjiangTT Feb 19, 2025
54dd2d4
overload for concatmeshtotensor with meshdevice
jjiangTT Feb 19, 2025
d0678b3
remove extraneous comments
jjiangTT Feb 20, 2025
c2b9bc7
fix deviceconcat errors
jjiangTT Feb 20, 2025
a452991
add back distributed.py for now, clean up class overloads
jjiangTT Feb 20, 2025
24b703c
remove unused import
jjiangTT Feb 20, 2025
795e2b1
rearrange from_torch.py, start migrating cpp classes and testing inte…
jjiangTT Feb 20, 2025
5c160a9
expose classes to python
jjiangTT Feb 7, 2025
5db7735
one type error left
jjiangTT Feb 8, 2025
a21afeb
move class definitions from from distributed_tensor.cpp to.hpp so the…
jjiangTT Feb 10, 2025
935d2e5
fix naming errors, add tests, add imports - TODO, fix weird aliasing …
jjiangTT Feb 10, 2025
d02a0fb
fix mesh device conflict, add aggregate/distribute and config pybinds…
jjiangTT Feb 14, 2025
9afad34
add aggregate/distribute imports to init
jjiangTT Feb 14, 2025
1071396
add configs to pybind
jjiangTT Feb 14, 2025
7f54f90
change test cases to use distribute/aggregate
jjiangTT Feb 14, 2025
e9d21c5
fix test mappers, convert to cpu_tensor
jjiangTT Feb 14, 2025
670de83
clean up imports, fix test cases and change them to use mapper/compos…
jjiangTT Feb 18, 2025
1d1ff5a
remove python implementations
jjiangTT Feb 18, 2025
0b679db
fix rebase
jjiangTT Feb 18, 2025
c8feeae
add shard2dconfig, concat2dconfig methods and map/compose constructors
jjiangTT Feb 19, 2025
1d53fb9
Replace none types, expose configs, fix tuple errors
jjiangTT Feb 19, 2025
5a696a3
overload for concatmeshtotensor with meshdevice
jjiangTT Feb 19, 2025
bcf4508
remove extraneous comments
jjiangTT Feb 20, 2025
4c89683
fix deviceconcat errors
jjiangTT Feb 20, 2025
a6d2016
add back distributed.py for now, clean up class overloads
jjiangTT Feb 20, 2025
58b8d46
remove unused import
jjiangTT Feb 20, 2025
1d208e0
rearrange from_torch.py, start migrating cpp classes and testing inte…
jjiangTT Feb 20, 2025
b910b6d
interim work for supporting mappers
jjiangTT Feb 21, 2025
b116604
start trying to fix rebase errors
jjiangTT Feb 25, 2025
28cdd3b
fix rebase errors
jjiangTT Feb 25, 2025
058891e
fix last rebase errors, re-add borrowed support for aggregate_tensor,…
jjiangTT Feb 25, 2025
5dbc31e
add temporary debugging, re-add copyright header, add memoryconfig fo…
jjiangTT Feb 25, 2025
3257aaa
fix spec error
jjiangTT Feb 26, 2025
45c56e6
debugging prints for tilize, add switch back and move all classes bac…
jjiangTT Feb 26, 2025
06eacac
fix from_torch device, typing errors
jjiangTT Feb 27, 2025
7c83bcb
remove debug prints
jjiangTT Feb 28, 2025
e2c189f
reformat tilize, fix golden comparisons in testing, add direct_concat…
jjiangTT Feb 28, 2025
d3614d7
fix uint errors
jjiangTT Mar 4, 2025
cac1121
fix out of bounds error
jjiangTT Mar 4, 2025
befdcc5
actual fix with correct copy paste
jjiangTT Mar 4, 2025
a5c6847
make the switch, satisfy linker without dummy virtual function defini…
jjiangTT Mar 5, 2025
f4c994f
remove replicate distinction
jjiangTT Mar 5, 2025
08739b6
remove tensortomesh from distributed.py imports
jjiangTT Mar 5, 2025
a707c0b
remove duplicate meshtotensor imports
jjiangTT Mar 5, 2025
4ca9981
fix syntax error for shard
jjiangTT Mar 5, 2025
317f873
fix test syntax error
jjiangTT Mar 5, 2025
04dfba6
improved shape error message
jjiangTT Mar 5, 2025
c16ed91
syntax fix
jjiangTT Mar 5, 2025
b548526
rationalize composer check and method signature
jjiangTT Mar 5, 2025
2bc7f46
fix composer path
jjiangTT Mar 5, 2025
0ee4c22
fix memoryconfig error
jjiangTT Mar 5, 2025
954ef6c
cleanup
jjiangTT Mar 5, 2025
0f8d038
add back distributed.py since it has uses
jjiangTT Mar 6, 2025
e416e17
change llama_common based tests over
jjiangTT Mar 6, 2025
6820794
switch replicate
jjiangTT Mar 6, 2025
5244781
switch shardtensortomesh
jjiangTT Mar 6, 2025
29a10de
unsaved sharding switch
jjiangTT Mar 6, 2025
3ca0bbd
fix replacement errors
jjiangTT Mar 6, 2025
8f52467
fix more replace errors
jjiangTT Mar 6, 2025
cf9400f
fix replace errors x3
jjiangTT Mar 6, 2025
1a408dd
manual pre-commit
jjiangTT Mar 6, 2025
3746a28
add back distributed to imports, fix it
jjiangTT Mar 6, 2025
2 changes: 1 addition & 1 deletion models/common/rmsnorm.py
@@ -80,7 +80,7 @@ def __init__(
layout=ttnn.ROW_MAJOR_LAYOUT,
memory_config=weight_memory_config,
cache_file_name=cache_name,
mesh_mapper=ttnn.ReplicateTensorToMesh(device) if is_mesh_device else None,
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(device) if is_mesh_device else None,
)

if self.is_distributed:
4 changes: 2 additions & 2 deletions models/common/tests/test_rmsnorm.py
@@ -12,7 +12,7 @@
os.environ["WH_ARCH_YAML"] = "wormhole_b0_80_arch_eth_dispatch.yaml"

import ttnn
from ttnn import ReplicateTensorToMesh, ConcatMeshToTensor
from ttnn import replicate_tensor_to_mesh_mapper, ConcatMeshToTensor

from models.common.rmsnorm import RMSNorm as TtRMSNorm
from models.utility_functions import (
@@ -130,7 +130,7 @@ def test_rmsnorm_multidevice(t3k_mesh_device, is_sharded, use_program_cache, res
device=t3k_mesh_device,
dtype=dtype,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ReplicateTensorToMesh(t3k_mesh_device),
mesh_mapper=replicate_tensor_to_mesh_mapper(t3k_mesh_device),
)

tt_output = tt_model(tt_input)
4 changes: 2 additions & 2 deletions models/demos/falcon7b_common/tests/test_falcon_mlp.py
@@ -6,7 +6,7 @@
import torch
from loguru import logger
import ttnn
from ttnn import ShardTensorToMesh
from ttnn import shard_tensor_to_mesh_mapper
from models.demos.falcon7b_common.tt.falcon_mlp import TtFalconMLPDecode, TtFalconMLPPrefill
from models.demos.falcon7b_common.tt.model_config import get_model_config
from models.demos.falcon7b_common.tests.test_utils import load_hf_model, tt_from_torch, get_num_devices
@@ -79,7 +79,7 @@ def run_test_FalconMLP_inference(
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)

tt_out = tt_FalconMLP_model(tt_mlp_input)
24 changes: 12 additions & 12 deletions models/demos/falcon7b_common/tests/test_utils.py
@@ -4,7 +4,7 @@

import torch
import ttnn
from ttnn import ShardTensorToMesh, ReplicateTensorToMesh
from ttnn import shard_tensor_to_mesh_mapper, replicate_tensor_to_mesh_mapper
from transformers import FalconForCausalLM
from models.utility_functions import tt_tensors_to_torch_tensors

@@ -20,14 +20,14 @@ def initialize_kv_cache(configuration, num_layers, batch_size, max_seq_len, mesh
dtype=ttnn.bfloat16,
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
tt_v_cache = tt_from_torch(
v_cache,
dtype=ttnn.bfloat16,
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
kv_cache += ((tt_k_cache, tt_v_cache),)
return kv_cache
@@ -106,7 +106,7 @@ def get_rand_falcon_inputs(
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)

if model_config["PREFILL_OPTIMIZED_MODE"] and seq_len in [2048, 128, 1024]:
@@ -121,7 +121,7 @@
dtype=ttnn.bfloat4_b,
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)
for attn_mask in attn_masks
]
@@ -131,7 +131,7 @@
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)

# Generate kvcache for each layer
@@ -145,14 +145,14 @@
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)
tt_v_cache = tt_from_torch(
tt_v_cache.unsqueeze(1),
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)
tt_layer_past += ((tt_k_cache, tt_v_cache),)

@@ -169,7 +169,7 @@ def get_rand_falcon_inputs(
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=2),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=2),
)

attention_mask_bool = torch.zeros(global_batch, 1, q_len, kv_len, dtype=bool)
@@ -200,7 +200,7 @@ def get_rand_falcon_inputs(
device=mesh_device,
layout=ttnn.ROW_MAJOR_LAYOUT,
memory_config=model_config["ATTN_MASK_MEMCFG"],
mesh_mapper=ShardTensorToMesh(mesh_device, dim=device_shard_dim),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=device_shard_dim),
)
if not model_config["l1_sharded"]:
# Tilize attn masks
@@ -227,14 +227,14 @@
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)
tt_v_cache = tt_from_torch(
tt_v_cache.unsqueeze(1),
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ShardTensorToMesh(mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=0),
)
tt_layer_past += ((tt_k_cache, tt_v_cache),)

8 changes: 4 additions & 4 deletions models/demos/falcon7b_common/tt/falcon_attention.py
@@ -9,7 +9,7 @@

from models.demos.falcon7b_common.tt.model_utils import get_falcon_default_core_grid
import ttnn
from ttnn import ReplicateTensorToMesh
from ttnn import replicate_tensor_to_mesh_mapper

from models.utility_functions import (
nearest_32,
@@ -155,7 +155,7 @@ def __init__(
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)

# optimized version can utilize single float value for softmax
@@ -175,7 +175,7 @@ def __init__(
device=self.mesh_device,
layout=ttnn.TILE_LAYOUT,
memory_config=self.model_config["ATTN_OPTIMIZED_MEMCFG"],
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
self.model_config["ATTN_OUTPUT_TENSORS"][seq_len] = tt_tensors

@@ -553,7 +553,7 @@ def __init__(
dtype=model_config["DEFAULT_DTYPE"],
device=mesh_device,
layout=ttnn.TILE_LAYOUT,
mesh_mapper=ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)

def forward(
4 changes: 2 additions & 2 deletions models/demos/falcon7b_common/tt/falcon_causallm.py
@@ -6,7 +6,7 @@

import torch
import ttnn
from ttnn import ReplicateTensorToMesh
from ttnn import replicate_tensor_to_mesh_mapper
from models.demos.falcon7b_common.tt.falcon_lm_head import falcon_lm_head_matmul_2d
from models.demos.falcon7b_common.tt.falcon_model import TtFalconModelShared
from models.demos.falcon7b_common.tt.model_utils import (
@@ -123,7 +123,7 @@ def __init__(
device=self.mesh_device,
layout=ttnn.TILE_LAYOUT,
memory_config=self.model_config["LM_HEAD_MM_INPUT_MEMCFG"],
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)

self.lm_head_weights = get_weights_cached(
8 changes: 4 additions & 4 deletions models/demos/falcon7b_common/tt/falcon_mlp.py
@@ -4,7 +4,7 @@

import torch
import ttnn
from ttnn import ReplicateTensorToMesh
from ttnn import replicate_tensor_to_mesh_mapper
from models.demos.falcon7b_common.tt.model_utils import (
get_falcon_default_core_grid,
get_weights_cached,
@@ -176,7 +176,7 @@ def _load_mlp_padded_tensors(self):
device=self.mesh_device,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
mlp_padding_tensors[seq_len] = tt_padding
self.model_config["MLP_PREFILL_PADDING_TENSORS"] = mlp_padding_tensors
@@ -191,7 +191,7 @@ def _allocate_output_mlp_tensors(self):
device=self.mesh_device,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
self.model_config["MLP_OUTPUT_TENSORS"] = out_tt

@@ -344,7 +344,7 @@ def _load_mlp_padded_tensors(self):
device=self.mesh_device,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
self.model_config["MLP_DECODE_PADDING_TENSORS"] = tt_paddings

12 changes: 6 additions & 6 deletions models/demos/falcon7b_common/tt/falcon_model.py
@@ -7,7 +7,7 @@

import torch
import ttnn
from ttnn import ReplicateTensorToMesh, ShardTensorToMesh
from ttnn import replicate_tensor_to_mesh_mapper, shard_tensor_to_mesh_mapper

from models.demos.falcon7b_common.tt.falcon_decoder import TtFalconDecoderLayer
from models.demos.falcon7b_common.tt.model_utils import get_weights_cached, layernorm
@@ -134,7 +134,7 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
device=self.mesh_device,
layout=ttnn.ROW_MAJOR_LAYOUT,
memory_config=self.model_config["ATTN_MASK_MEMCFG"],
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
for attention_mask_slice in attention_mask_
]
@@ -156,7 +156,7 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
device=self.mesh_device,
layout=ttnn.ROW_MAJOR_LAYOUT,
memory_config=self.model_config["ATTN_MASK_MEMCFG"],
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
# Repeat attn masks for all heads
tt_attention_mask = ttnn.repeat(
@@ -177,7 +177,7 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
layout=ttnn.ROW_MAJOR_LAYOUT,
device=self.mesh_device,
memory_config=self.model_config["INPUT_MEMCFG"],
mesh_mapper=ShardTensorToMesh(self.mesh_device, dim=0),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(self.mesh_device, dim=0),
)
elif llm_mode == "decode":
assert batch_size % 32 == 0, "For decode, batch_size must be multiple of 32!"
@@ -210,7 +210,7 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
device=self.mesh_device,
layout=ttnn.ROW_MAJOR_LAYOUT,
memory_config=self.model_config["ATTN_MASK_MEMCFG"],
mesh_mapper=ReplicateTensorToMesh(self.mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(self.mesh_device),
)
if not self.model_config["l1_sharded"]:
# Tilize attn masks
@@ -226,7 +226,7 @@ def model_preprocessing(self, llm_mode, input_ids, kv_cache_len, num_input_token
layout=ttnn.ROW_MAJOR_LAYOUT,
device=self.mesh_device,
memory_config=self.model_config["INPUT_MEMCFG"],
mesh_mapper=ShardTensorToMesh(self.mesh_device, dim=1),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(self.mesh_device, dim=1),
)
else:
raise NotImplementedError(f"Llm mode {llm_mode} is not supported! Must be one of prefill or decode.")
6 changes: 4 additions & 2 deletions models/demos/falcon7b_common/tt/model_utils.py
@@ -4,7 +4,7 @@

import torch
import ttnn
from ttnn import ReplicateTensorToMesh
from ttnn import replicate_tensor_to_mesh_mapper

from models.utility_functions import is_wormhole_b0

@@ -50,7 +50,9 @@ def preprocess_weights(weights_to_cache):
layout=tt_layout,
device=mesh_device,
memory_config=model_config[f"{weight_config_str}_MEMCFG"],
mesh_mapper=ReplicateTensorToMesh(mesh_device) if type(mesh_device) == ttnn.MeshDevice else None,
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device)
if type(mesh_device) == ttnn.MeshDevice
else None,
cache_file_name=str(path),
preprocess=preprocess_weights,
)
@@ -15,7 +15,7 @@
##### TTNN imports #####
import ttnn
from ttnn import experimental as ttl
from ttnn import ConcatMeshToTensor, ReplicateTensorToMesh
from ttnn import ConcatMeshToTensor, replicate_tensor_to_mesh_mapper
from models.utility_functions import skip_for_grayskull
from models.utility_functions import (
comp_pcc,
@@ -108,7 +108,7 @@ def test_llama_class_embedding_inference(
layout=layout,
device=mesh_device,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
logger.info(f"TT Input tensor shape: {tt_input_tensor.shape}")

@@ -14,7 +14,7 @@
##### TTNN imports #####
import ttnn
from ttnn import experimental as ttl
from ttnn import ConcatMeshToTensor, ReplicateTensorToMesh
from ttnn import ConcatMeshToTensor, replicate_tensor_to_mesh_mapper
from models.utility_functions import skip_for_grayskull
from models.utility_functions import (
comp_pcc,
@@ -106,7 +106,7 @@ def test_llama_cross_attention_inference(text_seq_len, batch, mesh_device, reset
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
dtype=ttnn.bfloat16,
mesh_mapper=ttnn.ShardTensorToMesh(mesh_device, dim=1),
mesh_mapper=ttnn.shard_tensor_to_mesh_mapper(mesh_device, dim=1),
)
for _ in range(2)
]
@@ -170,15 +170,15 @@ def test_llama_cross_attention_inference(text_seq_len, batch, mesh_device, reset
dtype=ttnn.bfloat4_b,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ttnn.ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
tt_full_text_mask = ttnn.from_torch(
full_text_mask_expand[b : b + 1],
device=mesh_device,
dtype=ttnn.bfloat4_b,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ttnn.ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
tt_out = tt_model(
tt_tensor_x,
@@ -209,7 +209,7 @@ def test_llama_cross_attention_inference(text_seq_len, batch, mesh_device, reset
dtype=ttnn.bfloat4_b,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ttnn.ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
tt_xattn_mask = ttnn.reshape(
tt_xattn_mask,
@@ -224,7 +224,7 @@ def test_llama_cross_attention_inference(text_seq_len, batch, mesh_device, reset
dtype=ttnn.bfloat4_b,
layout=ttnn.TILE_LAYOUT,
memory_config=ttnn.DRAM_MEMORY_CONFIG,
mesh_mapper=ttnn.ReplicateTensorToMesh(mesh_device),
mesh_mapper=ttnn.replicate_tensor_to_mesh_mapper(mesh_device),
)
tt_full_text_mask = ttnn.reshape(
tt_full_text_mask,
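The tests above still gather per-device results back into a single torch tensor with the class-based `ConcatMeshToTensor` composer, which this PR leaves untouched. A short sketch of that read-back path, reusing `sharded`, `torch_weight`, and `mesh_device` from the earlier sketch; note that the `mesh_composer` keyword on `ttnn.to_torch` is assumed from existing ttnn usage and does not appear in this diff:

```python
import ttnn
from ttnn import ConcatMeshToTensor

# Concatenate the per-device shards back along dim 0 to recover the original shape.
torch_result = ttnn.to_torch(
    sharded,
    mesh_composer=ConcatMeshToTensor(mesh_device, dim=0),
)
assert torch_result.shape == torch_weight.shape
```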